To access the OpenAI API, you need an API key, which you can get from https://platform.openai.com/api-keys.
Once you have a key, you can either save it as an environment variable or, for simplicity, hard-code it where needed.

In [1]:
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
import os
from dotenv import load_dotenv


# Read settings from a .env file; override=True asks python-dotenv to let the
# file's values replace variables already present in the environment.
load_dotenv(override=True)

# Report each backend whose flag is set to the exact string 'True'.
for flag_name, banner in (('USE_OLLAMA', 'Using OLLAMA'), ('USE_OPENAI', 'Using OpenAI')):
    if os.getenv(flag_name) == 'True':
        print(banner)



Using OLLAMA


In [2]:
import os
from langchain_community.llms import Ollama
from langchain_community.chat_models import ChatOllama

# Prompt that restricts the model to the supplied context. The {context} and
# {question} placeholders are filled in when the chain is invoked.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Honor the backend flags from the environment instead of switching backends by
# commenting code in and out. Ollama stays the default: OpenAI is selected only
# when USE_OPENAI is 'True' and USE_OLLAMA is not.
if os.getenv('USE_OPENAI') == 'True' and os.getenv('USE_OLLAMA') != 'True':
    # os.environ[...] raises KeyError when OPENAI_API_KEY is missing, so a
    # missing key is reported here rather than on the first request.
    model = ChatOpenAI(openai_api_key=os.environ["OPENAI_API_KEY"], model_name="gpt-4-0125-preview")
else:
    model = ChatOllama(model="llama2")

# Turns the chat model's message output into a plain string.
output_parser = StrOutputParser()


Use document loaders to load data from a source as `Document` objects.

Facebook AI Similarity Search (Faiss) is a library for efficient similarity search and clustering of dense vectors.

The retriever fetches the search results, which are then used as the context for the prompt.

In [4]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
# Import GPT4AllEmbeddings from langchain_community directly; the
# `langchain.embeddings` path is a deprecated re-export of the same class.
from langchain_community.embeddings import GPT4AllEmbeddings

import os

# Load the article from disk as Document objects.
raw_documents = TextLoader('../article/funnyIO_development.md').load()

# Split the article into chunks (target size 1000 characters, no overlap) so
# that each chunk is embedded and retrieved separately.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

# Embed the chunks with GPT4All and index them in FAISS.
# OpenAI alternative:
#vectorstore = FAISS.from_documents(documents, embedding=OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"]))
vectorstore = FAISS.from_documents(documents, embedding=GPT4AllEmbeddings())


# Expose the index as a retriever so it can be used as a chain step.
retriever = vectorstore.as_retriever()

In [5]:
from pprint import pprint

# The retriever returns a list of Document objects. Join their page_content so
# the prompt's {context} slot receives plain text instead of the repr of a
# Python list (which would include metadata and escaped newlines).
chain = (
    {
        "context": retriever | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        "question": RunnablePassthrough(),  # forwards the input question unchanged
    }
    | prompt
    | model
    | output_parser
)

pprint(chain.invoke("tell me about the funnyIO"))

('Based on the provided context, I can provide information about FunnyIO as '
 'follows:\n'
 '\n'
 '1. FunnyIO is a high-performance and scalable host controller interface '
 'designed for accessing solid-state drives (SSDs) over a PCIExpress (PCIe) '
 'bus.\n'
 '2. The FunnyIO protocol provides efficient command submission and completion '
 'mechanisms, enabling high-speed data transfers between the host and the '
 'storage device.\n'
 '3. Developing FunnyIO drivers is crucial for enabling efficient '
 'communication between the operating system and FunnyIO storage devices.\n'
 '4. Understanding the PCIe interface and communication protocols is essential '
 'for developing efficient FunnyIO drivers.\n'
 '5. Memory management is a critical aspect of FunnyIO driver development, as '
 'proper memory allocation and deallocation strategies are necessary to '
 'maintain system stability and performance.\n'
 '6. Command queues are used by FunnyIO drivers to submit and process I/O '
 'command

You can also load a whole directory using a directory loader,
and split long documents into smaller chunks that fit into your model's context window.

In [None]:
# Installs the `markdown` package into the kernel's environment (unpinned).
# NOTE(review): presumably required by UnstructuredMarkdownLoader, which the next cell uses — confirm, and consider pinning a version.
%pip install markdown

In [8]:
from langchain_community.document_loaders import (
    DirectoryLoader,
    PythonLoader,
    TextLoader,
    UnstructuredMarkdownLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter


# Load every Markdown file in ../article as Document objects.
# Other file types can be loaded by swapping the glob and loader class, e.g.
# glob="*.py" with PythonLoader or glob="*.txt" with TextLoader.
raw_documents = DirectoryLoader(
    path='../article',
    glob="*.md",
    loader_cls=UnstructuredMarkdownLoader,
).load()

# Split the loaded documents into chunks (target size 1000, no overlap).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
documents = text_splitter.split_documents(raw_documents)

# Embed the chunks with GPT4All and index them in FAISS
# (OpenAIEmbeddings with OPENAI_API_KEY is the hosted alternative).
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=GPT4AllEmbeddings(),
)

# Replace the earlier retriever with one backed by the directory-wide index.
retriever = vectorstore.as_retriever()


Invoke the chain with a question.

In [9]:
from pprint import pprint

# Rebuild the chain so it uses the current `retriever`. The retriever returns
# a list of Document objects; join their page_content so the prompt's {context}
# slot receives plain text instead of the repr of a Python list.
chain = (
    {
        "context": retriever | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        "question": RunnablePassthrough(),  # forwards the input question unchanged
    }
    | prompt
    | model
    | output_parser
)

pprint(chain.invoke("tell me about the emerging funnnnyIO"))

('Based on the provided context, there is no information available about an '
 '"emerging FunnyIO." The articles focus on the basics of FunnyIO driver '
 'development and provide insights into various aspects of FunnyIO, including '
 'its protocol, PCIe interface, memory management, command queues, error '
 'handling, and more.\n'
 '\n'
 'Therefore, I cannot provide information about an "emerging FunnyIO" as there '
 'is no such topic discussed in the provided articles.')
