In [20]:
import os
import openai
from dotenv import load_dotenv, find_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Find a .env file and load its variables into the process environment, then
# hand the OpenAI key to the client library.
dotenv_file = find_dotenv()
_ = load_dotenv(dotenv_file)
# Indexing os.environ directly raises KeyError when the variable is absent.
openai.api_key = os.environ["OPENAI_API_KEY"]


# Build a persisted Chroma vector index over the Flynn's classification PDF.
# The path is defined once so the loader and the "source" metadata cannot
# drift apart.
pdf_path = "docs/Flynn's classification.pdf"
persist_directory = 'docs/chroma/'

# Load the PDF into LangChain documents.
loader = PyPDFLoader(pdf_path)
pages = loader.load()

# Split the pages with chunk_size=150 and no overlap, trying the separators
# in order: paragraph break, line break, sentence end, space, then "".
# NOTE(review): the third separator is a regex look-behind. Newer LangChain
# releases appear to treat separators as literal text unless
# is_separator_regex=True is passed -- confirm against the installed version.
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=150,
    chunk_overlap=0,
    # Raw string: "\." in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions. The value is unchanged.
    separators=["\n\n", "\n", r"(?<=\. )", " ", ""],
)
docs = r_splitter.split_documents(pages)

embedding = OpenAIEmbeddings()

# Re-wrap each chunk so "source" is always the PDF path, while keeping any
# other metadata already present on the chunk instead of discarding it.
metadata_docs = [
    Document(
        page_content=doc.page_content,
        metadata={**doc.metadata, "source": pdf_path},
    )
    for doc in docs
]

# Embed the chunks and store them under persist_directory.
# NOTE(review): this presumably adds to whatever is already stored in
# persist_directory, so re-running the cell may index the same chunks
# again -- verify, and clear or reuse the directory if so.
vectordb = Chroma.from_documents(
    documents=metadata_docs,
    embedding=embedding,
    persist_directory=persist_directory,
)

# Explicitly persist the collection.
vectordb.persist()


# Ask the user for a question, answer it with a RetrievalQA chain that uses
# the vector store as its retriever, and print the answer text.
query = input("Enter your query related to flynn's classification document:")

llm_name = 'gpt-3.5-turbo'
llm = ChatOpenAI(temperature=0, model_name=llm_name)
retriever = vectordb.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# The chain takes the question under "query" and returns a mapping whose
# "result" entry is printed below.
result = qa_chain({"query": query})
answer = result["result"]
print(answer)

Enter your query related to flynn's classification document:taxonamy
Taxonomy is a classification system proposed by Michael J. Flynn in 1966. It is used to categorize computer architectures based on the number of instruction streams and data streams they can handle.
