In [1]:
# Import necessary libraries
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings, OllamaLLM
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
import os
from glob import glob
from dotenv import load_dotenv

In [2]:
# Load settings via load_dotenv(), then read the OpenAI credential from the
# process environment. The value is None when OPENAI_API_KEY is not set.
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [3]:
# Notebook-wide configuration: everything a reader might want to tune lives here.

# Ollama model tags: the embedding model, and the local LLM alternative.
EMBEDDINGS_MODEL = "nomic-embed-text"
LLM_MODEL = "llama3.2:3b"

# Input location: folder that is searched for *.pdf files.
DATA_FOLDER = "./data"

# Chroma vector store: on-disk persistence directory and collection name.
CHROMA_PERSIST_DIR = "./chroma_db"
CHROMA_COLLECTION_NAME = "local_rag_db"

In [4]:
# Load every PDF found directly under DATA_FOLDER into one flat list of documents.
# glob() gives no ordering guarantee (it depends on the file system), so the
# paths are sorted to keep document order reproducible from run to run.
pdf_files = sorted(glob(os.path.join(DATA_FOLDER, "*.pdf")))

# Fail here with a clear message instead of letting an empty document list
# surface later as an unrelated error when the vector store is built.
if not pdf_files:
    raise FileNotFoundError(f"No PDF files found in {DATA_FOLDER!r}")

documents = []
for file_path in pdf_files:
    # Each loader contributes whatever documents PyPDFLoader.load() returns for that file.
    documents.extend(PyPDFLoader(file_path).load())

In [5]:
# Cut the loaded documents into smaller pieces for embedding.
# Splitter settings are kept together so they are easy to spot and tune.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 100}
splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunks = splitter.split_documents(documents)

In [6]:
# Create embeddings and vectorstore
embeddings = OllamaEmbeddings(model=EMBEDDINGS_MODEL)

# Give every chunk a stable id built from its source file, its page and its
# position among the chunks of that page. The collection is persisted on disk,
# and without explicit ids each run of this cell stores the same chunks again
# under freshly generated ids, so retrieval starts returning duplicates after a
# re-run. With stable ids a re-run overwrites the existing entries instead.
# NOTE(review): relies on the loader filling metadata["source"] / metadata["page"];
# when they are missing the ids fall back to a plain running counter.
# NOTE(review): entries stored by earlier runs under random ids stay in
# CHROMA_PERSIST_DIR until that directory is cleared once.
chunk_ids = []
chunks_seen_per_page = {}
for chunk in chunks:
    page_key = (chunk.metadata.get("source", ""), chunk.metadata.get("page", ""))
    position = chunks_seen_per_page.get(page_key, 0)
    chunks_seen_per_page[page_key] = position + 1
    chunk_ids.append(f"{page_key[0]}:{page_key[1]}:{position}")

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory=CHROMA_PERSIST_DIR,
    collection_name=CHROMA_COLLECTION_NAME,
)

In [7]:
# Assemble the conversational RAG chain from three parts:
#   llm       - the chat model that writes the answers
#   retriever - looks up relevant chunks in the vector store
#   memory    - keeps the running conversation under the "chat_history" key
# Local alternative to the OpenAI model: llm = OllamaLLM(model=LLM_MODEL)
llm = ChatOpenAI(
    model="gpt-4",
    temperature=0,
    openai_api_key=openai_api_key,
)
retriever = vectorstore.as_retriever()
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

  llm = ChatOpenAI(model="gpt-4", temperature=0, openai_api_key=openai_api_key)
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


In [9]:
# First question of the conversation.
query = "When Bhagat Singht Born?"
# Chain.run() is deprecated; invoke() takes the chain's input dict and returns
# a dict of outputs. "question" is the chain's input key, "answer" its output key.
response = qa_chain.invoke({"question": query})["answer"]
print(response)

Bhagat Singh was born on 27 September 1907.


In [10]:
# Inspect the conversation memory that was handed to qa_chain; the repr lists
# the accumulated chat history.
# NOTE(review): the saved output contains an exchange ("Please remeber secerte
# word CAT") that no cell in this notebook issues, and execution count 8 is
# missing - re-run from a fresh kernel before sharing.
memory

ConversationBufferMemory(chat_memory=InMemoryChatMessageHistory(messages=[HumanMessage(content='Please remeber secerte word CAT', additional_kwargs={}, response_metadata={}), AIMessage(content="I'm sorry, but your request doesn't seem to be related to the provided context. Could you please provide more information or clarify your question?", additional_kwargs={}, response_metadata={}), HumanMessage(content='When Bhagat Singht Born?', additional_kwargs={}, response_metadata={}), AIMessage(content='Bhagat Singh was born on 27 September 1907.', additional_kwargs={}, response_metadata={})]), return_messages=True, memory_key='chat_history')

In [11]:
# Follow-up question: "he" can only be resolved through the chat history kept in memory.
query = "When he is died?"
# Chain.run() is deprecated; invoke() takes the chain's input dict and returns
# a dict of outputs. "question" is the chain's input key, "answer" its output key.
response = qa_chain.invoke({"question": query})["answer"]
print(response)

The text doesn't provide information on when Bhagat Singh died.


In [12]:
# Switch the conversation to a different person.
query = "WhenKuvempe is born?"
# Chain.run() is deprecated; invoke() takes the chain's input dict and returns
# a dict of outputs. "question" is the chain's input key, "answer" its output key.
response = qa_chain.invoke({"question": query})["answer"]
print(response)

Kuvempu was born on 29 December 1904.


In [13]:
# Same follow-up wording as before; the answer now depends on the updated chat history.
query = "When he is died?"
# Chain.run() is deprecated; invoke() takes the chain's input dict and returns
# a dict of outputs. "question" is the chain's input key, "answer" its output key.
response = qa_chain.invoke({"question": query})["answer"]
print(response)

Kuvempu died on 11 November 1994.
