### Importing libraries

In [1]:
import os
from dotenv import load_dotenv
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
# Read key/value pairs from a local .env file into the process environment.
# override=True lets values from .env replace ones already set in the shell.
load_dotenv(override=True)

# os.getenv reads from os.environ, so assigning os.environ[X] = os.getenv(X)
# adds nothing when X exists and raises an opaque
# "TypeError: str expected, not NoneType" when it does not.
# Check for the required keys explicitly and fail with a readable message.
_required_env_vars = ("LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT", "OPENAI_API_KEY")
_missing_env_vars = [name for name in _required_env_vars if os.getenv(name) is None]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in your shell or in a .env file."
    )

# Enable LangChain/LangSmith tracing for the runs in this notebook.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

### Loading the document

In [2]:
# Location of the PDF to index. Set the CBA_PDF_PATH environment variable to
# point at the file on another machine; the original path is kept as the
# default so existing runs behave exactly as before.
file_path = os.getenv(
    "CBA_PDF_PATH",
    '/Users/mohamed_sharif/Downloads/Final CBA 2019-2022 with signatures.pdf',
)
loader = PyPDFLoader(file_path)
# `document` holds whatever loader.load() returns; it is passed to
# split_documents() in the chunking step.
document = loader.load()

### Chunking the document

In [3]:
# Splitter settings gathered in one place:
#   chunk_size / chunk_overlap are measured with `length_function` (plain len,
#   i.e. characters), and `separators` lists the split points to try in order.
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "length_function": len,
    "separators": ["\n\n", "\n", " ", ""],
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded document into overlapping chunks for embedding.
chunks = text_splitter.split_documents(document)


### Embedding Model and Vector Database

In [4]:
# Name of the OpenAI embedding model handed to the vector store.
EMBEDDING_MODEL = "text-embedding-3-small"
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

In [5]:
# Directory where Chroma persists the collection between kernel sessions.
CHROMA_DIR = './chroma_db'

# Chroma.from_documents() always *adds* the chunks to the collection stored in
# persist_directory. Re-running this cell therefore used to insert the same
# chunks again (duplicate retrieval hits, repeated embedding cost). Reuse the
# persisted collection when one is already on disk and only build it otherwise.
# NOTE: delete CHROMA_DIR to force a rebuild after changing the PDF, the
# chunking parameters, or the embedding model.
if os.path.isdir(CHROMA_DIR) and os.listdir(CHROMA_DIR):
    vectorstore = Chroma(
        persist_directory=CHROMA_DIR,
        embedding_function=embeddings,
    )
else:
    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=CHROMA_DIR,
    )

### LLM

In [6]:
# Chat model that writes the final answer from the retrieved context.
# temperature=0 keeps sampling as deterministic as the API allows: the prompt
# asks for answers strictly grounded in the document, so the extra randomness
# of the previous 0.7 setting only made results harder to reproduce.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
)

### Retrieval Chain

In [14]:
# System prompt for the answering model. `{context}` must appear exactly once:
# every occurrence is replaced with the full text of the retrieved chunks, so
# the earlier second placeholder inside the "Critical" rule pasted the whole
# context into the middle of that sentence and doubled the prompt size.
system_instructions = """
You are an expert document reviewer. Use the provided context to answer the user's question accurately.

Critical:
 - If the answer is not in the context below say you can not provide an answer.

Context:
{context}
"""

# Two-message chat prompt: the grounded system instructions, followed by the
# user's question supplied under the "input" key.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_instructions),
    ("human", "{input}")
])

In [8]:
# Retriever over the vector store, limited to k=3 results per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))

# Chain that feeds documents plus the prompt to the LLM.
qa_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full pipeline: retrieve for the incoming "input", then answer with qa_chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=qa_chain,
)

In [17]:
# Named `question` rather than `input` so the Python builtin input() is not
# shadowed for the rest of the kernel session.
question = "How many warnings before getting fired for disciplinary issues"

# The chain expects the query under the "input" key (matching the "{input}"
# placeholder in the prompt); the generated text is read back from "answer".
response = rag_chain.invoke({"input": question})
print(response["answer"])


