In [1]:
import os
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA
from collections import defaultdict

In [2]:
# Folder scanned for the source PDF files; the relative path is resolved
# against the notebook's current working directory.
pdf_folder_path = './data/'


In [3]:
# Start with an empty corpus string; it is filled with the text extracted
# from the PDFs in the loading cell.
all_text = ""


In [4]:
# Read every PDF in `pdf_folder_path` and concatenate the text of all pages.
# The corpus is rebuilt from scratch on each run, so re-executing this cell
# does not append a second copy of the documents to `all_text`.
page_texts = []

# os.listdir returns entries in arbitrary order; sorting keeps the combined
# text (and therefore the chunk boundaries downstream) reproducible.
for filename in sorted(os.listdir(pdf_folder_path)):
    if filename.endswith('.pdf'):
        pdf_path = os.path.join(pdf_folder_path, filename)
        loader = PyMuPDFLoader(pdf_path)
        docs = loader.load()
        # One entry per loaded document, each terminated by a newline.
        page_texts.extend(doc.page_content + "\n" for doc in docs)

# A single join avoids repeatedly copying an ever-growing string.
all_text = "".join(page_texts)

print(f"Total text length: {len(all_text)} characters.")

Total text length: 208574 characters.


In [5]:
# Splitter used to cut the corpus into retrieval-sized pieces; configured
# with chunk_size=500 and chunk_overlap=200.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=200,
)


In [6]:
# Split the combined PDF text into a list of chunk strings.
chunks = text_splitter.split_text(all_text)


In [7]:
# Report how many chunks the splitter produced and preview the first one.
print(f"Number of chunks created: {len(chunks)}")

# Guard the preview: if no text was extracted (e.g. the PDF folder is empty)
# `chunks` has no first element and indexing it would raise IndexError.
if chunks:
    print(f"Sample chunk: {chunks[0]}")
else:
    print("Sample chunk: <none - no text was extracted>")

Number of chunks created: 690
Sample chunk: Large Language Models: A Survey
Shervin Minaee, Tomas Mikolov, Narjes Nikzad, Meysam Chenaghlu
Richard Socher, Xavier Amatriain, Jianfeng Gao
Abstract—Large Language Models (LLMs) have drawn a
lot of attention due to their strong performance on a wide
range of natural language tasks, since the release of ChatGPT
in November 2022. LLMs’ ability of general-purpose language
understanding and generation is acquired by training billions of


In [8]:
# Wrap each chunk string in a Document so it can be handed to the vector store.
docs = [
    Document(page_content=chunk_text)
    for chunk_text in chunks
]


In [9]:
# Embedding model used both when indexing the chunks and when querying.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)


  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [10]:
# Embed the chunk documents and store them in a Chroma collection persisted
# under ./chroma_db/RRF.
# NOTE(review): re-running this cell against an existing directory presumably
# inserts the same chunks again - confirm, and clear the folder if so.
db2 = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    persist_directory="./chroma_db/RRF",
)


In [11]:
# Open the persisted collection from disk, using the same embedding function
# that was used to build it.
db3 = Chroma(
    embedding_function=embedding,
    persist_directory="./chroma_db/RRF",
)


  warn_deprecated(


In [12]:
# Retriever over the persisted store, configured with search_kwargs k=3.
retriever = db3.as_retriever(
    search_kwargs={"k": 3},
)


In [13]:
# Language model used for both question rewriting and answering; it is
# created through the Ollama wrapper with the "llama3" model name.
llm = Ollama(model="llama3")


In [14]:
# Plain (history-unaware) retrieval QA chain built from the LLM and retriever.
chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

In [15]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.prompts import ChatPromptTemplate

# System instruction for the question-rewriting step: turn a follow-up
# question into a standalone one, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might "
    "reference context in the chat history, formulate a standalone "
    "question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed and "
    "otherwise return it as is."
)

# Prompt layout: system instruction, then the prior messages, then the new input.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# Retriever built from the LLM, the base retriever and the rewriting prompt.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [16]:
# System instruction for the answering step. The `{context}` placeholder is
# left in the text so the retrieved documents can be substituted in later.
system_prompt = (
    "You are an assistant for question-answering tasks. Use the following "
    "pieces of retrieved context to answer the question. If you don't know "
    "the answer, say that you don't know. Use three sentences maximum and "
    "keep the answer concise.\n\n{context}"
)

In [17]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Answering prompt: system instruction (with the retrieved context), the
# prior conversation, and finally the user's current input.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# Chain that passes the retrieved documents to the LLM via `qa_prompt`.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full pipeline: history-aware retrieval followed by answer generation.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

# Manually manage the chat history

In [18]:
# Conversation so far, as a list of message objects; each turn below passes
# it to the chain and then appends the new question/answer pair.
chat_history = []


In [28]:
from langchain_core.messages import AIMessage, HumanMessage

# First turn: ask the opening question with the history gathered so far.
question = "What is Search types discuss,only give types?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

# Record the exchange by hand so the next turn can refer back to it.
chat_history += [
    HumanMessage(content=question),
    AIMessage(content=ai_msg_1["answer"]),
]

In [29]:
# Display the full result returned by the chain for the first question.
ai_msg_1

{'input': 'What is Search types discuss,only give types?',
 'chat_history': [HumanMessage(content='What is Search types discuss,only give types?'),
  AIMessage(content="According to the provided context, search types discussed are:\n\n* Google search engine\n* Retrieval augmented generation (RAG)\n\nI don't know any other specific search types mentioned in this context.")],
 'context': [Document(page_content='the Google search engine as questions. An annotator\nis presented with a question along with a Wikipedia\npage from the top 5 search results, and annotates a\nlong answer (typically a paragraph) and a short answer'),
  Document(page_content='C. Augmenting LLMs through external knowledge - RAG\nOne of the main limitations of pre-trained LLMs is their\nlack of up-to-date knowledge or access to private or use-\ncase-specific information. This is where retrieval augmented\ngeneration (RAG) comes into the picture [164]. RAG, illus-\ntrated in figure 37, involves extracting a query from

In [30]:
# Second turn: a follow-up that refers back to the previous answer.
second_question = "Explain any of above type you mentioned?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

# Record this turn in the manually managed history. The human message must be
# the question that was just asked (`second_question`); storing the earlier
# `question` again would pair this answer with the wrong prompt.
chat_history.extend(
    [
        HumanMessage(content=second_question),
        AIMessage(content=ai_msg_2["answer"]),
    ]
)

In [None]:
# Display the full result returned by the chain for the follow-up question.
ai_msg_2

{'input': 'Explain any of above type you mentioned?',
 'chat_history': [],
 'context': [Document(page_content='psychological parlance, has been appropriated within the field\nof artificial intelligence.\nHallucinations in LLMs can be broadly categorized into\ntwo types:\n1)\nIntrinsic Hallucinations: These directly conflict with\nthe source material, introducing factual inaccuracies\nor logical inconsistencies.\n2)\nExtrinsic Hallucinations: These, while not contra-\ndicting, are unverifiable against the source, encom-\npassing speculative or unconfirmable elements.'),
  Document(page_content='reasoning.\n2)\nManual CoT: A more complex variant, it requires\nproviding step-by-step reasoning examples as tem-\nplates for the model. While yielding more effective\nresults, it poses challenges in scalability and mainte-\nnance.\nManual CoT is more effective than zero-shot. However,\nthe effectiveness of this example-based CoT depends on the\nchoice of diverse examples, and constructing promp

In [None]:
# Third turn: the user states their name and asks a new question.
second_question = "my name is Mustafa . can you Explain any LLM?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

# Store the question that was actually asked in this turn. Recording the first
# turn's `question` instead would drop the user's name from the history, so a
# later "what is my name?" could not be answered from it.
chat_history.extend(
    [
        HumanMessage(content=second_question),
        AIMessage(content=ai_msg_2["answer"]),
    ]
)

In [None]:
# Display the full result returned by the chain for the question above.
ai_msg_2

{'input': 'my name is Mustafa . can you Explain any LLM?',
 'chat_history': [],
 'context': [Document(page_content='LLMs make it possible to build general-purpose AI agents\nbased on LLMs. While LLMs are trained to produce responses\nin static settings, AI agents need to take actions to interact with\ndynamic environment. Therefore, LLM-based agents often\nneed to augment LLMs to e.g., obtain updated information\nfrom external knowledge bases, verify whether a system action\nproduces the expected result, and cope with when things do\nnot go as expected, etc. We will discuss in detail LLM-based\nagents in Section IV.'),
  Document(page_content='but also be used to augment the capabilities of LLMs going\nas far as turning an LLM into a full-blown AI agent with the\nability to interface with the external world.\nA. LLM limitations\nIt is important to remember that LLMs are trained to predict\na token. While fine-tuning and alignment improves their per-\nformance and adds different dimensi

In [None]:
# Fourth turn: ask for something that only the chat history can supply.
second_question = "what is my name?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

# Append the question asked in this turn (`second_question`), not the first
# turn's `question`, so the history mirrors the real conversation.
chat_history.extend(
    [
        HumanMessage(content=second_question),
        AIMessage(content=ai_msg_2["answer"]),
    ]
)

In [None]:
# Display the full result returned by the chain for the name-recall question.
ai_msg_2

{'input': 'what is my name?',
 'chat_history': [],
 'context': [Document(page_content='7B, 13B, 34B, 70B\n2023\n-\n✓\n2T\nOnline sources\nAlpaca\n7B\n2023\nLLaMA1\n✓\n-\nGPT-3.5\nVicuna-13B\n13B\n2023\nLLaMA1\n✓\n-\nGPT-3.5\nLLaMA Family\nKoala\n13B\n2023\nLLaMA\n✓\n-\nDialogue data\nMistral-7B\n7.3B\n2023\n✓\n-\n-\nCode Llama\n34\n2023\nLLaMA2\n✓\n500B\nPublicly available code\nLongLLaMA\n3B, 7B\n2023\nOpenLLaMA\n✓\n1T\n-\nLLaMA-Pro-8B\n8.3B\n2024\nLLaMA2-7B\n✓\n80B\nCode and math corpora\nTinyLlama-1.1B\n1.1B\n2024\nLLaMA1.1B\n✓\n3T\nSlimPajama, Starcoderdata\nPaLM\n8B, 62B, 540B\n2022\n-\n×\n780B'),
  Document(page_content='assistance from ChatGPT.\nStarCoder: In [97], Li et al. introduced StarCoder and\nStarCoderBase. They are 15.5B parameter models with 8K\ncontext length, infilling capabilities and fast large-batch in-\nference enabled by multi-query attention. StarCoderBase is\ntrained on one trillion tokens sourced from The Stack, a\nlarge collection of permissively licensed Gi

In [None]:
# Display the accumulated conversation history.
chat_history

[]