In [1]:
# from langchain_community.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader
from langchain_community.document_loaders import PyPDFLoader
from pprint import pprint
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

In [2]:
# Absolute path of the PDF to index. It is machine-specific, so adjust it
# before running this notebook on another machine.
local_path = (
    "/mnt/c/Users/user/OneDrive/Desktop/rag-pdf-chatbot/data/"
    "WEF_The_Global_Cooperation_Barometer_2024.pdf"
)

In [3]:
# Fail fast when no PDF path is configured. Merely printing a hint would let
# execution continue into the splitter below, where `data` is either undefined
# (NameError on a fresh kernel) or a stale value left over from an earlier run.
if not local_path:
    raise ValueError("Upload a PDF file")

# Read the PDF at `local_path` into LangChain documents.
loader = PyPDFLoader(file_path=local_path)
data = loader.load()

# Split the loaded documents into chunks of at most 7500 characters, with
# 100 characters of overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
chunks = text_splitter.split_documents(data)

# Embed every chunk with the Ollama "nomic-embed-text" model and store the
# vectors in the Chroma collection 'local-rag'.
vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=OllamaEmbeddings(model="nomic-embed-text", show_progress=True),
    collection_name='local-rag')
    

OllamaEmbeddings: 100%|██████████| 26/26 [00:39<00:00,  1.53s/it]


In [4]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [10]:
# Chat model obtained through Ollama; `local_model` selects which model is used.
local_model = "llama3"
llm = ChatOllama(
    model=local_model,
)

In [11]:
# Prompt that asks the model to rephrase the user's question five ways, one
# per line. Its only placeholder is {question}.
QUERY_PROMPT = PromptTemplate(
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
    input_variables=["question"],
)

In [12]:
# Multi-query retriever assembled from the vector store's retriever, the chat
# model, and the question-rewriting prompt.
retriever = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=QUERY_PROMPT,
)

# RAG answer prompt: the model is told to answer using only the supplied
# {context}; {question} carries the user's question.
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "Question:{question}"
)

prompt = ChatPromptTemplate.from_template(template)

In [13]:
def _format_docs(docs):
    """Join the text of retrieved documents into a single context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG chain: retrieve documents for the question, render them as plain text
# for the {context} slot, pass the question through unchanged, then run the
# prompt, the chat model, and the string output parser.
# The retriever output is piped through `_format_docs` so the prompt receives
# the document text rather than the repr of a list of Document objects.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [14]:

# Run the chain on a sample question; the returned string is the cell's output.
chain.invoke(
    input="What are the 5 pillars of global cooperation?",
)

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  1.60it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 13.13it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  5.89it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 12.03it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  7.09it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  9.16it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  9.39it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  6.20it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 10.62it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  8.34it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  6.22it/s]


'Based on the provided context, the 5 pillars of global cooperation mentioned in the report are:\n\n1. Trade and capital\n2. Innovation and technology\n3. Climate and natural capital\n4. Health and wellness\n5. Peace and security'