In [None]:
import pandas as pd
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings

# Note: this workflow may be very similar to RAG (retrieval-augmented generation).
# Credit for this code goes to: https://github.com/GoogleCloudPlatform/generative-ai/blob/main/language/use-cases/document-qa/question_answering_documents_langchain.ipynb

In [None]:
# Embedding model (library defaults) and text-generation model, both provided by Ollama.
embeddings = OllamaEmbeddings()
llm = Ollama(model="llama2")

# Load the PDF

In [None]:
# Read the PDF and split it into a list of documents.
pdf_loader = PyPDFLoader("../pdf_data/allcott.pdf")
pages = pdf_loader.load_and_split()

# Quick sanity check: show the text of the entry at index 3.
preview_page = pages[3]
print(preview_page.page_content)

# Define your prompt

In [None]:
# Question put to the document in the cells below.
question = "Could you give me information about the sample?"

# Prompt used by the "stuff" chain. {context} and {question} are the two
# placeholders declared in `input_variables` below.
# Nothing follows {context} except a newline, so the context text reaches the
# model verbatim (no punctuation is glued onto its end).
prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
                    not contained in the context, say "answer not available in context" \n\n
                    Context: \n {context}\n
                    Question: \n {question} \n
                    Answer:
                  """

prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

## Q&A without similarity search (i.e. without a vector database)

In [None]:
# len_context: number of leading pages joined into `context`.
# len_input: number of pages passed to the stuff chain further down.
len_context = 7
len_input = 3

# Concatenate the text of the first `len_context` pages, one page per line break.
context = "\n".join(str(p.page_content) for p in pages[:len_context])

# Count whitespace-separated words so the number matches the label
# (len(context) alone would report characters, not words).
print("The total words in the context: ", len(context.split()))

## Stuffing

In [None]:
# Build the question-answering chain with the "stuff" strategy and the custom prompt.
stuff_chain = load_qa_chain(
    llm=llm,
    chain_type="stuff",
    prompt=prompt,
)

In [None]:
# Run the stuff chain on the `len_input` pages that come right after the first
# `len_context` pages. `invoke` is used instead of calling the chain object
# directly, because the direct-call form is deprecated in LangChain.
stuff_answer = stuff_chain.invoke(
    {
        "input_documents": pages[len_context : len_context + len_input],
        "question": question,
    },
    return_only_outputs=True,  # keep only the chain outputs, not the echoed inputs
)

## Map reduce

In [None]:
# Prompt applied to each document individually (the "map" step).
# Placeholders: {context} and {question}.
question_prompt_template = """
                    Answer the question as precise as possible using the provided context. \n\n
                    Context: \n {context} \n
                    Question: \n {question} \n
                    Answer:
                    """
question_prompt = PromptTemplate(
    template=question_prompt_template, input_variables=["context", "question"]
)

# Prompt that merges the per-document answers into one final answer (the "reduce" step).
# Placeholders: {summaries} and {question}.
# The quoted fallback sentence is properly closed, and nothing but a newline
# follows {summaries}, so the model receives well-formed instructions.
combine_prompt_template = """Given the extracted content and the question, create a final answer.
If the answer is not contained in the context, say "answer not available in context". \n\n
Summaries: \n {summaries}\n
Question: \n {question} \n
Answer:
"""
combine_prompt = PromptTemplate(
    template=combine_prompt_template, input_variables=["summaries", "question"]
)

In [None]:
# Build the question-answering chain with the "map_reduce" strategy:
# `question_prompt` is used per document, `combine_prompt` for the final answer.
# Intermediate results are kept in the chain output.
map_reduce_chain = load_qa_chain(
    llm=llm,
    chain_type="map_reduce",
    question_prompt=question_prompt,
    combine_prompt=combine_prompt,
    return_intermediate_steps=True,
)

In [None]:
# Run the map-reduce chain over every page of the PDF.
# `invoke` is used instead of calling the chain object directly, because the
# direct-call form is deprecated in LangChain.
map_reduce_outputs = map_reduce_chain.invoke(
    {"input_documents": pages, "question": question}
)

# Q&A with similarity search (see also the code in the rag folder)

In [None]:
# Embed the pages into a Chroma vector store, then expose it as a retriever.
vector_store = Chroma.from_documents(documents=pages, embedding=embeddings)
vector_index = vector_store.as_retriever()


In [None]:
# Fetch the documents the retriever returns for the question.
# `invoke` is the current retriever entry point; `get_relevant_documents`
# is deprecated in LangChain.
docs = vector_index.invoke(question)

In [None]:
# Run the map-reduce chain on the retrieved documents only, rather than on all pages.
# `invoke` is used instead of calling the chain object directly, because the
# direct-call form is deprecated in LangChain.
map_reduce_embeddings_outputs = map_reduce_chain.invoke(
    {"input_documents": docs, "question": question}
)

In [None]:
# Show the final answer produced from the retrieved documents.
final_answer = map_reduce_embeddings_outputs["output_text"]
print(final_answer)