In [1]:
import urllib
import warnings
from pathlib import Path as p
from pprint import pprint

import pandas as pd
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import VertexAIEmbeddings
from langchain.llms import VertexAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

In [4]:
# Load ML.pdf and split it into documents; each item in `pages` exposes
# `.page_content`, which later cells read.
pdf_loader = PyPDFLoader('ML.pdf')
pages = pdf_loader.load_and_split()
# Debug aid: uncomment to inspect the text of a sample page.
#print(pages[3].page_content)

In [5]:
# Vertex AI clients: the text model is handed to the QA chain, and the
# embedding model is handed to the Chroma index.
vertex_llm_text = VertexAI(
    model_name="text-bison@001",
    max_output_tokens=1000,
)
vertex_embeddings = VertexAIEmbeddings(
    model_name="textembedding-gecko@001",
)

In [6]:
# Merge every page into a single string (pages separated by a blank line),
# then split it with chunk_size=10000 and no overlap.
# The loop variable is named `page` so it does not reuse `p`, the alias
# under which pathlib.Path is imported at the top of the notebook.
context = "\n\n".join(str(page.page_content) for page in pages)
text_splitter = CharacterTextSplitter(chunk_size=10000, chunk_overlap=0)
texts = text_splitter.split_text(context)

 Here, the system first embeds the question and computes its similarity to the chunk vectors stored in the database. The chunks whose vectors are most similar are then returned as the context that is relevant to the question.

In [7]:
# Embed the text chunks with the Vertex AI embedding model and store them in
# Chroma, then expose the store as a retriever for similarity search.
vector_store = Chroma.from_texts(texts, vertex_embeddings)
vector_index = vector_store.as_retriever()

In [19]:
# Active question for this run; the commented-out lines are alternative
# questions that can be swapped in.
#question = "IMPACT OF ONLINE SALES ON COSTS AT NETFLIX?"
question = "What is Delayed reward in Reinforcement learning ?"
#question = "What is Postponement ? "

In [20]:
# Next, retrieve the documents relevant to the original question from the
# retriever; each result exposes `.page_content`, which later cells read.
docs = vector_index.get_relevant_documents(question)

In [21]:
# Text of the first retrieved document, kept for ad-hoc inspection; `z` is not
# referenced again in the visible cells. Indexing [0] fails if `docs` is empty.
z = docs[0].page_content

In [22]:
# Concatenate the text of all retrieved documents, separated by single spaces.
# Note: this rebinds `context`, replacing the full-document string built when
# the PDF was split.
context = " ".join(str(doc.page_content) for doc in docs)

In [23]:
def str_rm_whitespaces(ip_str):
    """Flatten text onto one line and strip selected punctuation.

    Processing order:
      1. Newline characters are replaced by a single space.
      2. The literal two-character sequences backslash-n and backslash-u
         are removed.
      3. The characters ( ) { } : ' . , are deleted.
      4. Runs of consecutive spaces are collapsed to one space.

    Leading/trailing spaces are not trimmed, and whitespace other than
    newlines and spaces (e.g. tabs) is left untouched.

    Args:
        ip_str: The string to clean.

    Returns:
        The cleaned string.
    """
    # Multi-character substitutions run first, in this fixed order, so that
    # deleting punctuation afterwards cannot create new matches for them.
    substitutions = (("\n", " "), ("\\n", ""), ("\\u", ""))
    cleaned = ip_str
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)

    # Drop every unwanted punctuation character in a single pass.
    cleaned = cleaned.translate(str.maketrans("", "", "(){}:'.,"))

    # Squeeze repeated spaces until no double space remains.
    double_space = "  "
    while double_space in cleaned:
        cleaned = cleaned.replace(double_space, " ")

    return cleaned
# Clean the retrieved context; this rebinds `context` to the cleaned text.
context = str_rm_whitespaces(context)

In [24]:
# Number of whitespace-delimited words in the cleaned context.
print(len(context.split()))

5909


In [25]:
# Prompt applied with a {context} chunk and the {question}.
# The "\n" escapes are in a regular (non-raw) string, so they become real
# newlines in the rendered prompt.
question_prompt_template = """
                    Answer the question as precisely as possible using the provided context. \n\n
                    Context: \n {context} \n
                    Question: \n {question} \n
                    Answer:
                    """
question_prompt = PromptTemplate(
    template=question_prompt_template, input_variables=["context", "question"]
)

# The combine prompt must expose a {summaries} variable: per the LangChain
# map_reduce QA chain defaults, that is the name under which the intermediate
# answers are passed to the combine step (confirm against the installed version).
# Fixes to the prompt text: closed the unbalanced double quote around the
# fallback answer, corrected "summerize" and the surrounding grammar, and
# removed a stray "?" that was appended to the summaries.
# NOTE(review): the 1000-word target is kept as-is, but the LLM is configured
# with max_output_tokens=1000 - confirm the intended answer length.
combine_prompt_template = """Given the extracted content and the question, find the answer in the docs and summarize the answer in 1000 words.
If the answer is not contained in the docs, say "answer not available in context". \n\n
Summaries: \n {summaries}\n
Question: \n {question} \n
Answer:
"""
combine_prompt = PromptTemplate(
    template=combine_prompt_template, input_variables=["summaries", "question"]
)

In [26]:
# Build a question-answering chain of type "map_reduce" on the Vertex AI text
# model. `question_prompt` takes {context}/{question}; `combine_prompt` takes
# {summaries}/{question}.
# return_intermediate_steps=True presumably adds the per-document answers to
# the chain output - confirm against the LangChain documentation.
map_reduce_chain = load_qa_chain(
    vertex_llm_text,
    chain_type="map_reduce",
    return_intermediate_steps=True,
    question_prompt=question_prompt,
    combine_prompt=combine_prompt,
)

In [27]:
# Run the chain on the retrieved documents and the question; the final answer
# is read from the "output_text" key of the result in the cells below.
map_reduce_embeddings_outputs = map_reduce_chain(
    {"input_documents": docs, "question": question}
)


In [28]:
# Single-line, punctuation-stripped version of the chain's final answer.
result = str_rm_whitespaces(map_reduce_embeddings_outputs["output_text"])

In [29]:
# Display the cleaned answer.
result

'Delayed reward is a reward that is given to an agent after a certain amount of time has passed This can be problematic for reinforcement learning algorithms as they may not be able to learn the correct behavior if the reward is delayed There are a number of ways to deal with delayed rewards such as using temporal-difference learning or using a reward shaping technique'

In [30]:
# Display the raw answer text returned by the chain, for comparison with `result`.
map_reduce_embeddings_outputs["output_text"]

'Delayed reward is a reward that is given to an agent after a certain amount of time has passed. This can be problematic for reinforcement learning algorithms, as they may not be able to learn the correct behavior if the reward is delayed. There are a number of ways to deal with delayed rewards, such as using temporal-difference learning or using a reward shaping technique.'