## Install all packages


In [None]:
!pip install -q --upgrade langchain pypdf chromadb google-generativeai langchain-google-genai python-dotenv

# Import all required packages

In [None]:
import google.generativeai as genai
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from google.colab import userdata
from dotenv import load_dotenv

# Set up the API key

In [None]:
# Fetch the Gemini API key from the Colab secrets store and pass it to the
# google.generativeai client.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
# Write the key to .env from the Colab secrets store instead of embedding the
# literal key in the notebook, where anyone who can read the file would see it.
# NOTE(review): the key that was previously hardcoded in this cell should be
# treated as compromised and revoked.
with open(".env", "w") as env_file:
    env_file.write(f"GOOGLE_API_KEY={userdata.get('GOOGLE_API_KEY')}\n")

In [None]:
# Load the variables defined in the .env file (GOOGLE_API_KEY) into the environment.
load_dotenv()

True

# Load PDF files from a folder

In [None]:
# Directory containing the PDFs to index; adjust if the files live elsewhere.
PDF_DIR = "/content/sample_data/data"

loader = PyPDFDirectoryLoader(PDF_DIR)
data = loader.load()

# Fail early with a clear message: with no documents, the later splitting and
# embedding steps would have nothing to work on.
if not data:
    raise FileNotFoundError(f"No PDF content was loaded from {PDF_DIR!r}")

In [None]:
# Preview only the first loaded document; echoing the whole list floods the
# notebook output.
data[:1]

[Document(page_content='/two.ltab/two.ltab MAY /two.lprp/zero.lprp/one.lprp/nine.lprp  /  HOMETEXTILESTODAY.COMIn Walmart Inc.’s new annual report, Doug McMillon reﬂ  ects on lessons \nlearned during his ﬁ  ve years as the company’s chief executive ofﬁ  cer.\nThe report, titled “Deﬁ  ning the Future of Retail,” was recently posted to \nthe company’s investor relations site ahead of the retailer’s June 5 annual \nshareholder meeting\nMcMillon writes that Walmart is focused on long-term success.\n“We’re playing the long game,” he said. “Managing our business on a dai-\nly basis is important, but our most important strategic decisions are made \nin light of what we want our company \nto become for the next generation.”\nA leader during times of transforma-\ntion has to be curious, introduce new \nideas and ask questions. “You can’t \npush a rope, but you can pull it. In \nother words, sometimes you just can’t \nlead from behind,” he added.\nIn pursuing growth, companies also \nhave to get

# Split the data into text chunks

In [None]:
# Splitter configured for chunks of up to 1000 characters with a 100-character overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

# Merge the text of every loaded document into one string, separated by blank lines.
content = "\n\n".join([str(doc.page_content) for doc in data])

In [None]:
# Split the combined document text into chunks with the splitter configured above.
texts = text_splitter.split_text(content)

In [None]:
# Number of chunks produced by the splitter.
print(len(texts))

66


In [None]:
# Inspect the first chunk as a quick sanity check.
texts[0]

'/two.ltab/two.ltab MAY /two.lprp/zero.lprp/one.lprp/nine.lprp  /  HOMETEXTILESTODAY.COMIn Walmart Inc.’s new annual report, Doug McMillon reﬂ  ects on lessons \nlearned during his ﬁ  ve years as the company’s chief executive ofﬁ  cer.\nThe report, titled “Deﬁ  ning the Future of Retail,” was recently posted to \nthe company’s investor relations site ahead of the retailer’s June 5 annual \nshareholder meeting\nMcMillon writes that Walmart is focused on long-term success.\n“We’re playing the long game,” he said. “Managing our business on a dai-\nly basis is important, but our most important strategic decisions are made \nin light of what we want our company \nto become for the next generation.”\nA leader during times of transforma-\ntion has to be curious, introduce new \nideas and ask questions. “You can’t \npush a rope, but you can pull it. In \nother words, sometimes you just can’t \nlead from behind,” he added.\nIn pursuing growth, companies also \nhave to get comfortable with “an i

# Set up the embeddings model

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [None]:
# Embedding model used to vectorise the text chunks.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Create embeddings for each text chunk and store them in a Chroma vector store

In [None]:
# Embed every chunk into a Chroma store, then expose it through the retriever interface.
# Note: despite its name, `vector_store` holds the retriever, not the Chroma store itself.
chroma_index = Chroma.from_texts(texts, embeddings)
vector_store = chroma_index.as_retriever()

# Create a prompt template

In [None]:
# Prompt sent to the chat model. `{context}` receives the retrieved text and
# `{question}` the user's question. The stray "?" that used to follow
# `{context}` has been removed so it is no longer appended to the context.
prompt_template = """
  Please answer the question in as much detail as possible based on the provided context.
  Ensure to include all relevant details. If the answer is not available in the provided context,
  kindly respond with "The answer is not available in the context." Please avoid providing incorrect answers.
\n\n
  Context:\n {context}\n
  Question: \n{question}\n

  Answer:
"""

prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])

# Load the model

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that generates the answers; a low temperature (0.3) is configured.
# NOTE(review): confirm that the "gemini-pro" model name is still served by the API.
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)


In [None]:
# Build the question-answering chain from the chat model and the custom prompt.
chain = load_qa_chain(
    llm=model,
    chain_type="stuff",
    prompt=prompt,
)

In [None]:
question = input("Enter your question: ")

# Fetch the chunks relevant to the question. `invoke` replaces the deprecated
# `get_relevant_documents` retriever method.
docs = vector_store.invoke(question)

# Run the QA chain over the retrieved chunks. `invoke` replaces the deprecated
# direct call `chain(...)`; return_only_outputs=True keeps only the chain's
# outputs in the result.
response = chain.invoke(
    {"input_documents": docs, "question": question},
    return_only_outputs=True,
)
response

Enter your question: what is linear regression model?


{'output_text': 'The linear regression model is a statistical model that attempts to determine the relationship between one or more independent variables and a dependent variable. It is typically used to predict the value of the dependent variable based on the values of the independent variables. In the linear regression model, the dependent variable is assumed to be a linear function of the independent variables, plus an error term. The error term represents the difference between the observed value of the dependent variable and the value predicted by the model.'}