In [1]:
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Load every PDF found under ./pdfs. `load()` is used instead of
# `load_and_split()`: the latter already chunks the documents with a default
# overlapping splitter, so joining its output and splitting again (below)
# would embed the overlapped text twice.
loader = PyPDFDirectoryLoader("pdfs")
data = loader.load()

# Merge all extracted text into one string, then cut it into large chunks
# (10,000 characters, 200 characters of overlap) for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
context = "\n\n".join(str(p.page_content) for p in data)
texts = text_splitter.split_text(context)

# Embed each chunk into a Chroma store and expose it as a retriever.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector_index = Chroma.from_texts(texts, embeddings).as_retriever()

# Prompt text for the question-answering chain. It has two placeholders:
# {context} (the retrieved document text) and {question} (the user's query).
# The placeholder for the context is not followed by any punctuation, so the
# retrieved text is passed to the model unmodified.
prompt_template = """
  Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
  provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
  Context:\n {context}\n
  Question: \n{question}\n

  Answer:
"""

# Wrap the raw template text so the chain can fill in its two placeholders.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model used to generate answers (temperature 0.3).
model = ChatGoogleGenerativeAI(temperature=0.3, model="gemini-pro")

# "stuff" chain: the supplied documents are placed into the single prompt above.
chain = load_qa_chain(
    model,
    prompt=prompt,
    chain_type="stuff",
)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Fetch the chunks relevant to the question, then have the QA chain answer
# from them. `invoke` is the Runnable entry point that supersedes the
# deprecated `get_relevant_documents(...)` and direct `chain(...)` call styles.
question = "What did he do during his time at NXP?"
docs = vector_index.invoke(question)
response = chain.invoke(
    {"input_documents": docs, "question": question},
    return_only_outputs=True,  # return only the chain's outputs, not the inputs
)

In [8]:
# Build the report text from the question and the chain's response, then show it.
output = "Question: {}\n\nResponse: {}".format(question, response)

print(output)


Question: What did he do during his time at NXP?

Response: {'output_text': 'He was responsible for managing K&S Wire Bonder machines and Post Wire Bond Inspection (AOI) machines, overseeing their operation, maintenance, and performance optimization.'}
