### SHRIPAD DHOPATE

### Importing Required Libraries

In [1]:
from sentence_transformers import SentenceTransformer
import urllib
import warnings
from pathlib import Path as p
from pprint import pprint
import pandas as pd
import google.generativeai as genai
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from IPython.display import display
from IPython.display import Markdown
import textwrap

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Helper that renders plain model text as a quoted Markdown block
def to_markdown(text):
    """Return ``text`` wrapped as an IPython ``Markdown`` block quote.

    Bullet characters ('•') are rewritten as Markdown list markers, and every
    line (blank lines included) is prefixed with '> '.
    """
    bulleted = text.replace('•', '  *')
    # The always-true predicate makes textwrap.indent prefix blank lines too.
    quoted = textwrap.indent(bulleted, '> ', predicate=lambda _line: True)
    return Markdown(quoted)

In [3]:
# Never commit the Gemini API key with the notebook: take it from the
# GOOGLE_API_KEY environment variable, and only prompt (without echoing)
# when that variable is unset or empty.
import os
from getpass import getpass

api_key = os.environ.get("GOOGLE_API_KEY") or getpass("Google API key: ")

### Gemini 2.5 Flash LLM Model

In [4]:
def load_llm_model(api_key, model_name="gemini-2.5-flash", temperature=0.2, top_p=0.95):
    """Create the Gemini chat model used to answer questions.

    Args:
        api_key: Google API key forwarded as ``google_api_key``.
        model_name: Gemini model identifier; defaults to "gemini-2.5-flash".
        temperature: Sampling temperature forwarded to the model (default 0.2).
        top_p: Nucleus-sampling value forwarded to the model (default 0.95).

    Returns:
        A configured ``ChatGoogleGenerativeAI`` instance.
    """
    return ChatGoogleGenerativeAI(
        model=model_name,
        google_api_key=api_key,
        temperature=temperature,
        top_p=top_p,
        # NOTE(review): presumably folds system messages into the human turn;
        # confirm it is still needed with the installed langchain-google-genai.
        convert_system_message_to_human=True,
    )

### Loading the PDF with PyPDFLoader and chunking the text

In [11]:
def load_pdf(pdf_path):
    """Load the PDF at ``pdf_path`` and return the documents produced by
    ``PyPDFLoader.load_and_split()``.
    """
    return PyPDFLoader(pdf_path).load_and_split()

In [12]:
def chunking(pages, chunk_size=500, chunk_overlap=200):
    """Join the text of all pages and split it into overlapping chunks.

    Args:
        pages: Iterable of objects exposing a ``page_content`` attribute.
        chunk_size: Value passed as ``chunk_size`` to the splitter (default 500).
        chunk_overlap: Value passed as ``chunk_overlap`` to the splitter
            (default 200).

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_text``,
        or an empty list when ``pages`` is empty.
    """
    # Materialise once so generators are supported and emptiness can be checked.
    pages = list(pages)
    if not pages:
        return []

    # `page` (not `p`) avoids shadowing the `Path as p` alias imported by the notebook.
    page_texts = [str(page.page_content) for page in pages]
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text("\n\n".join(page_texts))

### Using HuggingFace Embeddings and FAISS

In [13]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

In [24]:
# Source document, given relative to the notebook's working directory.
pdf_path = "./Samtaai_assignment/data/computer-history.pdf"

# Build the chat model, then read the PDF and split its text into chunks.
model = load_llm_model(api_key)
pages = load_pdf(pdf_path)
texts = chunking(pages)

In [15]:
# Embed every chunk with the MiniLM sentence-transformers model and index
# the resulting vectors in FAISS for similarity search.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
vector_index = FAISS.from_texts(texts, embedding_model)

  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [16]:
def answer_question(question, model, vector_index, k=3):
    """Answer a question using retrieval-augmented generation over the index.

    The ``k`` most similar chunks are retrieved from ``vector_index``, stuffed
    into the prompt as context, and passed to ``model`` for the final answer.

    Args:
        question: The user's question; must be a non-blank string.
        model: Chat model handed to ``RetrievalQA`` as ``llm``.
        vector_index: Vector store exposing ``as_retriever``.
        k: Number of chunks to retrieve as context; must be at least 1.

    Returns:
        dict with two keys: "answer" (the chain's "result" value) and
        "source_documents" (the chain's "source_documents", or an empty list
        when that key is absent).

    Raises:
        ValueError: If ``question`` is blank or not a string, or ``k`` < 1.
    """
    # Fail fast on unusable input instead of spending an LLM call on it.
    if not isinstance(question, str) or not question.strip():
        raise ValueError("question must be a non-empty string")
    if k < 1:
        raise ValueError("k must be at least 1")

    template = """Use the following context to answer the question in a detailed manner.
                If the context does not contain enough information, say "I don't know."
                Provide examples or explanations if possible.
                
                Context:
                {context}
                
                Question:
                {question}
                
                Answer in detail:
                """
    prompt = PromptTemplate(template=template, input_variables=["context", "question"])

    qa_chain = RetrievalQA.from_chain_type(
        llm=model,
        retriever=vector_index.as_retriever(search_kwargs={"k": k}),
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt},
        return_source_documents=True,
    )

    # Calling the chain object directly is deprecated; invoke() is the
    # supported entry point and takes the same input dict.
    result = qa_chain.invoke({"query": question})

    return {
        "answer": result["result"],
        "source_documents": result.get("source_documents", []),
    }

In [22]:
# Ask a sample question and render the generated answer as Markdown.
user_question = "Explain microcomputer in detail ."
response = answer_question(
    question=user_question,
    model=model,
    vector_index=vector_index,
)
Markdown(response["answer"])



A microcomputer is a digital computer system that is controlled by a stored program. It utilizes several key components to function:

1.  **Microprocessor:** This is a central component that the microcomputer uses.
2.  **Programmable Read-Only Memory (ROM):** The ROM's role is to define the instructions that are to be executed by the computer.
3.  **Random-Access Memory (RAM):** RAM serves as the functional equivalent of computer memory within the microcomputer system.

The production of microcomputers has greatly benefited from silicon chips, which began to be used in 1971.