In [42]:
import fitz # PyMuPDF used for PDF manipulation
from langchain_core.documents import Document # Document class from LangChain
from langchain.text_splitter import RecursiveCharacterTextSplitter # Text splitter from LangChain
from langchain_community.vectorstores import FAISS # FAISS vector store from LangChain Community
from langchain_google_genai import ChatGoogleGenerativeAI # Google Generative AI integration
import os # OS module for environment variable access
from langchain.embeddings import HuggingFaceEmbeddings ## HuggingFace embeddings from LangChain
from gtts import gTTS # Google Text-to-Speech

In [32]:
from dotenv import load_dotenv # Load environment variables from .env file
load_dotenv() # Load the .env file

# The key is read from GEMINI_API_KEY and re-exported as GOOGLE_API_KEY for the
# Google Generative AI client used later in this notebook.
# os.environ only accepts strings, so assigning a missing (None) value would raise
# an unhelpful TypeError; fail early with an actionable message instead.
gemini_api_key = os.getenv("GEMINI_API_KEY")
if not gemini_api_key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Add it to your .env file or export it before running this notebook."
    )
os.environ["GOOGLE_API_KEY"] = gemini_api_key


DATA INGESTION

In [33]:
pdf_path="job.pdf"

# Storage for the page-level Documents extracted from the PDF
texts=[]

# Open the PDF inside a context manager so the file handle is released once the
# pages have been read. The handle is named pdf_doc (not doc) so it is not confused
# with the retrieved-chunk loop variable used further down the notebook.
with fitz.open(pdf_path) as pdf_doc:
    for page_num,page in enumerate(pdf_doc):
        text=page.get_text() # Extract text from the page

        # Skip pages that contain only whitespace
        if text.strip():
            metadata={
                "source":pdf_path,
                "page":page_num # index as produced by enumerate (starts at 0)
            }
            # Wrap the page text and its metadata in a Document and collect it
            texts.append(Document(page_content=text,metadata=metadata))


# Text splitter: chunks of 500 characters with a 50-character overlap between neighbours
text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=50)

In [34]:
# Number of non-empty pages that were converted into Documents
len(texts)

7

CHUNKING

In [35]:
# Break the page-level Documents into smaller chunks with the splitter
# configured above (chunk_size=500, chunk_overlap=50)
chunks=text_splitter.split_documents(texts) # split the documents into chunks
# Display how many chunks were produced
len(chunks)

18

NOW WE NEED TO EMBED THE CHUNKS AND STORE THEM IN THE VECTOR DATABASE

In [36]:
embeddings = HuggingFaceEmbeddings( # Initialize HuggingFace embeddings
    model_name="all-MiniLM-L6-v2"
)
# We do not embed the chunks ourselves: the vector store runs the embedding model
# on each chunk when the chunks are stored.

# Building an index from an empty list fails with an unhelpful low-level error,
# so stop here with a clear message if there is nothing to index.
if not chunks:
    raise ValueError("No chunks to index; check that the source document produced any text.")

vector_store = FAISS.from_documents(
    documents=chunks,  # documents to be stored
    embedding=embeddings # embedding model initialized above
)

# Each chunk should have produced exactly one stored vector; verify it instead of
# only eyeballing the printed number.
assert vector_store.index.ntotal == len(chunks), (
    f"expected {len(chunks)} vectors, found {vector_store.index.ntotal}"
)
print(vector_store.index.ntotal) # number of vectors stored in the vector db


18


THIS PART IS DONE. NOW WE MOVE ON TO THE USER QUERY PART

In [37]:
# now we give the query part

# The user question that drives retrieval and, later, the LLM prompt
query = "what do we need to build in the assignments exactly?"

# Retriever over the vector store that returns the top-k most similar chunks
retriever = vector_store.as_retriever(
    search_kwargs={"k": 3} # number of similar chunks to retrieve
)

# invoke() replaces the deprecated get_relevant_documents() and matches the
# llm.invoke() call style used later in the notebook.
docs = retriever.invoke(query)


# Show which chunks were retrieved, together with their metadata.
# The loop variable is deliberately not called `doc`, so it does not rebind the
# name used for the PDF handle in the ingestion cell.
for i, retrieved_doc in enumerate(docs, start=1):
    print(f"Chunk {i}")
    print(retrieved_doc.page_content)
    print(retrieved_doc.metadata)
    print("----")



Chunk 1
Sample User Flows
Flow 1: Manual Task Creation
1. User clicks "Add Task" button
2. A form appears with fields for title, description, status, priority, and due 
date
3. User fills in the fields and clicks "Save"
4. Task appears in the appropriate column/list
Flow 2: Voice Task Creation
1. User clicks the microphone icon
2. User speaks: "Remind me to send the project proposal to the client by next 
Wednesday, it's high priority"
{'source': 'job.pdf', 'page': 3}
----
Chunk 2
b. Assumptions you made (about emails, formats, limitations, etc.).
5. AI Tools Usage
a. Which AI tools you used while building (Copilot, ChatGPT, Claude, 
Cursor, etc.).
b. What they helped with (boilerplate, debugging, design, parsing ideas, 
etc.).
c. Any notable prompts/approaches.
d. What you learned or changed because of these tools.
Demo Video (Mandatory)
Create a 5-10 minute screen recording that includes:
1. Quick walkthrough of the application
{'source': 'job.pdf', 'page': 5}
----
Chunk 3
a. Prerequ

OK, WE HAVE BUILT THE RETRIEVER. NOW WE MOVE ON TO THE LLM

In [None]:
# Chat model used to answer the question: Gemini 2.5 Flash with temperature 0
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

# Join the text of every retrieved chunk, newline-separated, into one context string
context = "\n".join(doc.page_content for doc in docs)

# Prompt = formatting rules + retrieved context + the user's question
prompt = f"""
Answer the question using only the context below.

Follow these rules strictly:
1. Use numbers (1, 2, 3) for main points.
2. Use lowercase alphabets (a, b, c) for sub-points.
3. Do not use asterisks, dashes, markdown, or special symbols.
4. Write in plain text only.

Context:
{context}

Question:
{query}
"""

# Send the prompt to the model and keep the content of the reply for the TTS step
answer = llm.invoke(prompt)
answer_text = answer.content

print(answer_text)


Based on the context provided, the assignments require building or producing the following:

1.  An application that supports user flows such as:
    a.  Manual Task Creation, where a user clicks "Add Task", fills a form with title, description, status, priority, and due date, clicks "Save", and the task appears.
    b.  Voice Task Creation, where a user clicks a microphone icon and speaks a command like "Remind me to send the project proposal to the client by next Wednesday, it's high priority".
2.  Documentation of assumptions made (about emails, formats, limitations, etc.).
3.  Documentation of AI Tools Usage, including:
    a.  Which AI tools were used (Copilot, ChatGPT, Claude, Cursor, etc.).
    b.  What they helped with (boilerplate, debugging, design, parsing ideas, etc.).
    c.  Any notable prompts/approaches.
    d.  What was learned or changed because of these tools.
4.  A 5-10 minute screen recording (Demo Video) that includes:
    a.  A quick walkthrough of the applicatio

NOW WE MOVE ON TO THE TTS PART

In [53]:
# Build an English text-to-speech request from the generated answer
tts = gTTS(
    text=answer_text,
    lang='en',
)
# Write the synthesized speech to an MP3 file
tts.save("answer.mp3")

print("Audio answer saved as answer.mp3")

Audio answer saved as answer.mp3
