In [19]:
from langchain_community.document_loaders import PyPDFLoader

# Parse the resume PDF into a list of LangChain Document objects.
PDF_PATH = "pravar_sharma_updated.pdf"
loader = PyPDFLoader(PDF_PATH)
docs = loader.load()

In [20]:
# Inspect the loaded Document list (PDF metadata plus the extracted page text).
# NOTE(review): the rendered output contains personal contact details (email address,
# phone number); consider truncating or clearing it before sharing the notebook.
docs

[Document(metadata={'producer': 'Skia/PDF m137', 'creator': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/137.0.0.0 Safari/537.36', 'creationdate': 'D:20250615101126', 'title': 'first resume', 'moddate': 'D:20250615101126', 'author': 'first resume', 'keywords': 'Python, Django, FastAPI\nJavaScript, React, Vite, Tailwind CSS\nREST APIs, PostgreSQL, MySQL\nAWS (EC2, S3, Lambda, SQS, IAM)\nDocker, GitHub Actions, CI/CD\nMicroservices Architecture\nSystem Design, Agile/Scrum\nGenerative AI Integration (LangChain, OpenAI API)', 'source': 'pravar_sharma_updated.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Pravar Sharma\nJaipur,India pravar.sharma@gmail.com+91 9414472171 in/pravar-sharma-3410a199\nSUMMARY\nResults-driven Backend Developer with 5+ years of experience designing and developing scalable web applications using Python, Django,\nReact, and AWS. Proven track record in building SaaS platforms, integrating third-party API

In [21]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded pages into smaller chunks for embedding.
# Neighbouring chunks share CHUNK_OVERLAP units so text cut at a boundary
# still appears whole in at least one chunk.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
documents = text_splitter.split_documents(docs)

In [22]:
# Inspect the chunks produced by the text splitter (each is a Document carrying
# the source PDF's metadata).
documents

[Document(metadata={'producer': 'Skia/PDF m137', 'creator': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/137.0.0.0 Safari/537.36', 'creationdate': 'D:20250615101126', 'title': 'first resume', 'moddate': 'D:20250615101126', 'author': 'first resume', 'keywords': 'Python, Django, FastAPI\nJavaScript, React, Vite, Tailwind CSS\nREST APIs, PostgreSQL, MySQL\nAWS (EC2, S3, Lambda, SQS, IAM)\nDocker, GitHub Actions, CI/CD\nMicroservices Architecture\nSystem Design, Agile/Scrum\nGenerative AI Integration (LangChain, OpenAI API)', 'source': 'pravar_sharma_updated.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Pravar Sharma\nJaipur,India pravar.sharma@gmail.com+91 9414472171 in/pravar-sharma-3410a199\nSUMMARY\nResults-driven Backend Developer with 5+ years of experience designing and developing scalable web applications using Python, Django,\nReact, and AWS. Proven track record in building SaaS platforms, integrating third-party API

In [23]:
from langchain_community.vectorstores import FAISS
from langchain_ollama import OllamaEmbeddings

# Embed every chunk and index the resulting vectors in a FAISS vector store.
# OllamaEmbeddings comes from langchain_ollama (the same package this notebook
# uses for OllamaLLM); the langchain_community.embeddings variant is deprecated.
# NOTE(review): llama3.2 is a general chat model. The similarity search in the
# following cell returns a chunk unrelated to its query, so a dedicated embedding
# model served by Ollama may retrieve better -- confirm which models are available.
embeddings = OllamaEmbeddings(model="llama3.2")
db = FAISS.from_documents(documents, embeddings)

In [24]:
# Sanity-check the vector store: fetch the chunks closest to the query
# and display the text of the best match.
query = "RESTful APIs for booking, cancellation"
result = db.similarity_search(query)
top_match = result[0]
top_match.page_content

'Docker, GitHub Actions, CI/CD•\nMicroservices Architecture•\nSystem Design, Agile/Scrum•\nGenerative AI Integration (LangChain, OpenAI API)•'

In [25]:
from langchain_ollama import OllamaLLM

# Ollama-served model used to generate the answers; the trailing
# expression displays the configured object.
LLM_MODEL = "llama3.2"
llm = OllamaLLM(model=LLM_MODEL)
llm

OllamaLLM(model='llama3.2')

In [26]:
# Build the chat prompt template.
from langchain_core.prompts import ChatPromptTemplate

# Template placeholders:
#   {context} - the document text supplied to the chain for this question
#   {input}   - the question being asked
PROMPT_TEMPLATE = """
    Answer the following question based only on the provided context.
    Think step by step before providing answer.
    <context>
    {context}
    </context>
    Question: {input}
    """

prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

In [27]:
# Create the "stuff documents" chain: it combines the LLM with the prompt
# so that supplied documents fill the prompt's {context} slot.
from langchain.chains.combine_documents import create_stuff_documents_chain

doc_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [28]:
# Retrievers
#
# A retriever is an interface that returns documents for an unstructured query.
# Here it sits in front of the FAISS vector store: given a question, it looks up
# the matching chunks in the vector DB, narrowing the search down to the
# relevant documents.
# (The variable name `retriver` is kept as-is because later cells reference it.)
retriver = db.as_retriever()
retriver

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x12066e5d0>, search_kwargs={})

In [29]:
# Retrieval chain
#
# Combining the retriever with the document chain gives a retrieval chain:
#   1. the user input is passed to the retriever to fetch relevant documents;
#   2. the input and those documents are then passed to the LLM to generate
#      the response.
# (The variable name `retrival_chain` is kept as-is because later cells reference it.)
from langchain.chains import create_retrieval_chain

retrival_chain = create_retrieval_chain(
    retriever=retriver,
    combine_docs_chain=doc_chain,
)


In [30]:
# Ask the retrieval chain about work history.
question = "What are the companies he has worked in so far?"
response = retrival_chain.invoke({"input": question})

In [31]:
# Show only the generated answer text from the chain's response.
response["answer"]

'Based on the provided context, Pravar Sharma has worked for the following companies:\n\n1. Ltimindtree (Senior Engineer Cloud Services & Software)\n2. Quixom Technology (Trootech Business Solutions Pvt. Ltd.) - twice (as Python developer and again as part of Trootech Business Solutions Pvt. Ltd.)\n3. Ideeperners Pvt. Ltd.\n4. Dgrity Solutions Pvt. Ltd.\n\nNote that he also mentions working in Ahmedabad, Noida, Jaipur, Delhi, and other locations, but these are likely the cities or regions where his companies are based, rather than separate jobs.'

In [32]:
# Ask the retrieval chain about education.
question = "what is his education?"
response = retrival_chain.invoke({"input": question})

In [33]:
# Show only the generated answer text from the chain's response.
response["answer"]

"According to the provided context, Pravar Sharma's education is as follows:\n\n* Bachelor of Technology (B.Tech) in Electrical Engineering\nfrom Rajasthan Technical University, Kota, with a percentage of 70.70%."

In [34]:
# NOTE(review): this cell originally invoked the chain with an empty string as
# the question. An empty query gives the retriever nothing to match on and the
# LLM nothing to answer, and the call would overwrite `response` from the
# previous question. A blank question is now skipped instead of being sent.
question = ""  # TODO: replace with the question to ask
if question.strip():
    response = retrival_chain.invoke({"input": question})
else:
    print("Skipping retrieval: the question is empty.")