In [1]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser


In [2]:
# Project name under which this notebook's runs appear in LangSmith.
os.environ.update({"LANGCHAIN_PROJECT": "RAG Chatbot"})

In [3]:
# Read key/value pairs from a local .env file into the environment.
# The call's return value is shown as the cell output.
load_dotenv()  # expects OPENAI_API_KEY in .env

True

In [4]:
# Path of the PDF that the rest of the notebook loads, chunks and indexes.
PDF_PATH: str = "resume.pdf"

In [5]:
# 1) Load PDF
# `docs` ends up holding one Document per page of the file.
loader = PyPDFLoader(file_path=PDF_PATH)
docs = loader.load()


In [6]:
# Summarise the loaded pages instead of echoing every Document: the full repr
# is very long and includes the resume's contact details, which should not be
# saved in the notebook's outputs.
# Each entry is (page index from the loader metadata, characters on that page).
[(doc.metadata.get("page"), len(doc.page_content)) for doc in docs]

In [7]:
# 2) Chunk
# Break the pages into pieces targeting 1000 characters each, with up to
# 150 characters shared between neighbouring pieces.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=150,
    chunk_size=1000,
)
splits = splitter.split_documents(documents=docs)

In [8]:
# Report how many chunks were produced and how long each one is, rather than
# dumping every chunk's text and metadata (which repeats the resume's contact
# details in the saved output).
len(splits), [len(chunk.page_content) for chunk in splits]

In [9]:
# 3) Embed + index
# Embedding client pointed at OpenRouter's OpenAI-compatible endpoint.
emb = OpenAIEmbeddings(
    model="openai/text-embedding-3-small",
    base_url="https://openrouter.ai/api/v1",
    # Send the raw text instead of pre-tokenised ids. LangChain recommends
    # this setting for OpenAI-compatible providers, since the token-id request
    # path is tied to OpenAI's own API and tokenizer model names. The chunks
    # are ~1000 characters, so the automatic long-input splitting that this
    # switches off is not needed here.
    check_embedding_ctx_length=False,
)

In [10]:
# Embed every chunk with `emb` and build a FAISS vector store from the result.
vs = FAISS.from_documents(documents=splits, embedding=emb)

In [11]:
# Similarity search over the vector store, returning 4 chunks per query.
retriever = vs.as_retriever(
    search_kwargs=dict(k=4),
    search_type="similarity",
)

In [12]:
# Inspection only: echo the retriever's repr to confirm its search settings.
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002074AAB8B10>, search_kwargs={'k': 4})

In [13]:
# 4) Prompt
# The system turn restricts the model to the supplied context; the human turn
# carries the question followed by that context. The template expects the
# variables `question` and `context`.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer ONLY from the provided context. "
            "If not found, say you don't know.",
        ),
        ("human", "Question: {question}\n\nContext:\n{context}"),
    ]
)


In [14]:
# 5) Chain
# Chat model reached through OpenRouter's OpenAI-compatible endpoint.
# The model id uses OpenRouter's provider-prefixed form, consistent with the
# "openai/text-embedding-3-small" id this notebook uses for embeddings.
llm = ChatOpenAI(
    base_url="https://openrouter.ai/api/v1",
    model="openai/gpt-4o-mini",
    temperature=0,  # lowest sampling randomness, for repeatable answers
)


In [15]:
def format_docs(docs):
    """Merge the text of several documents into one context string.

    Args:
        docs: Iterable of objects that expose a ``page_content`` string.

    Returns:
        Every ``page_content`` value, in order, separated by a blank line.
        An empty iterable yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(page_texts)


In [16]:
# Fan the incoming question out to two branches: one retrieves matching chunks
# and formats them into a single string, the other forwards the question
# unchanged. The result is a dict with the keys "context" and "question".
parallel = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)


In [17]:
# Full pipeline: gather prompt inputs -> fill the prompt -> call the chat
# model -> reduce the model's reply to a plain string.
chain = (
    parallel
    | prompt
    | llm
    | StrOutputParser()
)

In [18]:
# 6) Ask questions
# NOTE(review): this cell reads exactly one question (there is no loop), so
# the "Ctrl+C to exit" hint in the banner does not describe a repeating prompt.
print("PDF RAG ready. Ask a question (or Ctrl+C to exit).")
# The typed text is stored as-is in `q`; whitespace is stripped where it is used.
q = input("\nQ: ")


PDF RAG ready. Ask a question (or Ctrl+C to exit).



Q:  "what are my ml skills"


In [19]:
# Normalise the typed question and refuse to run the chain on empty input:
# an empty query would still hit the embedding and chat endpoints and could
# only produce an API error or a meaningless answer.
question = q.strip()
if not question:
    raise ValueError("Question is empty; re-run the input cell and type a question.")

# The chain takes the bare question string and returns the model's reply as text.
ans = chain.invoke(question)
print("\nA:", ans)



A: Your machine learning skills include:

- Proficiency in Python and relevant libraries such as Scikit-learn for machine learning.
- Experience in model deployment using Flask API, Docker, Heroku, Vercel, Streamlit, and Netlify.
- Knowledge of natural language processing (NLP) techniques including text preprocessing, sentiment analysis, named entity recognition (NER), and word embeddings (Word2Vec).
- Ability to create data preprocessing pipelines using Pandas and NumPy for cleaning and transforming datasets.
- Experience in building dashboards and visualizations with Matplotlib, Seaborn, and Plotly to communicate model results and insights.
- Skills in model evaluation and performance tuning, as well as failure analysis and applying targeted fixes to improve model performance.


In [20]:
# Scratch cell: a bare string literal, so the notebook simply echoes it back.
# It is not passed to the chain.
"what are my ml skills"

'what are my ml skills'