#### The Scenario: You are building an internal tool for an HR team. They are tired of manually comparing resumes to job descriptions. They want an AI that reads a Candidate's Resume (PDF) and a Live Job Posting (URL), and instantly generates a report on why the candidate fits or fails.

In [5]:
import os
from dotenv import load_dotenv
load_dotenv()
from uuid import uuid4

import time
from docx import Document

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pinecone import Pinecone, ServerlessSpec
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


from langchain_community.document_loaders import WebBaseLoader

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [9]:
# Load both sources, then split them into overlapping chunks.

# Candidate resume: a local PDF file.
loader = PyPDFLoader("./files/resume.pdf")
docs = loader.load()
docs

# Job posting: fetched from its URL (`loader` is rebound to the web loader).
job_url = "https://job-boards.greenhouse.io/greenhouse/jobs/7477093"
loader = WebBaseLoader(job_url)
job_docs = loader.load()

# Resume documents first, job-posting documents after.
all_docs = [*docs, *job_docs]

# Split with chunk_size=1000 and chunk_overlap=200 (measured in characters
# by the splitter's default length function).
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(all_docs)
chunks


[Document(metadata={'producer': 'Microsoft® Word 2019', 'creator': 'Microsoft® Word 2019', 'creationdate': '2025-07-21T11:28:41-04:00', 'author': 'Resume Worded', 'moddate': '2026-01-08T15:50:55-05:00', 'source': './files/resume.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='SAHIL AMUNDKAR \nAI/ML Engineer \nTempe, AZ | 5512130963 | sahilamundkar2@gmail.com \uf07c LinkedIn | GitHub  \nSUMMARY  \n• AI Engineer with 4 years of experience developing and deploying ML systems across finance, cybersecurity, \nand enterprise AI. \n• Orchestrated AI pipelines integrating conversational AI and customer retention modelling, reducing financial risk \nand improving investment decision workflows by 25% for Fortune 500 clients. \n• Architected Generative AI systems using LangGraph, LangChain, RAG, and LLM fine-tuning, elevating \ndomain-specific response quality by 40% in cybersecurity and compliance auditing use cases. \n• Streamlined data pipelines with Apache Spark, Kafka, A

In [20]:
# Name of the Pinecone index used by the cells below.
index_name = "jobscore"

# Embedding model handed to the vector store.
embeddings = HuggingFaceEmbeddings(model="all-MiniLM-L6-v2")

# Pinecone client; the API key is read from the environment
# (load_dotenv() ran in the import cell).
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

In [21]:
# Create the serverless index only if it does not exist yet; later runs reuse it.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # vector size produced by all-MiniLM-L6-v2
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Index creation is asynchronous. Poll until Pinecone reports the index
    # as ready instead of sleeping a fixed 10 s, which may be too short (the
    # first upsert then fails) or needlessly long.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

In [22]:
# Wrap the Pinecone index as a LangChain vector store using our embeddings.
vector_store = PineconeVectorStore(index_name=index_name, embedding=embeddings)

# One random UUID per document, so each stored vector has an explicit ID.
uuids = [str(uuid4()) for _ in all_docs]

# NOTE(review): this stores the whole documents in `all_docs`; the `chunks`
# produced by the splitter are never indexed. Confirm which one is intended.
# The IDs must be passed as `ids` (plural); `id=` is not the keyword that
# add_documents expects for per-document IDs.
vector_store.add_documents(documents=all_docs, ids=uuids)

['74b61cb4-9bef-426e-ab54-d9916950f363',
 '2593aa81-0816-44f6-9e21-6b8c063d49c8',
 'b0d4ae97-b78c-4003-bd0c-2f51e63daa7d']

In [29]:
# Retriever: MMR search that returns k=3 documents selected from a pool of
# fetch_k=4 candidates. The candidate-pool option is spelled `fetch_k`;
# "fetch_keys" is not a recognised search option.
retriver = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 3, "fetch_k": 4},
)

# format_docs
def format_docs(all_docs):
    """Return the ``page_content`` of each document, joined by blank lines.

    Args:
        all_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string with the contents separated by ``"\\n\\n"``; an empty
        string when ``all_docs`` is empty.
    """
    contents = (doc.page_content for doc in all_docs)
    return "\n\n".join(contents)

# Prompt
# Template with two placeholders: {context} receives the formatted retrieved
# text and {question} receives the user's query.
# (Spelling of "question" corrected in the instruction the LLM reads.)
template = """
Answer the question based on context.
Context: {context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# model
# Chat model served through Groq; it generates the final answer.
model = ChatGroq(model="qwen/qwen3-32b")

In [30]:
# Chain: retrieve -> format -> prompt -> model -> plain string

# The input string feeds both prompt variables: it goes through the retriever
# (whose hits are flattened by format_docs) for "context", and is passed
# through unchanged for "question".
chain_inputs = {
    "context": retriver | format_docs,
    "question": RunnablePassthrough(),
}

chain = chain_inputs | prompt | model | StrOutputParser()

In [33]:
# The question sent through the chain. The leading/trailing newlines and the
# trailing space after "Resume." are part of the string.
query = (
    "\n"
    "Compare the Job Description requirements against the Candidate's Resume. \n"
    "Identify the key technical skills match, and explicitly list the missing skills or experience gaps.\n"
)

# Echo the query text, prefixed with "result ".
print("result " + query)

result 
Compare the Job Description requirements against the Candidate's Resume. 
Identify the key technical skills match, and explicitly list the missing skills or experience gaps.



In [34]:
# Time only the chain invocation.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() follows the wall clock and can jump if the system
# clock is adjusted mid-run.
start = time.perf_counter()
response = chain.invoke(query)
end = time.perf_counter()

# Save to Word
# The report holds a title, the query, the model's answer and the elapsed time.
doc = Document()
doc.add_heading('Assignment Report', 0)
doc.add_paragraph(f"Query: {query}")
doc.add_paragraph(f"Answer: {response}")
doc.add_paragraph(f"Time Taken: {end - start:.2f} seconds")
doc.save('Simple_Report.docx')