#### 1. Loading Data

In [1]:
from pathlib import Path
import PyPDF2

In [2]:
#Paths 
resumes_path = Path("../data/resumes")
jd_path= Path("../data/job_descriptions/jd1.txt")

#Read PDFs
def read_pdf(file_path):
    text =""
    with open(file_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)
        for page in reader.pages:
            text+=page.extract_text()
        return text
    
resumes_texts = [read_pdf(f) for f in resumes_path.glob("*.pdf")]

# Read JD text
with open(jd_path, "r") as f:
    jd_text = f.read()

print(f"Loaded {len(resumes_texts)} resumes")
print(f"JD Sample:\n{jd_text[:300]}...")

Loaded 4 resumes
JD Sample:
Job Title: Business Analyst
Location: Bengaluru, India
Job Type: Full-time
Experience: 2â€“5 years
About the Company:
TechNova Solutions is a leading IT services company focused on delivering innovative solutions to drive business growth and operational efficiency.
Job Summary:
We are looking for a de...


#### 2. Create Embeddings

In [3]:
# Load the .env file
from dotenv import load_dotenv
import os

load_dotenv()

True

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings

hf_token = os.getenv("HUGGING_FACE_API_KEY")

# Initialize embeddings
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": False}
embedding = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Create vectors for all resumes
resume_vectors = [embedding.embed_query(text) for text in resumes_texts]

#### 3. Create Vector store

In [6]:
from langchain_community.vectorstores import FAISS

# Build FAISS index
vectorstore = FAISS.from_texts(resumes_texts, embedding)

# Save locally
vectorstore.save_local("vectorstore/")


#### 4. Query: Find Top Matching Resumes for a JD

In [8]:
# Find top 3 resumes
results = vectorstore.similarity_search(jd_text, k=3)

for i, res in enumerate(results, 1):
    print(f"--- Resume {i} ---\n{res.page_content[:500]}...\n")


--- Resume 1 ---
Business Analyst (BA)
Name: Rahul Kumar
Email: rahul.kumar@example.com
Phone: +91-XXXXXXXXXX
LinkedIn: linkedin.com/in/rahulkumar
Professional Summary:
Business Analyst with 5 years of experience in data-driven decision making, requirement gathering,
and reporting using BI tools.
Skills:
Excel, SQL, Power BI, Tableau, Python, Requirement Analysis, Agile Methodology
Experience:
- Conducted market and business analysis for multiple product launches.
- Developed dashboards in Tableau and Power BI t...

--- Resume 2 ---
Chartered Accountant (CA)
Name: Priya Sharma
Email: priya.sharma@example.com
Phone: +91-XXXXXXXXXX
LinkedIn: linkedin.com/in/priyasharma
Professional Summary:
Chartered Accountant with 6 years of experience in auditing, taxation, and financial advisory
services.
Skills:
Accounting, Taxation, Auditing, Financial Reporting, SAP, Excel, IFRS, GST
Experience:
- Conducted statutory audits for multiple companies in manufacturing and IT sectors.
- Managed tax plan

In [26]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(temperature=0),
    retriever=vectorstore.as_retriever()
)

answer = qa_chain.run(jd_text)
print(answer)

ModuleNotFoundError: No module named 'langchain.chains'