#### 1. Loading Data

In [None]:
from pathlib import Path
import PyPDF2

In [None]:
# Input locations, expressed relative to the notebook's working directory.
resumes_path = Path("..", "data", "resumes")
jd_path = Path("..", "data", "job_descriptions", "jd1.txt")

# Read PDFs
def read_pdf(file_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        file_path: Location of the PDF; it is opened in binary mode.

    Returns:
        The extracted page texts joined in page order. A page whose
        extraction yields nothing contributes an empty string.
    """
    with open(file_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Guard against a missing (None) extraction result so that joining
        # the pages can never raise TypeError; pages are consumed while the
        # file is still open.
        return "".join(page.extract_text() or "" for page in reader.pages)
    
# Extract the text of every PDF found directly inside the resumes folder.
resumes_texts = [read_pdf(f) for f in resumes_path.glob("*.pdf")]

# Read JD text. The encoding is explicit so the result does not depend on
# the platform's default locale encoding.
with open(jd_path, "r", encoding="utf-8") as f:
    jd_text = f.read()

# Quick sanity check: how many resumes were read and the start of the JD.
print(f"Loaded {len(resumes_texts)} resumes")
print(f"JD Sample:\n{jd_text[:300]}...")

#### 2. Create Embeddings

In [None]:
# Load the .env file so that later cells can read API keys with os.getenv()
from dotenv import load_dotenv
import os

load_dotenv()

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Hugging Face token read from the environment.
# NOTE(review): hf_token is not passed to anything in this cell - confirm
# whether it is still needed.
hf_token = os.getenv("HUGGING_FACE_API_KEY")

# Initialize embeddings
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cpu"}  # run the model on CPU
encode_kwargs = {"normalize_embeddings": False}  # vectors are not normalized
embedding = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)


In [None]:
# Create vectors for all resumes in a single batched call. embed_documents
# is the embedding API meant for a list of texts and returns one vector per
# input, in the same order as resumes_texts.
resume_vectors = embedding.embed_documents(resumes_texts)

#### 3. Create Vector store

In [None]:
from langchain_community.vectorstores import FAISS

# Collect the resume files once, in a stable order, so that every text can be
# paired with the name of the file it came from. The retrieval cell reads the
# "file_name" metadata key from each search hit.
resume_files = sorted(resumes_path.glob("*.pdf"))
if not resume_files:
    # An index cannot be built from zero texts; fail with a clear message.
    raise FileNotFoundError(f"No PDF resumes found in {resumes_path}")

# Build FAISS index (one entry per resume, with its file name as metadata)
vectorstore = FAISS.from_texts(
    [read_pdf(pdf) for pdf in resume_files],
    embedding,
    metadatas=[{"file_name": pdf.name} for pdf in resume_files],
)

# Save locally
vectorstore.save_local("vectorstore/")


In [None]:
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Groq API key, read from the process environment.
api_key = os.getenv("GROQ_API_KEY")

# Chat model used by the name-extraction chain below.
# NOTE(review): confirm this model id is still served by Groq - the captured
# error output later in this notebook shows Groq decommissioning a model id.
llm = ChatGroq(
    model="llama3-8b-8192",
    api_key=api_key,
)

# Prompt asking the model for the candidate's name only. Its single input
# variable is "resumes_texts", so the chain must be invoked with that key.
prompt = PromptTemplate(
    input_variables=["resumes_texts"],
    template="""
    Extract only the full name of the candidate from this resume text.
    If you can't find it, return "Name: Unknown".

    Resume:
    {resumes_texts}

    Output format:
    Name: <candidate name>
    """
)

# Chain: input dict -> filled prompt -> LLM response.
# NOTE(review): the leading RunnablePassthrough() looks redundant here
# (presumably it only forwards the input unchanged) - confirm before removing.
extract_name_chain = RunnablePassthrough() | prompt | llm

def extract_candidate_name(resumes_texts):
    """Ask the LLM chain for the candidate's name found in one resume.

    Args:
        resumes_texts: Text of a single resume. Only the first 1000
            characters are sent to the model.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    # The key must match the prompt's declared input variable
    # ("resumes_texts"); any other key makes the prompt fail on a missing
    # variable before the LLM is ever called.
    response = extract_name_chain.invoke({"resumes_texts": resumes_texts[:1000]})
    return response.content.strip()

In [None]:
# Retrieve the 3 resumes most similar to the job description.
results = vectorstore.similarity_search(jd_text, k=3)

# Report each hit: source file (when the index stored one) and the
# LLM-extracted candidate name. The emoji literals were mis-encoded
# (UTF-8 bytes shown as Mac Roman) and are restored here.
for res in results:
    file_name = res.metadata.get("file_name", "Unknown")
    name = extract_candidate_name(res.page_content)
    print(f"📄 File: {file_name}")
    print(f"👤 {name}\n{'-'*50}\n")

In [4]:
import os
from pathlib import Path
from dotenv import load_dotenv
from PyPDF2 import PdfReader

from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

# ======================================================
# 1. Load environment variables
# ======================================================
load_dotenv()
groq_api = os.getenv("GROQ_API_KEY")

# Fail fast when the key is missing or empty, before any model is created.
# The emoji in both messages were mis-encoded and are restored here.
if not groq_api:
    raise ValueError("❌ GROQ_API_KEY not found in .env file.")
print("✅ GROQ_API_KEY loaded successfully")

✅ GROQ_API_KEY loaded successfully


In [None]:
# ======================================================
# 2. Initialize LLM (Groq)
# ======================================================
from langchain_groq import ChatGroq

# Initialize Groq model with the key loaded from the environment.
llm = ChatGroq(
    model="qwen/qwen3-32b",
    groq_api_key=groq_api,
    temperature=0.3,
)

# Quick test: one round-trip so that a bad key or model id fails in this
# cell rather than in the middle of the pipeline.
try:
    response = llm.invoke("Say 'Groq connected' if you're working.")
    print("✅ LLM Test:", response.content)
except Exception as e:
    # Chain the original exception so the underlying API error stays
    # visible in the traceback.
    raise RuntimeError(f"❌ Groq connection failed: {e}") from e

RuntimeError: ❌ Groq connection failed: Error code: 400 - {'error': {'message': 'The model `llama-3.1-70b-versatile` has been decommissioned and is no longer supported. Please refer to https://console.groq.com/docs/deprecations for a recommendation on which model to use instead.', 'type': 'invalid_request_error', 'code': 'model_decommissioned'}}

In [None]:



# ======================================================
# 3. Helper to read PDFs
# ======================================================
def read_pdf(file_path):
    """Extract and concatenate the text of all pages of a PDF.

    Args:
        file_path: Location of the PDF; it is opened in binary mode.

    Returns:
        Page texts joined in page order; a page with no extractable text
        contributes an empty string.
    """
    with open(file_path, "rb") as pdf_file:
        # Pull the text out while the file handle is still open.
        page_texts = [page.extract_text() or "" for page in PdfReader(pdf_file).pages]
    return "".join(page_texts)

# ======================================================
# 4. Paths (relative to the notebook's working directory)
# ======================================================
resumes_path = Path("..", "data", "resumes")
jd_path = Path("..", "data", "job_descriptions", "jd1.txt")

# ======================================================
# 5. Load JD text
# ======================================================
# Read the whole job description as UTF-8 text.
jd_text = jd_path.read_text(encoding="utf-8")

# ======================================================
# 6. Read resumes
# ======================================================
# Two parallel lists: resumes_texts[i] is the text of the file named in
# metadata[i]["file_name"].
resumes_texts = []
metadata = []

for f in resumes_path.glob("*.pdf"):
    resumes_texts.append(read_pdf(f))
    metadata.append({"file_name": f.name})

# The check-mark emoji was mis-encoded and is restored here.
print(f"✅ Loaded {len(resumes_texts)} resumes")

# ======================================================
# 7. Create embeddings + FAISS vectorstore
# ======================================================
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
# One index entry per resume, each carrying its file-name metadata.
vectorstore = FAISS.from_texts(
    texts=resumes_texts,
    embedding=embedding,
    metadatas=metadata,
)

# ======================================================
# 8. Retrieve top 3 resumes for JD
# ======================================================
results = vectorstore.similarity_search(query=jd_text, k=3)

# ======================================================
# 9. Extract candidate name using the Groq LLM
# ======================================================
# Prompt with a single input variable, "resume_text"; the chain must be
# invoked with that key.
prompt = PromptTemplate(
    input_variables=["resume_text"],
    template="""
Extract ONLY the candidate's full name from the resume text below.
If not found, say "Unknown".

Resume:
{resume_text}
"""
)

# Chain: input dict -> filled prompt -> LLM response.
# NOTE(review): the trailing RunnablePassthrough() looks redundant
# (presumably it only forwards the LLM output unchanged) - confirm.
extract_name_chain = prompt | llm | RunnablePassthrough()

def extract_candidate_name(resume_text):
    """Run the name-extraction chain on the start of one resume.

    Args:
        resume_text: Text of a single resume; only its first 1000
            characters are sent to the chain.

    Returns:
        The reply's ``content`` with surrounding whitespace stripped when
        the reply has that attribute, otherwise ``str()`` of the reply.
    """
    snippet = resume_text[:1000]
    reply = extract_name_chain.invoke({"resume_text": snippet})
    if hasattr(reply, "content"):
        return reply.content.strip()
    return str(reply)

# ======================================================
# 10. Display final output
# ======================================================
# For each retrieved resume, show its source file and the LLM-extracted
# candidate name. The emoji literals were mis-encoded and are restored here.
for res in results:
    file_name = res.metadata.get("file_name", "Unknown")
    name = extract_candidate_name(res.page_content)
    print(f"📄 File: {file_name}")
    print(f"👤 Candidate: {name}")
    print("-" * 50)
