In [13]:
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity
import pypdf
import docx

# --- INITIALIZE MODELS ---
# Load the Sentence-Transformer model for calculating embeddings
print("Loading embedding model...")
scoring_model = SentenceTransformer('./output/recruitment-model-v1')

# Initialize the LLM for the generative tasks
print("Initializing LLM...")
llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro", temperature=0.6)

Loading embedding model...
Initializing LLM...


In [14]:
def extract_text_from_file(file_path):
    """Extracts text from a PDF or DOCX file."""
    if file_path.endswith('.pdf'):
        try:
            reader = pypdf.PdfReader(file_path)
            text = "".join(page.extract_text() for page in reader.pages)
            return text
        except Exception as e:
            print(f"Error reading PDF {file_path}: {e}")
            return ""
    elif file_path.endswith('.docx'):
        try:
            doc = docx.Document(file_path)
            text = "\n".join(paragraph.text for paragraph in doc.paragraphs)
            return text
        except Exception as e:
            print(f"Error reading DOCX {file_path}: {e}")
            return ""
    else:
        print(f"Unsupported file format: {file_path}")
        return ""

In [15]:
def score_resume(job_description, resume_text, model):
    """
    Calculates the semantic similarity score between a job description and a resume.
    """
    # Generate embeddings for both texts
    jd_embedding = model.encode(job_description, convert_to_tensor=True)
    resume_embedding = model.encode(resume_text, convert_to_tensor=True)

    # Calculate Cosine Similarity
    # util.cos_sim returns a tensor, we get the score using .item()
    cosine_score = util.cos_sim(jd_embedding, resume_embedding).item()

    # Return score as a percentage
    return round(cosine_score * 100, 2)

In [16]:
def create_enrichment_chain(llm):
    """
    Creates the updated LangChain chain for generating a comprehensive candidate report.
    """
    prompt_template = """
    You are an expert AI Hiring Assistant reviewing a candidate for an AI Intern role.
    Your task is to create a detailed, structured analysis report based on the provided Job Description and the Candidate's Resume.

    **Job Description:**
    ```{job_description}```

    **Candidate's Resume:**
    ```{resume_text}```

    ---
    **Instructions:**
    Based on the information above, generate the following report with markdown formatting. Ensure all four sections are present.

    ### **1. Overall Fit Score**
    - Provide a score out of 10 (e.g., **8.5/10**).
    - Write a concise **Justification** explaining why you gave this score, considering both strengths and weaknesses. Reconcile the provided semantic score if it seems to contradict the qualitative fit.

    ### **2. Candidate Summary**
    - Provide 3 concise bullet points highlighting the candidate's strongest qualifications, relevant experience, and key skills that align with the job.

    ### **3. Skill Match Analysis**
    - List the key skills from the job description (e.g., LangChain, Python, PyTorch, Cloud).
    - For each skill, indicate if a match was found in the resume using these emojis:
        - ✅ **Match Found:** If the skill is clearly present.
        - ⚠️ **Partial/Indirect Match:** If related experience is mentioned but not the exact skill.
        - ❌ **Not Mentioned:** If the skill is missing.
    - Briefly state the evidence from the resume.

    ### **4. Personalized Interview Questions**
    - Create a list of 3 insightful questions that probe deeper into the candidate's specific projects or roles mentioned in their resume.
    """

    # Note: We are no longer using LLMChain, but the newer recommended syntax.
    # The prompt is created and then "piped" to the language model.
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["job_description", "resume_text"]
    )

    # This "chain" is now the prompt piped to the LLM
    return prompt | llm

In [17]:
# Corrected final cell to replace the last cell in your notebook

def main():
    # --- Create the chain object by CALLING the function ---
    enrichment_chain = create_enrichment_chain(llm)

    # --- INPUTS ---
    job_description = """
Here is a job description for a Software Development Engineer (SDE) Intern, tailored for a tech company in Hyderabad.

## Job Description: Software Development Engineer (SDE) Intern
Company: CodeCraft Technologies
Location: Hyderabad, Telangana
Position: SDE Intern
Duration: 6 Months
Type: Full-time Internship

## Job Summary
CodeCraft Technologies is looking for talented and driven SDE Interns to join our core engineering team in Hyderabad. As an intern, you will move beyond academic projects and contribute directly to our live software products. You will be immersed in a fast-paced environment, working on real-world challenges and learning how to build scalable, high-quality software from experienced engineers.

## Responsibilities
Design, develop, test, and deploy software features for our core applications.

Write clean, maintainable, and well-documented code in languages like Python, Java, or Go.

Collaborate with senior engineers and product managers to translate requirements into technical solutions.

Participate in code reviews to learn and maintain high-quality code standards.

Assist in troubleshooting and debugging issues in production and staging environments.

## Qualifications
Required:

Currently pursuing a B.Tech or M.Tech degree in Computer Science or a related field.

Strong foundation in Computer Science fundamentals: Data Structures, Algorithms, and Object-Oriented Programming (OOP).

Proficiency in at least one programming language such as Python, Java, or C++.

Excellent problem-solving and analytical abilities.

Familiarity with version control systems like Git.

Bonus Points (Preferred):

Previous internship or significant project experience (e.g., a strong GitHub profile).

Familiarity with cloud platforms (AWS, GCP, or Azure).

Experience with databases (SQL or NoSQL) and building APIs.

## What We Offer
The opportunity to ship code that will be used by real customers.

Dedicated mentorship from a senior software engineer.

A culture of technical excellence and continuous learning.

A competitive stipend and potential for a full-time offer upon successful completion of the internship.
"""

    resume_folder = "resumes"
    if not os.path.exists(resume_folder):
        os.makedirs(resume_folder)
        print(f"Created a folder named '{resume_folder}'. Please add resume files to it.")
        return

    resume_files = [os.path.join(resume_folder, f) for f in os.listdir(resume_folder)]
    if not resume_files:
        print(f"No resumes found in the '{resume_folder}' folder.")
        return

    # --- 1. SCORING AND RANKING ---
    print("\n--- Scoring Resumes ---")
    candidate_scores = []
    for resume_path in resume_files:
        resume_text = extract_text_from_file(resume_path)
        if resume_text:
            # Pass the scoring_model to the function
            score = score_resume(job_description, resume_text, scoring_model)
            candidate_scores.append({"path": resume_path, "score": score, "text": resume_text})
            print(f"Scored {os.path.basename(resume_path)}: {score}%")

    ranked_candidates = sorted(candidate_scores, key=lambda x: x['score'], reverse=True)

    # --- 2. FILTERING & ENRICHMENT ---
    print("\n--- Top 3 Candidates Analysis ---")
    top_n = 3
    for candidate in ranked_candidates[:top_n]:
        print("\n" + "="*50)
        print(f"CANDIDATE: {os.path.basename(candidate['path'])}")
        print(f"SCORE: {candidate['score']}%")
        print("--- Generating Insights ---")

        try:
            # --- CORRECTED INVOCATION ---
            # Now you call .invoke() on the chain OBJECT, not the function
            enrichment_result = enrichment_chain.invoke({
                "job_description": job_description,
                "resume_text": candidate['text']
            })
            # The result from an LLM call is an AIMessage object; its content is in the .content attribute
            print(enrichment_result.content)

        except Exception as e:
            print(f"An error occurred: {e}")
        
        print("="*50)

if __name__ == "__main__":
    main()


--- Scoring Resumes ---
Scored Python_Developer_Resume.docx: 12.56%
Scored ramanjanmanchikatla_resume.pdf: 57.71%
Scored Resume3.0.pdf: 22.26%
Scored ResumeParthiv.pdf: 38.88%
Scored Resume_1.pdf: 34.29%
Scored Resume_10.pdf: 60.68%
Scored Resume_2.pdf: 10.83%
Scored Resume_3.pdf: 40.83%
Scored Resume_4.pdf: 58.62%
Scored Resume_5.pdf: 31.38%
Scored Resume_6.pdf: 45.22%
Scored Resume_7.pdf: 37.74%
Scored Resume_8.pdf: 31.16%
Scored Resume_9.pdf: 26.43%

--- Top 3 Candidates Analysis ---

CANDIDATE: Resume_10.pdf
SCORE: 60.68%
--- Generating Insights ---


KeyboardInterrupt: 