<a href="https://colab.research.google.com/github/kusan-139/ATS-Resume-Checker-GoogleColab/blob/main/Resume_checker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding: utf-8 -*-
"""
ATS Resume Checker Project for Google Colab

This script is divided into four main parts:
1.  **Job Description Input (PDF):** Takes a job description as a PDF file input.
2.  **Job Post Information Extraction:** Extracts key details (skills, experience, etc.)
    from the job description text (extracted from PDF) using an LLM (Gemini) with a structured output.
3.  **Resume Comparison & ATS Score:** Uploads a PDF resume, extracts its text,
    compares it against the extracted job post information using an LLM (Gemini),
    and provides an "ATS score" along with explanations.
4.  **PDF Report Generation (using ReportLab):** Generates a PDF report of the analysis results
    and provides a download link.
"""

# --- CRITICAL SETUP: RUN THESE COMMANDS FIRST IN COLAB ---
# Install necessary libraries
!pip install PyPDF2 requests reportlab
# --- END CRITICAL SETUP ---

# Import necessary libraries
import io
import re
import json
import requests # Used for making HTTP requests to the Gemini API
from PyPDF2 import PdfReader # Used to extract text from PDF files
from google.colab import files # Used for file upload and download in Google Colab

# ReportLab imports
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib.pagesizes import letter
from reportlab.lib.enums import TA_CENTER, TA_LEFT
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

# --- Configuration ---
# API key for the Gemini API; call_gemini_api reads it to authenticate requests.
# IMPORTANT: Replace the blank placeholder string below with your actual API key
# from Google AI Studio: https://aistudio.google.com/app/apikey
# Avoid sharing or committing a copy of this notebook that contains a real key.
API_KEY = " " # <--- PASTE YOUR API KEY HERE!

# --- Helper function for PDF File Upload ---
def upload_pdf_file(prompt_message):
    """
    Asks the user to upload a file through the Colab upload widget.

    Only the first uploaded file is used; any additional files selected in
    the same dialog are ignored.

    Args:
        prompt_message (str): Text printed before the upload dialog appears.
    Returns:
        bytes: Content of the first uploaded file, or None if nothing was uploaded.
    """
    print(prompt_message)
    uploads = files.upload()  # Mapping of file name -> file content

    if uploads:
        first_name = next(iter(uploads))
        print(f"File '{first_name}' uploaded successfully.")
        return uploads[first_name]

    print("No file uploaded. Exiting.")
    return None

# --- Helper function for PDF Text Extraction ---
def extract_text_from_pdf(pdf_bytes):
    """
    Reads a PDF held in memory and returns the text of all its pages.

    Each page that yields text contributes that text followed by a newline;
    pages with no extractable text are skipped. Any failure while opening or
    reading the PDF is reported on stdout and results in an empty string.

    Args:
        pdf_bytes (bytes): Raw content of the PDF file.
    Returns:
        str: Concatenated page text, or "" if extraction failed.
    """
    try:
        reader = PdfReader(io.BytesIO(pdf_bytes))
        chunks = []
        for page in reader.pages:
            content = page.extract_text()
            if content:
                chunks.append(content + "\n")
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return ""
    return "".join(chunks)

# --- Helper function for LLM calls ---
def _first_candidate_text(result):
    """Returns the text of the first part of the first candidate, or None if absent."""
    candidates = result.get('candidates')
    if not candidates:
        return None
    content = candidates[0].get('content')
    if not content:
        return None
    parts = content.get('parts')
    if not parts:
        return None
    return parts[0].get('text')


def call_gemini_api(prompt, response_schema=None, timeout=60):
    """
    Makes an HTTP POST call to the Gemini API using Python's requests library.

    This function never raises for request or parsing problems; failures are
    reported as a dict containing an "error" key so callers can test for it.

    Args:
        prompt (str): The text prompt for the LLM.
        response_schema (dict, optional): A JSON schema for structured output.
        timeout (float, optional): Seconds to wait for the HTTP request before
            giving up. Without a timeout a stalled connection would block forever.
    Returns:
        dict/str: The parsed JSON response (if schema provided) or raw text.
            On failure, a dict with an "error" key (and "raw_response" when
            a response body is available).
    """
    api_key = (API_KEY or "").strip()
    if not api_key:
        # Fail fast with a clear message instead of sending an unauthenticated request.
        message = "Gemini API key is not set. Paste your key into API_KEY."
        print(message)
        return {"error": message}

    # The key is sent as a header rather than a URL query parameter: request
    # errors include the URL in their message, and that message is printed below.
    headers = {
        'Content-Type': 'application/json',
        'x-goog-api-key': api_key
    }

    api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

    payload = {
        "contents": [
            {
                "role": "user",
                "parts": [{"text": prompt}]
            }
        ]
    }
    if response_schema:
        # Ask the model for JSON that conforms to the supplied schema.
        payload["generationConfig"] = {
            "responseMimeType": "application/json",
            "responseSchema": response_schema
        }

    try:
        response = requests.post(api_url, headers=headers, data=json.dumps(payload), timeout=timeout)
        response.raise_for_status()
        result = response.json()

        raw_content = _first_candidate_text(result)
        if not raw_content:
            return {"error": "LLM response structure unexpected or content missing", "raw_response": result}

        if not response_schema:
            return raw_content

        try:
            return json.loads(raw_content)
        except json.JSONDecodeError:
            print(f"Warning: LLM returned non-JSON for structured request: {raw_content}")
            return {"error": "LLM returned malformed JSON", "raw_response": raw_content}

    except requests.exceptions.RequestException as e:
        print(f"Error making API request: {e}")
        return {"error": f"Error making API request: {e}"}
    except Exception as e:
        # Boundary catch-all (e.g. a non-JSON or unexpectedly shaped body) so
        # callers always receive either content or an error dict.
        print(f"An unexpected error occurred: {e}")
        return {"error": f"An unexpected error occurred: {e}"}


# --- 2. Part 2: Extract Job Post Information ---
def extract_job_info(job_description_text):
    """
    Asks Gemini to turn a free-text job description into structured fields.

    The request uses a response schema with string fields (jobTitle, company,
    location, experienceLevel) and string-array fields (requiredSkills,
    preferredSkills, responsibilities, qualifications).

    Args:
        job_description_text (str): The full job description text.
    Returns:
        dict: The extracted job details, or None if the API call reported an error.
    """
    print("\n--- Part 2: Extracting Job Post Information ---")
    print("Using LLM to extract key details from the job description...")

    def string_field():
        return {"type": "STRING"}

    def string_list_field():
        return {"type": "ARRAY", "items": {"type": "STRING"}}

    # Single source of truth for both the property definitions and their order.
    field_builders = (
        ("jobTitle", string_field),
        ("company", string_field),
        ("location", string_field),
        ("requiredSkills", string_list_field),
        ("preferredSkills", string_list_field),
        ("experienceLevel", string_field),
        ("responsibilities", string_list_field),
        ("qualifications", string_list_field),
    )
    schema = {
        "type": "OBJECT",
        "properties": {name: build() for name, build in field_builders},
        "propertyOrdering": [name for name, _ in field_builders],
    }

    prompt = f"""
    Analyze the following job description and extract the key information in a structured JSON format.
    Identify required skills, preferred skills, experience level, responsibilities, and qualifications.
    If a field is not explicitly mentioned, provide an empty array for lists or "N/A" for strings.

    Job Description:
    ---
    {job_description_text}
    ---
    """
    llm_result = call_gemini_api(prompt, schema)

    failed = isinstance(llm_result, dict) and "error" in llm_result
    if not failed:
        return llm_result

    print(f"Error extracting job info: {llm_result['error']}")
    return None

def display_job_info(job_info):
    """
    Prints the extracted job details to stdout in a readable layout.

    Scalar fields fall back to 'N/A' when missing; list fields print one
    bullet per entry, or "None specified." when empty or missing.

    Args:
        job_info (dict): The extracted job details.
    """
    if not job_info:
        print("No job information to display.")
        return

    def show_list(heading, key):
        # Prints a heading, then each entry, then a placeholder if there were none.
        print(heading)
        for entry in job_info.get(key, []):
            print(f"    - {entry}")
        if not job_info.get(key):
            print("    None specified.")

    print("\nExtracted Job Information:")
    for label, key in (("Job Title", "jobTitle"), ("Company", "company"), ("Location", "location")):
        print(f"  {label}: {job_info.get(key, 'N/A')}")

    show_list("\n  Required Skills:", "requiredSkills")
    show_list("\n  Preferred Skills:", "preferredSkills")

    print(f"\n  Experience Level: {job_info.get('experienceLevel', 'N/A')}")

    show_list("\n  Responsibilities:", "responsibilities")
    show_list("\n  Qualifications:", "qualifications")

# --- 3. Part 3: Resume Comparison and ATS Score ---
def compare_resume_with_job_post(resume_text, job_info, max_resume_chars=6000):
    """
    Compares the resume text with the extracted job information and calculates an ATS score.

    The job details are serialized to JSON and sent to Gemini together with
    the (possibly truncated) resume text; a response schema requests a score,
    matching keywords, missing keywords and an explanation.

    Args:
        resume_text (str): The extracted text from the resume.
        job_info (dict): The extracted job details.
        max_resume_chars (int, optional): Maximum number of resume characters
            included in the prompt, to keep the request within token limits.
    Returns:
        dict: A dictionary containing the ATS score and explanation, or None
            if the API call reported an error.
    """
    print("\n--- Part 3: Comparing Resume with Job Post & Calculating ATS Score ---")
    print("Using LLM to compare and generate ATS score...")

    # ensure_ascii=False keeps non-ASCII characters readable in the prompt
    # instead of turning them into \uXXXX escape sequences.
    job_info_str = json.dumps(job_info, indent=2, ensure_ascii=False)

    # Truncate here, in code. Previously the explanatory comment sat inside
    # the prompt string and was sent to the model as if it were resume content.
    resume_excerpt = resume_text[:max_resume_chars]
    if len(resume_text) > max_resume_chars:
        print(f"Note: resume text truncated to the first {max_resume_chars} characters for analysis.")

    ats_score_schema = {
        "type": "OBJECT",
        "properties": {
            "atsScore": {"type": "NUMBER"},
            "matchingKeywords": {
                "type": "ARRAY",
                "items": {"type": "STRING"}
            },
            "missingKeywords": {
                "type": "ARRAY",
                "items": {"type": "STRING"}
            },
            "explanation": {"type": "STRING"}
        },
        "propertyOrdering": [
            "atsScore", "matchingKeywords", "missingKeywords", "explanation"
        ]
    }

    prompt = f"""
    You are an Applicant Tracking System (ATS) expert.
    Compare the provided resume content with the job description details.
    Calculate an "ATS Score" out of 100, indicating how well the resume matches the job requirements.
    Provide a list of keywords from the job description that *match* the resume, and a list of *missing* key keywords.
    Finally, give a brief explanation for the score, highlighting strengths and weaknesses for this specific job.

    Job Description Details (extracted):
    ---
    {job_info_str}
    ---

    Resume Content:
    ---
    {resume_excerpt}
    ---

    Provide the output in the specified JSON format.
    """
    ats_analysis = call_gemini_api(prompt, ats_score_schema)

    if isinstance(ats_analysis, dict) and "error" in ats_analysis:
        print(f"Error calculating ATS score: {ats_analysis['error']}")
        return None
    return ats_analysis

def display_ats_results(ats_analysis):
    """
    Prints the ATS score, keyword lists and explanation to stdout.

    Keyword lists print one bullet per entry, or "None identified." when
    empty or missing.

    Args:
        ats_analysis (dict): The ATS comparison results.
    """
    if not ats_analysis:
        print("No ATS analysis results to display.")
        return

    print("\n--- ATS Analysis Results ---")
    print(f"ATS Score: {ats_analysis.get('atsScore', 'N/A')}/100")

    keyword_sections = (
        ("\nMatching Keywords:", "matchingKeywords"),
        ("\nMissing Keywords:", "missingKeywords"),
    )
    for heading, key in keyword_sections:
        print(heading)
        for keyword in ats_analysis.get(key, []):
            print(f"  - {keyword}")
        if not ats_analysis.get(key):
            print("  None identified.")

    print("\nExplanation:")
    print(ats_analysis.get('explanation', 'No explanation provided.'))


# --- 4. PDF Report Generation Function (using ReportLab) ---
def generate_output_pdf(job_description_text, job_info, ats_analysis_results, output_filename="ats_report.pdf"):
    """
    Generates a PDF report of the ATS analysis using ReportLab.

    The report has three sections: a snippet of the job description (first
    1500 characters), the extracted job information, and the ATS results.
    All dynamic text is XML-escaped before being placed in a Paragraph,
    because Paragraph text is interpreted as markup (this function itself
    uses <b> tags); unescaped '&' or '<' in a skill name or description
    could otherwise break parsing or be treated as tags.

    Args:
        job_description_text (str): The full text of the job description.
        job_info (dict): Extracted job information.
        ats_analysis_results (dict): ATS comparison results.
        output_filename (str): The name of the PDF file to save.
    Returns:
        str: The filename of the generated PDF, or None if building the PDF failed.
    """
    # Imported locally so this function does not depend on a file-level import.
    from xml.sax.saxutils import escape

    def safe(value, default="N/A"):
        # Markup-safe text for a Paragraph; None falls back to the default.
        return escape(str(default if value is None else value))

    doc = SimpleDocTemplate(output_filename, pagesize=letter)
    styles = getSampleStyleSheet()
    story = []

    # NOTE: No TrueType font is registered, so the report uses the fonts of the
    # sample stylesheet. If non-Latin characters render incorrectly, download a
    # Unicode TTF (e.g. DejaVuSans.ttf), register it with pdfmetrics.registerFont
    # and set fontName on the styles below.

    # Define custom styles for different sections
    style_title = ParagraphStyle(
        'Title',
        parent=styles['h1'],
        fontSize=20,
        alignment=TA_CENTER,
        spaceAfter=14
    )
    style_heading = ParagraphStyle(
        'Heading',
        parent=styles['h2'],
        fontSize=14,
        spaceBefore=10,
        spaceAfter=6,
        leading=16
    )
    style_body = ParagraphStyle(
        'BodyText',
        parent=styles['Normal'],
        fontSize=10,
        leading=12,
        spaceAfter=6
    )
    style_list_item = ParagraphStyle(
        'ListItem',
        parent=styles['Normal'],
        fontSize=10,
        leading=12,
        leftIndent=20,
        spaceAfter=3
    )

    def add_field(label, value):
        # One "<b>Label:</b> value" line with the value escaped.
        story.append(Paragraph(f"<b>{label}:</b> {safe(value)}", style_body))

    def add_bullets(label, items, empty_text):
        # A bold label followed by one bullet per item, or a placeholder line.
        story.append(Paragraph(f"<b>{label}:</b>", style_body))
        if items:
            for item in items:
                story.append(Paragraph(f"• {safe(item)}", style_list_item))
        else:
            story.append(Paragraph(empty_text, style_list_item))

    def add_gap(height_in_inches=0.1):
        story.append(Spacer(1, height_in_inches * inch))

    # Title
    story.append(Paragraph("ATS Resume Analysis Report", style_title))
    add_gap(0.2)

    # Section: Original Job Description Snippet
    story.append(Paragraph("1. Original Job Description Snippet", style_heading))
    description = job_description_text or ""
    # Truncate BEFORE escaping so an entity such as "&amp;" is never cut in half.
    snippet = description[:1500] + "..." if len(description) > 1500 else description
    story.append(Paragraph(escape(snippet), style_body))
    add_gap()

    # Section: Extracted Job Information
    story.append(Paragraph("2. Extracted Job Information", style_heading))
    if job_info:
        add_field("Job Title", job_info.get('jobTitle'))
        add_field("Company", job_info.get('company'))
        add_field("Location", job_info.get('location'))
        add_gap()

        add_bullets("Required Skills", job_info.get('requiredSkills'), "None specified.")
        add_gap()

        add_bullets("Preferred Skills", job_info.get('preferredSkills'), "None specified.")
        add_gap()

        add_field("Experience Level", job_info.get('experienceLevel'))
        add_gap()

        add_bullets("Responsibilities", job_info.get('responsibilities'), "None specified.")
        add_gap()

        add_bullets("Qualifications", job_info.get('qualifications'), "None specified.")
    else:
        story.append(Paragraph("No job information extracted.", style_body))
    add_gap(0.2)

    # Section: ATS Analysis Results
    story.append(Paragraph("3. ATS Analysis Results", style_heading))
    if ats_analysis_results:
        story.append(Paragraph(f"<b>ATS Score:</b> {safe(ats_analysis_results.get('atsScore'))}/100", style_body))
        add_gap()

        add_bullets("Matching Keywords", ats_analysis_results.get('matchingKeywords'), "None identified.")
        add_gap()

        add_bullets("Missing Keywords", ats_analysis_results.get('missingKeywords'), "None identified.")
        add_gap()

        story.append(Paragraph("<b>Explanation:</b>", style_body))
        story.append(Paragraph(safe(ats_analysis_results.get('explanation'), 'No explanation provided.'), style_body))
    else:
        story.append(Paragraph("No ATS analysis results available.", style_body))
    add_gap(0.2)

    try:
        doc.build(story)
        return output_filename
    except Exception as e:
        # Best-effort: report the failure and let the caller handle a None result.
        print(f"Error building PDF with ReportLab: {e}")
        return None


# --- Main Execution Flow ---
def main():
    """
    Runs the full ATS check interactively, stopping early if any step fails.

    Steps: upload and read the job description PDF, extract structured job
    info, upload and read the resume PDF, score the resume against the job
    info, then build the PDF report and trigger its download.
    """
    def preview(text, limit=500):
        # First `limit` characters, with an ellipsis when the text is longer.
        return text[:limit] + "..." if len(text) > limit else text

    # 1. Get Job Description PDF and Extract Text
    print("--- Part 1: Upload Job Description PDF ---")
    jd_bytes = upload_pdf_file("Please upload the Job Description (PDF file):")
    if jd_bytes is None:
        return

    print("\n--- Extracting text from Job Description PDF ---")
    jd_text = extract_text_from_pdf(jd_bytes)
    if not jd_text:
        print("Could not extract text from the Job Description PDF. Please check the file.")
        return

    print("\n--- Extracted Job Description Text (first 500 chars) ---")
    print(preview(jd_text))

    # 2. Extract Job Post Information
    job_details = extract_job_info(jd_text)
    if job_details is None:
        return
    display_job_info(job_details)

    # 3. Upload Resume PDF and Extract Text
    print("\n--- Preparing Resume for Comparison ---")
    resume_bytes = upload_pdf_file("Please upload your Resume (PDF file):")
    if resume_bytes is None:
        return

    print("\n--- Extracting text from Resume PDF ---")
    resume_text = extract_text_from_pdf(resume_bytes)
    if not resume_text:
        print("Could not extract text from the Resume PDF. Please check the file.")
        return

    print("\n--- Extracted Resume Text (first 500 chars) ---")
    print(preview(resume_text))

    # 4. Compare Resume with Job Post and Calculate ATS Score
    analysis = compare_resume_with_job_post(resume_text, job_details)
    if analysis is None:
        return
    display_ats_results(analysis)

    # 5. Generate and Download PDF Report
    print("\n--- Generating PDF Report ---")
    report_path = generate_output_pdf(jd_text, job_details, analysis)
    if not report_path:
        print("Failed to generate PDF report.")
        return

    print(f"Report saved as '{report_path}'.")
    files.download(report_path)  # This will trigger the download in Colab

# Run the interactive flow only when executed directly (a notebook cell or a
# script), not when this file is imported as a module.
if __name__ == "__main__":
    main()
