<a href="https://colab.research.google.com/github/saurabhk19/ResumeLLM/blob/main/ResumeLLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Install packages
# %pip (instead of a `pip` shell command) installs into the environment of the running kernel
get_ipython().run_line_magic('pip', 'install langchain pdfplumber docx2txt fastapi uvicorn streamlit')



In [10]:
import pdfplumber
import docx2txt
import json

def extract_text_from_pdf(pdf_path):
    """Extracts the text of every page of a PDF file.

    Page texts are joined with a newline so the last word of one page does
    not run into the first word of the next one. Pages for which
    ``extract_text()`` yields nothing are skipped.

    Args:
        pdf_path (str): Path of the PDF file to read.

    Returns:
        str | None: The extracted text (an empty string when no page yields
        text), or None if opening or reading the file raised an exception.
    """
    try:
        with pdfplumber.open(pdf_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return None
    # Drop empty/None pages before joining so no stray blank lines are produced.
    return "\n".join(page_text for page_text in page_texts if page_text)

def extract_text_from_docx(docx_path):
    """Read the text content of a DOCX file via docx2txt.

    Args:
        docx_path (str): Path of the document to read.

    Returns:
        The value returned by ``docx2txt.process``, or None if that call
        raised an exception (the error is printed).
    """
    try:
        extracted = docx2txt.process(docx_path)
    except Exception as err:
        print(f"Error extracting text from DOCX: {err}")
        extracted = None
    return extracted

def extract_text_to_json(file_path):
    """Extract the text of a PDF/DOCX file and wrap it in a JSON string.

    The extractor is chosen from the (case-insensitive) file extension:
    ``.pdf`` goes to ``extract_text_from_pdf``; ``.docx`` and ``.doc`` go to
    ``extract_text_from_docx``.

    Args:
        file_path (str): Path of the file to process.

    Returns:
        str | None: ``{"text": <extracted text>}`` serialised with a 4-space
        indent, or None when the extension is unsupported or the extractor
        returned None.
    """
    lowered_path = file_path.lower()

    if lowered_path.endswith('.pdf'):
        raw_text = extract_text_from_pdf(file_path)
    elif lowered_path.endswith(('.docx', '.doc')):
        raw_text = extract_text_from_docx(file_path)
    else:
        print("Unsupported file type. Please provide a PDF or DOCX file.")
        return None

    # An extractor signals failure with None; propagate that to the caller.
    if raw_text is None:
        return None
    return json.dumps({"text": raw_text}, indent=4)

In [11]:
# Note: docx2txt is also installed by the first cell of this notebook.
# %pip installs into the environment of the running kernel.
get_ipython().run_line_magic('pip', 'install docx2txt')



In [12]:
# Note: pdfplumber is also installed by the first cell of this notebook.
# %pip installs into the environment of the running kernel.
get_ipython().run_line_magic('pip', 'install pdfplumber')



In [28]:
def extract_job_description_info_with_gemini(job_description_text, model_name="gemini-2.0-flash-lite"):
    """
    Ask a Gemini model for the requirements, responsibilities and keywords of a job description.

    Args:
        job_description_text (str): The job description to analyse.
        model_name (str): Gemini model to query (default is "gemini-2.0-flash-lite").

    Returns:
        The model reply parsed as JSON (normally a dict); {"raw_response": <reply text>}
        when the reply is not valid JSON; None when the API key is not configured,
        the text is empty, or any other error occurs.
    """
    if not GOOGLE_API_KEY:
        print("Google AI API key not configured. Cannot extract information.")
        return None

    if not job_description_text:
        print("No job description text provided.")
        return None

    try:
        # A fresh single-message chat session is opened for every call.
        session = genai.GenerativeModel(model_name).start_chat(history=[])
        prompt = (
            "Extract the following fields from this job description:\n"
            "requirements, responsibilities, keywords.\n"
            "\n"
            "Job Description Text:\n"
            f"{job_description_text}\n"
            "\n"
            "Provide the output in a JSON format with the extracted fields.\n"
        )
        reply = session.send_message(prompt)

        try:
            parsed_reply = json.loads(reply.text)
        except json.JSONDecodeError:
            # Hand the unparsed reply back so the caller can still inspect it.
            print("Failed to parse LLM response as JSON. Raw response:")
            print(reply.text)
            return {"raw_response": reply.text}
        return parsed_reply

    except Exception as err:
        print(f"Error during job description information extraction with Gemini: {err}")
        return None

# Example usage (replace the sample `job_description` string below with the actual job description text)
# job_description = "This is a sample job description text with requirements, responsibilities, and keywords."
# job_info = extract_job_description_info_with_gemini(job_description)
# if job_info:
#     print("\nExtracted Job Description Information:")
#     if "raw_response" in job_info:
#         print("Raw response from model:")
#         print(job_info["raw_response"])
#     else:
#         print(json.dumps(job_info, indent=4))
# else:
#     print("Failed to extract job description information.")



In [36]:
# Import the Python SDK
import google.generativeai as genai
import json
import os
# Used to securely store your API key
from google.colab import userdata

# Assuming extract_text_to_json is defined in a previous cell and available in the environment
# from your_text_extraction_module import extract_text_to_json

# Name of the Colab secret that holds the Google AI API key.
API_KEY_SECRET_NAME = 'Colab_for_resumellm'

# Read the API key from Colab Secrets and configure the SDK with it.
# On any failure GOOGLE_API_KEY is set to None, which the functions below
# check before calling the model.
try:
    GOOGLE_API_KEY = userdata.get(API_KEY_SECRET_NAME)
    genai.configure(api_key=GOOGLE_API_KEY)
except Exception as e:
    print(f"Error configuring Google AI API: {e}")
    # Report the secret name that is actually looked up above.
    print(f"Please make sure you have added your API key to Colab Secrets with the name {API_KEY_SECRET_NAME}.")
    GOOGLE_API_KEY = None # Set to None if API key is not found

def extract_resume_info_with_gemini(json_text, model_name="gemini-2.0-flash-lite"):
    """
    Ask a Gemini model for the key fields of a resume.

    Args:
        json_text (str): JSON document whose "text" entry holds the resume text.
        model_name (str): Gemini model to query (default is "gemini-2.0-flash-lite").

    Returns:
        The model reply parsed as JSON (normally a dict); {"raw_response": <reply text>}
        when the reply is not valid JSON; None when the API key is not configured,
        the JSON carries no text, or any other error occurs.
    """
    if not GOOGLE_API_KEY:
        print("Google AI API key not configured. Cannot extract information.")
        return None

    try:
        resume_text = json.loads(json_text).get("text", "")
        if not resume_text:
            print("No text found in the provided JSON.")
            return None

        # A fresh single-message chat session is opened for every call.
        session = genai.GenerativeModel(model_name).start_chat(history=[])
        prompt = (
            "Extract the following fields from this resume:\n"
            "name, contact, education, experience, skills, achievements.\n"
            "\n"
            "Resume Text:\n"
            f"{resume_text}\n"
            "\n"
            "Provide the output in a JSON format with the extracted fields.\n"
        )
        reply = session.send_message(prompt)

        try:
            parsed_reply = json.loads(reply.text)
        except json.JSONDecodeError:
            # The unparsed reply is returned (not printed) for the caller to handle.
            print("Failed to parse LLM response as JSON. Raw response:")
            return {"raw_response": reply.text}
        return parsed_reply

    except Exception as err:
        print(f"Error during resume information extraction with Gemini: {err}")
        return None

def _info_to_json_string(info):
    """Serialise an extractor result into the JSON string the tailoring step expects.

    The extractors return either parsed JSON data or ``{"raw_response": text}``
    when the model reply could not be parsed as-is. In the latter case a
    Markdown code fence (```json ... ```) around the reply is removed so the
    payload inside it can be parsed downstream.
    """
    import re  # local import: only this helper needs it

    if isinstance(info, dict) and "raw_response" in info:
        raw_reply = info["raw_response"] or ""
        fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", raw_reply, re.DOTALL | re.IGNORECASE)
        return fenced.group(1) if fenced else raw_reply.strip()
    return json.dumps(info)


def main(file_path, job_description_text=None):
    """
    Extract resume and job description information with Gemini, then tailor the resume.

    Steps: extract the text of the file, extract the resume fields, extract the
    job description fields, and finally ask for an updated resume. Every
    intermediate result is printed. The tailoring step is skipped (with a
    message) when either extraction produced nothing.

    Args:
        file_path (str): The path to the file (PDF or DOCX) to process.
        job_description_text (str, optional): Job description to tailor the resume to.
            When omitted, a built-in sample job description is used.

    Returns:
        None. All results are reported through print().
    """
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        return

    print(f"Processing file: {file_path}")

    # Bound up front so the tailoring step can tell that extraction did not happen.
    extracted_info = None

    # Extract structured json from uploaded resume file.
    json_text = extract_text_to_json(file_path)

    if json_text:
        print("Text extracted successfully. Extracting information with Gemini...")

        # Extract information using Gemini
        extracted_info = extract_resume_info_with_gemini(json_text)

        if extracted_info is None:
            print("Failed to extract information using Gemini.")
        else:
            print("\nExtracted Information:")
            if isinstance(extracted_info, dict) and "raw_response" in extracted_info:
                print("Raw response from model:")
                print(extracted_info["raw_response"])
            elif isinstance(extracted_info, dict):
                print(json.dumps(extracted_info, indent=4))
            else:
                print("Unexpected output format from Gemini extraction.")
                print(f"Output type: {type(extracted_info)}")
                print(f"Output: {extracted_info}") # Print the unexpected output
    else:
        print("Failed to extract text from the file.")

    if job_description_text is None:
        # Example job description text
        job_description_text = """
    Job Title: Software Engineer

    Responsibilities:
    - Develop, test, and deploy high-quality software solutions.
    - Collaborate with cross-functional teams to define, design, and ship new features.
    - Write clean, maintainable, and efficient code.
    - Participate in code reviews and provide constructive feedback.
    - Troubleshoot and debug production issues.

    Requirements:
    - Bachelor's degree in Computer Science or a related field.
    - 3+ years of experience in software development.
    - Proficiency in Python and JavaScript.
    - Experience with cloud platforms (e.g., AWS, Google Cloud).
    - Strong understanding of data structures and algorithms.

    Keywords:
    Software Engineer, Python, JavaScript, AWS, Google Cloud, Agile, Development, Testing, Deployment, Troubleshooting
    """

    # Extract requirements, responsibilities, and keywords from the job description
    job_info = extract_job_description_info_with_gemini(job_description_text)

    # Print the extracted information
    if job_info:
        print("\nExtracted Job Description Information:")
        if "raw_response" in job_info:
            print("Raw response from model:")
            print(job_info["raw_response"])
        else:
            print(json.dumps(job_info, indent=4))
    else:
        print("Failed to extract job description information.")

    # Tailoring needs both inputs; bail out instead of failing on a missing one.
    if not extracted_info or not job_info:
        print("Skipping resume tailoring: resume or job description information is missing.")
        return

    # generate new resume json based on the JD
    updated_extracted_info = generate_resume_info_based_on_JD(
        _info_to_json_string(extracted_info),
        _info_to_json_string(job_info),
    )
    if updated_extracted_info is None:
        print("Failed to generate updated resume information.")
        return

    print("\nUpdated Extracted Information:")
    if isinstance(updated_extracted_info, dict) and "raw_response" in updated_extracted_info:
        print("Raw response from model:")
        print(updated_extracted_info["raw_response"])
    else:
        print(json.dumps(updated_extracted_info, indent=4))

# Example usage (replace 'your_resume.pdf' or 'your_resume.docx' with the actual file path)
# if __name__ == "__main__":
#     resume_file_path = 'your_resume.pdf'  # Or 'your_resume.docx'
#     main(resume_file_path)

In [34]:
def _load_json_payload(value):
    """Return parsed JSON data from ``value``, tolerating a Markdown code fence.

    Already-parsed data (dict or list) is returned unchanged. Text is first
    parsed as-is; if that fails and the text contains a fenced block
    (```json ... ```), the content of the first fenced block is parsed instead.

    Raises:
        json.JSONDecodeError: If no JSON document can be parsed from the text.
        TypeError: If ``value`` is of a type ``json.loads`` does not accept.
    """
    import re  # local import: only this helper needs it

    if isinstance(value, (dict, list)):
        return value
    try:
        return json.loads(value)
    except json.JSONDecodeError:
        if not isinstance(value, str):
            raise
        fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", value, re.DOTALL | re.IGNORECASE)
        if fenced is None:
            raise
        return json.loads(fenced.group(1))


def generate_resume_info_based_on_JD(resume_json_string, job_description_json_string, model_name="gemini-2.0-flash-lite"):
    """
    Generates updated resume information based on job description using a Gemini LLM model.

    Both inputs may be a JSON string, a JSON string wrapped in a Markdown code
    fence (the form in which the model replies were captured in this notebook),
    or already-parsed data. The model reply is parsed with the same tolerance.

    Args:
        resume_json_string (str | dict): The extracted resume information.
        job_description_json_string (str | dict): The extracted job description information.
        model_name (str): The name of the Gemini model to use (default is "gemini-2.0-flash-lite").

    Returns:
        The updated resume information parsed from the model reply (normally a dict);
        {"raw_response": <reply text>} when the reply cannot be parsed as JSON;
        None when the API key is not configured, an input cannot be parsed,
        or any other error occurs.
    """
    if not GOOGLE_API_KEY:
        print("Google AI API key not configured. Cannot generate updated information.")
        return None

    try:
        # Load the JSON data
        resume_data = _load_json_payload(resume_json_string)
        job_description_data = _load_json_payload(job_description_json_string)

        # Initialize the Gemini model
        gemini_model = genai.GenerativeModel(model_name)

        prompt = f"""You are a helpful assistant that helps tailor a resume based on a job description.
Given the following resume information and job description information in JSON format, generate an updated resume JSON.
The updated resume should highlight the most relevant skills, experience, and achievements from the original resume based on the requirements, responsibilities, and keywords in the job description.
Do not invent information that is not present in the original resume.
Ensure the output is in valid JSON format, similar to the structure of the input resume JSON.

Resume Information:
{json.dumps(resume_data, indent=4)}

Job Description Information:
{json.dumps(job_description_data, indent=4)}

Updated Resume Information (JSON format):
"""
        # Use start_chat and send the prompt as a message
        chat = gemini_model.start_chat(history=[])
        response = chat.send_message(prompt)
        reply_text = response.text

        # Attempt to parse the response as JSON (plain or fenced)
        try:
            return _load_json_payload(reply_text)
        except json.JSONDecodeError:
            print("Failed to parse LLM response as JSON for updated resume. Raw response:")
            print(reply_text)
            return {"raw_response": reply_text}

    except json.JSONDecodeError as e:
        # Only input parsing can reach this handler; reply parsing is handled above.
        print(f"Error parsing input JSON strings: {e}")
        return None
    except Exception as e:
        print(f"Error during resume update with Gemini: {e}")
        return None

# Example usage (you would replace the JSON strings with your actual data)
# sample_resume_json = """{"name": "John Doe", "skills": ["Python", "Java"], "experience": [...]}"""
# sample_jd_json = """{"requirements": ["Python experience"], "responsibilities": [...], "keywords": [...]}"""
# updated_resume = generate_resume_info_based_on_JD(sample_resume_json, sample_jd_json)
# if updated_resume:
#     print("\nUpdated Resume Information:")
#     if "raw_response" in updated_resume:
#         print("Raw response from model:")
#         print(updated_resume["raw_response"])
#     else:
#         print(json.dumps(updated_resume, indent=4))
# else:
#     print("Failed to generate updated resume information.")

In [37]:
# Path of the resume file to process; change it to point at your own PDF or DOCX file (e.g., 'my_resume.pdf')
resume_file_path = '/content/Test_Resume.docx'
main(resume_file_path)


Processing file: /content/Test_Resume.docx
Text extracted successfully. Extracting information with Gemini...
Failed to parse LLM response as JSON. Raw response:

Extracted Information:
Raw response from model:
```json
{
  "name": "Henry Kissinger Sr.",
  "contact": {
    "location": "New York, US"
  },
  "education": [
    {
      "degree": "Bachelor of Engineering (Electronics Engineering)",
      "institution": "University of Texas",
      "location": "Housten",
      "year": "2005-2009"
    },
    {
      "degree": "Post Graduate Diploma in VLSI design",
      "institution": "Columbia University"
    }
  ],
  "experience": [
    {
      "title": "Technical lead/ Solutions Architect",
      "company": "top US bank",
      "years": "2013 – Till date",
      "location": "New York, US",
      "description": "Worked on designing and implementing software systems for a top US bank. The projects included check processing, online BillPay, online money transfers, mobile banking, client segm