<a href="https://colab.research.google.com/github/solu22/match-my-cv/blob/development/resume-analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
import pandas as pd
import re
import json
from google import genai
from google.colab import userdata
import os



# Sample job postings the uploaded CV is compared against.
# Each posting is one string, split at sentence boundaries for readability;
# adjacent literals are concatenated by Python, so the spacing inside each
# fragment (including the double spaces) is part of the text.
job_1 = (
    "You will be part of our Patent Transactions team in Technology Standards.  "
    "Our team is mostly focused on securing the best possible patent acquisitions (and related) at the earliest possible dates that are consistent with Technology Standards strategy. "
    "We are looking for a Software Developer with expertise in artificial intelligence (AI) and machine learning (ML) to optimize software tools for patent acquisition-related analysis.  "
    "In this role, you will optimize software solutions on approved platforms that integrate the latest AI and ML methods into patent analysis workflows. "
    "You will work with massive datasets—including global patent collections, technical standards, scientific literature, and other technical documents—and optimize tools that automate and enhance tasks such as patent categorization, prior art relevance assessment, technology mapping, and large-scale document evaluation.  "
    "You will collaborate closely with patent acquisition specialists, data scientists, and domain experts to optimize high-impact tools that scale across millions of documents."
)
job_2 = (
    "Hitachi Energy is committed to creating a sustainable, flexible, and secure energy system. "
    "We need talented people from different backgrounds, genders, and cultures to achieve our purpose of advancing a sustainable energy future for all. "
    "Our goal is to attract diverse talent by providing learning opportunity during the summer for students at all levels in Finland."
)
job_3 = (
    "As a Software Engineer, you will be instrumental in the evolution of our sales configuration tool. "
    "This role focuses on the crucial daily maintenance of complex configuration rules to ensure seamless functionality and an optimal user experience, alongside hands-on development work in Java. "
    "You will contribute to a modern, agile environment, enhancing our core product."
)

# Map a short job id to its description text; the ids are sent to the model
# together with the descriptions.
job_descriptions = dict(desc1=job_1, desc2=job_2, desc3=job_3)
# Ask the user for the CV workbook through the Colab upload widget.
# The result is a mapping keyed by the uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Nothing was uploaded (e.g. the picker was dismissed): stop with a clear
    # message instead of an IndexError on the lookup below.
    raise ValueError("No file was uploaded; please upload the CV Excel file and re-run this cell.")
# Only the first uploaded file is used.
filename = next(iter(uploaded))
# Read the workbook here so an unreadable file fails before any analysis starts.
df = pd.read_excel(filename)
print(f" File {filename} uploaded successfully")


def _extract_skills_text(df):
    """Return the CV's skills text, or None (after printing why) if it is unavailable."""
    missing = [col for col in ("Section", "Details") if col not in df.columns]
    if missing:
        print(f"Error: CV sheet is missing required column(s): {', '.join(missing)}")
        return None

    # astype(str) keeps the .str accessor usable when the column holds
    # non-text cells (numbers, blanks).
    is_skills = df["Section"].astype(str).str.contains("Skills", case=False, na=False)
    skills_rows = df[is_skills]
    if skills_rows.empty:
        print("Error: Could not find a row labeled 'Skills' in the 'Section' column")
        return None

    # Only the first matching row is used.
    details = skills_rows.iloc[0]["Details"]
    if pd.isna(details) or not str(details).strip():
        print("Error: The 'Skills' row has no content in the 'Details' column")
        return None
    return str(details)


def _build_prompt(skills_text, job_desc):
    """Return the instruction prompt embedding the CV skills and the job descriptions."""
    # The job descriptions come from the caller's argument, not from a
    # module-level global, so the function works for any mapping passed in.
    return f"""
You are an HR analyst. Your job is to compare a candidate’s CV skills with a set of job descriptions.
Use ONLY the skills listed in the CV. Do NOT infer any skills not mentioned explicitly.

RULES:
- Do NOT hallucinate or assume skills not in the CV.
- Use ONLY the candidate’s listed skills.
- Compare the two lists mathematically:
     - MATCH = (Candidate Skills) INTERSECT (Job Requirements)
     - MISSING = (Job Requirements) MINUS (Candidate Skills)
- Return VALID JSON ONLY. Do not include markdown or explanations outside JSON.
- Keep summaries concise and factual.

STANDARDIZATION:
- Technical Skills: programming languages, AI/ML tools, frameworks, domain-specific software.
- Domain Knowledge: industry-specific concepts, standards, processes, terminology.
- Relevance: How well the candidate’s skills match the job description.
- Gaps: Skills mentioned in the job but missing in the CV.

OUTPUT STRUCTURE:
Return JSON with these fields for each job:

{{
  "candidate_skills": [list of skills exactly as in CV],
  "job_matches": {{
    "job_id": {{
      "strengths": [skills listed in the candidate_skills that MATCH requirements in the job description],
      "weaknesses": [keywords in the job description missing from candidate skills],
      "suggestions": [practical advice to improve skills or fill gaps]
    }}
  }}
}}

Candidate Skills:
  {skills_text}

Job Descriptions:
  {json.dumps(job_desc, indent=2)}
"""


def _strip_code_fence(text):
    """Return the payload of the first Markdown code fence in text, or text itself if unfenced."""
    text = text.strip()
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", text, flags=re.DOTALL | re.IGNORECASE)
    if fenced:
        return fenced.group(1).strip()
    # No complete fence: still drop a dangling opening marker if present.
    return re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE).strip()


def _format_items(items):
    """Render a model-provided field as ' , '-separated text, whatever its JSON type."""
    if items is None:
        return ""
    if isinstance(items, str):
        # Joining a plain string would interleave the separator between characters.
        return items
    if isinstance(items, (list, tuple)):
        return " , ".join(str(item) for item in items)
    return str(items)


def _print_report(result):
    """Print the candidate skills and the per-job strengths, weaknesses and suggestions."""
    print("\nCandidate Skills:", result.get("candidate_skills", []))

    job_matches = result.get("job_matches", {})
    if not isinstance(job_matches, dict):
        print("Error: 'job_matches' in the response is not a JSON object")
        return

    for job_id, job_info in job_matches.items():
        if not isinstance(job_info, dict):
            # Malformed entry: still print the headings, with empty values.
            job_info = {}
        print(f"\nJob ID: {job_id}")
        print("Strengths:", _format_items(job_info.get("strengths", [])))
        print("Weaknesses:", _format_items(job_info.get("weaknesses", [])))
        print("Suggestions:", _format_items(job_info.get("suggestions", [])))


def match_job(file, job_desc):
    """Compare the skills listed in a CV workbook with job descriptions using Gemini.

    Reads the workbook, takes the 'Details' cell of the first row whose
    'Section' contains "Skills" (case-insensitive), sends it together with
    ``job_desc`` to the "gemini-2.5-flash" model and prints the parsed result.
    Every failure is reported with a printed message followed by an early
    return; nothing is raised for the handled cases.

    Args:
        file: Path or file-like object accepted by ``pandas.read_excel``.
        job_desc: JSON-serialisable mapping of job id to job description text.

    Returns:
        None. The report is printed to standard output.
    """
    # Load CV. read_excel can fail in many ways (missing file, bad format,
    # missing engine), so this boundary reports any of them the same way.
    try:
        df = pd.read_excel(file)
    except Exception as e:
        print(f"Error reading Excel file: {e}")
        return

    # Extract candidate skills.
    skills_text = _extract_skills_text(df)
    if skills_text is None:
        return

    # Prompt generation.
    prompt = _build_prompt(skills_text, job_desc)

    # Gemini configuration. The secret lookup is guarded separately so a
    # missing or inaccessible secret is not reported as an API-call failure.
    try:
        API_KEY = userdata.get("GEMINI_API_KEY")
    except Exception as e:
        print(f"Error loading GEMINI_API_KEY: {e}")
        return
    print("API Key loaded:", bool(API_KEY))

    try:
        client = genai.Client(api_key=API_KEY)
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt,
        )
    except Exception as e:
        print(f"Error calling Gemini API: {e}")
        return

    # Guard against a response that carries no text at all.
    result_text = (response.text or "").strip()
    if not result_text:
        print("No data returned from Gemini.")
        return

    # Clean Gemini response: the model may wrap the JSON in a Markdown fence.
    clean_result = _strip_code_fence(result_text)

    try:
        result_dict = json.loads(clean_result)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return
    print("Json parsed successfully")

    if not result_dict:
        print("No data returned from Gemini.")
        return
    if not isinstance(result_dict, dict):
        print(f"Error: Expected a JSON object from Gemini, got {type(result_dict).__name__}")
        return

    _print_report(result_dict)


# Run the comparison for the uploaded CV against the sample job descriptions.
match_job(file=filename, job_desc=job_descriptions)