In [2]:
import os
from pathlib import Path
from dotenv import load_dotenv

# Debug aid: show where the kernel is running and what sits next to the notebook,
# so a misplaced secrets file is easy to spot.
print(f"Current working directory: {os.getcwd()}")
print(f"Files in current directory: {os.listdir(os.getcwd())}")

# The API key is read from a dotenv-style file resolved relative to the working directory.
dotenv_path = Path('.') / 'secrets.txt'

load_dotenv(dotenv_path=dotenv_path)

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Truthiness check rather than `is None`: an empty value (for example a bare
# `GOOGLE_API_KEY=` line) is just as unusable as a missing variable and must
# not be reported as a successful setup.
if not GOOGLE_API_KEY:
    print(
        f"❌ Authentication Error: GOOGLE_API_KEY not found in the '{dotenv_path}' file."
    )
    print("Please check the file name, location, and the variable naming inside the file.")
else:
    # NOTE(review): this echoes the first four characters of the key into the saved
    # notebook output; consider dropping the prefix before sharing the notebook.
    print(f"✅ Gemini API key setup complete from {dotenv_path}. Key starts with: {GOOGLE_API_KEY[:4]}")

Current working directory: /Users/rameshjayasekaran/Downloads/ai-job-search-assistant
Files in current directory: ['ai-job-search-assistant-notebook.ipynb', 'Untitled.ipynb', 'README.md', 'secrets.txt', '.ipynb_checkpoints', '.git']
✅ Gemini API key setup complete from secrets.txt. Key starts with: AIza


In [3]:
pip install pydantic pandas fastapi uvicorn python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [4]:
import json, os, re, uuid, time
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

import pandas as pd
from pydantic import BaseModel
# Directory name used for logs; create it now if it is not there yet.
LOG_DIR = "logs"
os.makedirs(name=LOG_DIR, exist_ok=True)

In [5]:
def mock_call_gemini(prompt: str, temperature: float = 0.0, max_tokens: int = 512) -> str:
    """
    Deterministic stand-in for a Gemini call, meant for development only.

    The reply is chosen by the first marker found in ``prompt``, checked in
    this order: ``EXTRACT_RESUME_JSON``, ``TAILOR_BULLETS``, ``COVER_LETTER``,
    ``MATCH_EXPLAIN``. If none is present, the first 400 characters of the
    prompt (newlines turned into spaces) are echoed back behind a ``[MOCK]``
    prefix.

    ``temperature`` and ``max_tokens`` are accepted but ignored by the mock.
    Replace with the actual Vertex AI / Gemini call when ready.
    """
    sample_resume = {
        "name": "Arun Kumar",
        "title": "Backend Developer",
        "emails": ["arun@example.com"],
        "phones": ["+919900112233"],
        "skills": ["Java", "Spring Boot", "SQL", "REST", "Docker"],
        "years_experience": 4,
        "experience": [
            {
                "title": "Backend Developer",
                "company": "Acme",
                "start": "2021-06",
                "end": "2024-09",
                "bullets": ["Built REST APIs using Spring Boot."],
            }
        ],
        "education": ["B.Tech Computer Science"],
    }
    sample_bullets = {
        "bullets": [
            "Designed and implemented RESTful APIs using Spring Boot, improving response time by 30%.",
            "Built database schemas and optimized SQL queries to reduce average query latency by 25%.",
        ]
    }
    sample_letter = "Dear Hiring Manager,\n\nI am excited to apply for the Backend Developer role at Acme... (sample cover letter)\n"
    sample_match = {"match_score": 0.78, "reasons": ["Matches 5/7 skills"], "gaps": ["System design"]}

    # Order matters: the first marker that appears in the prompt wins.
    canned_replies = (
        ("EXTRACT_RESUME_JSON", lambda: json.dumps(sample_resume)),
        ("TAILOR_BULLETS", lambda: json.dumps(sample_bullets)),
        ("COVER_LETTER", lambda: sample_letter),
        ("MATCH_EXPLAIN", lambda: json.dumps(sample_match)),
    )
    for marker, make_reply in canned_replies:
        if marker in prompt:
            return make_reply()

    # No known marker: echo a flattened excerpt of the prompt.
    excerpt = prompt[:400].replace("\n", " ")
    return "[MOCK] " + (excerpt + " ...")

In [7]:
# Load the job postings that the search agent iterates over.
# The path is relative to the notebook's working directory; edit data/jobs.json
# to change the postings. JSON is read as UTF-8 explicitly so the result does
# not depend on the platform's default encoding.
with open("data/jobs.json", "r", encoding="utf-8") as f:
    JOBS = json.load(f)
len(JOBS)

10

In [8]:
class Resume(BaseModel):
    """Structured resume with the same keys that resume_text_to_json produces."""

    # Optional[...] so that an explicit None validates: the heuristic parser
    # emits "title": None, which a plain `str` annotation does not accept in
    # every pydantic version.
    name: Optional[str] = None
    title: Optional[str] = None
    emails: List[str] = []
    phones: List[str] = []
    skills: List[str] = []
    years_experience: int = 0
    # Free-form experience entries (dicts); no schema is enforced on them here.
    experience: List[Dict[str, Any]] = []
    education: List[str] = []

class Job(BaseModel):
    """Job posting model; only id, title, company and location are required."""

    id: str
    title: str
    company: str
    location: str
    # search_jobs adds a point for jobs with a truthy "remote" flag when the remote filter is set.
    remote: bool = False
    # Compared case-insensitively against the resume's skills in match_resume_to_jobs.
    skills: List[str] = []
    # match_resume_to_jobs applies a penalty only for the value "senior".
    seniority: str = "mid"
    salary: str = ""
    description: str = ""

In [9]:
#Resume Agent
def resume_text_to_json(resume_text: str) -> dict:
    """
    Turn raw resume text into a resume dict.

    Emails, phone numbers and a "Skills" line are extracted with regular
    expressions. If fewer than two skills or no email are found, the text is
    handed to ``mock_call_gemini`` with the ``EXTRACT_RESUME_JSON`` marker and
    its parsed JSON reply is returned as-is.

    Otherwise the dict is built from the heuristics alone: ``name`` is the
    first non-blank line of the text, ``title`` is None, ``years_experience``
    is a fixed placeholder of 3, and ``experience`` / ``education`` are empty.

    Args:
        resume_text: Plain-text resume.

    Returns:
        dict with keys name, title, emails, phones, skills, years_experience,
        experience and education (heuristic path), or whatever JSON object the
        LLM mock returns (fallback path).
    """
    # quick regex heuristics (email/phone), otherwise fallback to LLM
    emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", resume_text)
    phones = re.findall(r"\+?\d[\d \-]{7,}\d", resume_text)

    # try to extract skills heuristically: look for "Skills:" line
    skills = []
    m = re.search(r"Skills[:\s]+(.+)", resume_text, re.IGNORECASE)
    if m:
        skills = [s.strip() for s in re.split(r",|\||;", m.group(1)) if s.strip()]

    # If minimal data only, call LLM mock to get structured JSON
    if len(skills) < 2 or not emails:
        prompt = f"EXTRACT_RESUME_JSON\n\n{resume_text}"
        out = mock_call_gemini(prompt, temperature=0)
        return json.loads(out)

    # Take the first non-blank line as the name. Using splitlines()[0] directly
    # yields an empty name when the text starts with a blank line.
    name = next(
        (line.strip() for line in resume_text.splitlines() if line.strip()),
        "Unknown",
    )

    # otherwise build simple JSON
    return {
        "name": name,
        "title": None,
        "emails": emails,
        "phones": phones,
        "skills": skills,
        "years_experience": 3,
        "experience": [],
        "education": []
    }

In [11]:
#Search Agent
def search_jobs(query: str="", filters: Dict[str,Any]=None, top_k: int=20) -> List[Dict]:
    """
    Score every posting in the module-level ``JOBS`` list and return the best ones.

    Scoring per job:
      * +2 if the lower-cased query is a substring of the job's title, company
        or description (skipped when the query is empty)
      * +1 if ``filters["remote"]`` is exactly ``True`` and the job is flagged remote
      * +1 if ``filters["location"]`` is a case-insensitive substring of the job's location

    Filters only add to the score; they never exclude a job. The result is the
    ``top_k`` highest-scoring job dicts, with ties kept in ``JOBS`` order.
    """
    active_filters = filters if filters else {}
    needle = query.lower()
    searched_fields = ("title", "company", "description")

    def relevance(posting):
        points = 0
        # keyword match between query and job title/company/description
        if needle and any(needle in posting[field].lower() for field in searched_fields):
            points += 2
        # filters act as score boosts
        if active_filters.get("remote") is True and posting.get("remote"):
            points += 1
        if "location" in active_filters and active_filters["location"].lower() in posting["location"].lower():
            points += 1
        return points

    scored = [(relevance(posting), posting) for posting in JOBS]
    # highest score first; the sort is stable, so equal scores keep their order
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [posting for _, posting in scored[:top_k]]

In [12]:
#Matching Agent
def match_resume_to_jobs(resume_json: dict, candidate_jobs: List[dict], top_k=10) -> List[dict]:
    """
    Rank candidate jobs by how well the resume's skills cover each job's skills.

    For every job the score is the fraction of the job's skills (compared
    case-insensitively) that also appear on the resume, minus 0.2 when the job
    is marked "senior" and the resume reports fewer than 5 years of
    experience. A job without skills gets a skill fraction of 0.0.

    Returns up to ``top_k`` dicts of the form
    ``{"job": <job dict>, "score": <rounded to 3 places>, "rationale": <str>}``,
    best score first.
    """
    resume_skills = {skill.lower() for skill in resume_json.get("skills", [])}

    scored = []
    for posting in candidate_jobs:
        wanted = {skill.lower() for skill in posting.get("skills", [])}
        overlap = resume_skills & wanted
        coverage = len(overlap) / len(wanted) if wanted else 0.0

        # simple seniority mapping: senior roles are penalised for short experience
        is_stretch = (
            posting.get("seniority", "mid") == "senior"
            and resume_json.get("years_experience", 0) < 5
        )
        penalty = 0.2 if is_stretch else 0.0

        scored.append({
            "job": posting,
            "score": round(float(coverage - penalty), 3),
            "rationale": f"skill_match={len(overlap)}/{len(wanted)}; seniority_penalty={penalty}",
        })

    scored.sort(key=lambda entry: entry["score"], reverse=True)
    return scored[:top_k]

In [13]:
#Tailor agent

def tailor_resume_bullets(resume_json: dict, job_json: dict) -> List[str]:
    """
    Ask the LLM mock for resume bullets tailored to a job.

    Both dicts are serialised to JSON and sent under the ``TAILOR_BULLETS``
    marker with temperature 0. The reply is parsed as JSON and its
    ``"bullets"`` list is returned (an empty list if the key is absent).
    """
    resume_blob = json.dumps(resume_json)
    job_blob = json.dumps(job_json)
    request = f"TAILOR_BULLETS\nResumeJSON: {resume_blob}\nJobJSON: {job_blob}"
    reply = json.loads(mock_call_gemini(request, temperature=0))
    return reply.get("bullets", [])

def generate_cover_letter(resume_json: dict, job_json: dict) -> str:
    """
    Ask the LLM mock for a cover letter for the given resume and job.

    Both dicts are serialised to JSON and sent under the ``COVER_LETTER``
    marker with temperature 0.7; the reply text is returned unchanged.
    """
    resume_blob = json.dumps(resume_json)
    job_blob = json.dumps(job_json)
    request = f"COVER_LETTER\nResumeJSON: {resume_blob}\nJobJSON: {job_blob}"
    letter = mock_call_gemini(request, temperature=0.7)
    return letter

In [14]:
#Explainer Agent

In [15]:
def explain_match(resume_json: dict, job_json: dict) -> dict:
    """
    Ask the LLM mock to explain how well a resume matches a job.

    Both dicts are serialised to JSON and sent under the ``MATCH_EXPLAIN``
    marker with temperature 0; the reply is parsed as JSON and returned.
    """
    resume_blob = json.dumps(resume_json)
    job_blob = json.dumps(job_json)
    request = f"MATCH_EXPLAIN\nResumeJSON: {resume_blob}\nJobJSON: {job_blob}"
    reply = mock_call_gemini(request, temperature=0)
    return json.loads(reply)

In [16]:
# JSON file holding the list of recorded applications.
TRACKER_FILE = "data/tracker.json"

# Make sure the data directory is there before touching the tracker file.
if not os.path.exists("data"):
    os.makedirs("data", exist_ok=True)

# First run: seed the tracker with an empty JSON list.
if not os.path.exists(TRACKER_FILE):
    with open(TRACKER_FILE, "w") as f:
        f.write(json.dumps([]))

def add_application(resume_name: str, job_id: str, artifacts: dict):
    """
    Append an application record to the tracker file and return it.

    The tracker file (``TRACKER_FILE``) holds a JSON list. It is read in full,
    the new record is appended, and the whole list is written back.

    Args:
        resume_name: Name stored on the record as ``resume_name``.
        job_id: Identifier of the job, stored as ``job_id``.
        artifacts: Generated material for the application; stored as-is, so it
            must be JSON-serialisable.

    Returns:
        The new record: ``id`` (random UUID4 string), ``applied_at`` (UTC
        timestamp in ISO format without an offset), ``resume_name``,
        ``job_id`` and ``artifacts``.
    """
    with open(TRACKER_FILE, "r", encoding="utf-8") as f:
        arr = json.load(f)

    # datetime.utcnow() is deprecated. Take an aware UTC "now" and drop the
    # tzinfo so the stored string keeps the same naive-UTC ISO format that
    # existing tracker records already use.
    applied_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    rec = {
        "id": str(uuid.uuid4()),
        "applied_at": applied_at,
        "resume_name": resume_name,
        "job_id": job_id,
        "artifacts": artifacts
    }
    arr.append(rec)
    with open(TRACKER_FILE, "w", encoding="utf-8") as f:
        json.dump(arr, f, indent=2)
    return rec

In [20]:
#End to End Demo run

# Load a sample resume; the context manager closes the file handle right after reading.
with open("data/sample_resume_3.txt", encoding="utf-8") as resume_file:
    sample_resume_text = resume_file.read()
resume_json = resume_text_to_json(sample_resume_text)
print("Parsed resume:", resume_json)

# Search first, then rank the candidates against the parsed resume.
candidates = search_jobs(query="Data Scientist", filters={"remote":True}, top_k=20)
ranked = match_resume_to_jobs(resume_json, candidates, top_k=5)
print("Top matches:")
for rank, r in enumerate(ranked):
    jid = r["job"]["id"]
    print(r["score"], r["job"]["title"], r["job"]["company"], "->", r["rationale"])
    # Generate bullets for the top match only. Selecting by position rather than
    # by dict equality means an equal duplicate entry cannot trigger this twice.
    if rank == 0:
        bullets = tailor_resume_bullets(resume_json, r["job"])
        letter = generate_cover_letter(resume_json, r["job"])
        print("Tailored bullets:", bullets)
        print("Cover letter (excerpt):", letter[:300])
        # Only the first 200 characters of the letter are stored with the application.
        rec = add_application(resume_json.get("name","unknown"), jid, {"bullets":bullets, "cover_letter": letter[:200]})
        print("Application recorded:", rec["id"])

Parsed resume: {'name': 'Rohit Menon', 'title': None, 'emails': ['rohit.m@example.com'], 'phones': [], 'skills': ['Python', 'Pandas', 'Scikit-learn', 'SQL', 'Visualization'], 'years_experience': 3, 'experience': [], 'education': []}
Top matches:
1.0 Data Scientist DataSense -> skill_match=4/4; seniority_penalty=0.0
Tailored bullets: ['Designed and implemented RESTful APIs using Spring Boot, improving response time by 30%.', 'Built database schemas and optimized SQL queries to reduce average query latency by 25%.']
Cover letter (excerpt): Dear Hiring Manager,

I am excited to apply for the Backend Developer role at Acme... (sample cover letter)

Application recorded: 36f4aa5d-1030-48af-a2fa-a6b6f1ccdc8e
0.333 QA Automation Engineer TestHive -> skill_match=1/3; seniority_penalty=0.0
0.25 Backend Developer Acme Tech -> skill_match=1/4; seniority_penalty=0.0
0.25 Software Engineer - Backend Nimbus Labs -> skill_match=1/4; seniority_penalty=0.0
0.25 ML Engineer VisionX -> skill_match=1/4; s

  "applied_at": datetime.utcnow().isoformat(),


In [21]:
#Test notebook cell

import json, glob
from src.agents.resume_agent import resume_to_json

# Parsed output per resume file, keyed by file path.
results = {}

for file in sorted(glob.glob("data/resumes/*.txt")):
    # Context manager so each resume file is closed as soon as it has been read.
    with open(file, encoding="utf-8") as resume_file:
        text = resume_file.read()
    parsed = resume_to_json(text)
    results[file] = parsed
    print(file, "->", parsed["skills"], parsed["emails"])

# save parsed outputs
with open("data/parsed_resumes.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)

data/resumes/resume_1.txt -> ['Java', 'Spring Boot', 'Microservices', 'REST APIs', 'MySQL', 'Docker', 'Kafka'] ['rahul.nair@example.com']
data/resumes/resume_10.txt -> ['Python', 'TensorFlow', 'PyTorch', 'NLP', 'Computer Vision', 'Docker'] ['divya.s@example.com']
data/resumes/resume_11.txt -> ['MLflow', 'Docker', 'Kubernetes', 'Python', 'Airflow'] ['vikram.s@example.com']
data/resumes/resume_12.txt -> ['Docker', 'Kubernetes', 'Terraform', 'AWS', 'CI/CD', 'Linux'] ['ganesh.prasad@example.com']
data/resumes/resume_13.txt -> ['AWS', 'GCP', 'Terraform', 'Jenkins', 'Kubernetes'] ['deepa.raj@example.com']
data/resumes/resume_14.txt -> ['Selenium', 'Python', 'PyTest', 'Postman', 'CI/CD'] ['kiran.k@example.com']
data/resumes/resume_15.txt -> ['Product Strategy', 'Roadmaps', 'PRDs', 'Data Analysis', 'User Research'] ['nisha.v@example.com']
data/resumes/resume_16.txt -> ['Figma', 'Wireframes', 'Prototyping', 'User Research', 'Visual Design'] ['harini.s@example.com']
data/resumes/resume_17.txt ->