In [2]:
import os
from pathlib import Path
from dotenv import load_dotenv

print(f"Current working directory: {os.getcwd()}")
print(f"Files in current directory: {os.listdir(os.getcwd())}")

dotenv_path = Path('.') / 'secrets.txt'

load_dotenv(dotenv_path=dotenv_path)

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

if GOOGLE_API_KEY is None:
    print(
        f"❌ Authentication Error: GOOGLE_API_KEY not found in the '{dotenv_path}' file."
    )
    print("Please check the file name, location, and the variable naming inside the file.")
else:
    print(f"✅ Gemini API key setup complete from {dotenv_path}. Key starts with: {GOOGLE_API_KEY[:4]}")

Current working directory: /Users/rameshjayasekaran/Downloads/ai-job-search-assistant
Files in current directory: ['.DS_Store', 'ai-job-search-assistant-notebook.ipynb', 'Untitled.ipynb', 'README.md', 'logs', 'secrets.txt', '.ipynb_checkpoints', '.git', 'data', 'src']
✅ Gemini API key setup complete from secrets.txt. Key starts with: AIza


In [3]:
pip install pydantic pandas fastapi uvicorn python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [3]:
import json, os, re, uuid, time
from typing import List, Dict, Any
from pydantic import BaseModel
from datetime import datetime
import pandas as pd
LOG_DIR = "logs"
os.makedirs(LOG_DIR, exist_ok=True)

In [4]:
def mock_call_gemini(prompt: str, temperature: float = 0.0, max_tokens: int = 512) -> str:
    """
    Simple deterministic mock used for development.
    Replace with actual Vertex AI / Gemini call when ready.
    """
    # Very small heuristic responses for key prompt types.
    if "EXTRACT_RESUME_JSON" in prompt:
        # return minimal JSON string (example)
        return json.dumps({
            "name":"Arun Kumar",
            "title":"Backend Developer",
            "emails":["arun@example.com"],
            "phones":["+919900112233"],
            "skills":["Java","Spring Boot","SQL","REST","Docker"],
            "years_experience":4,
            "experience":[
                {"title":"Backend Developer","company":"Acme","start":"2021-06","end":"2024-09","bullets":["Built REST APIs using Spring Boot."]}
            ],
            "education":["B.Tech Computer Science"]
        })
    if "TAILOR_BULLETS" in prompt:
        return json.dumps({
            "bullets": [
                "Designed and implemented RESTful APIs using Spring Boot, improving response time by 30%.",
                "Built database schemas and optimized SQL queries to reduce average query latency by 25%."
            ]
        })
    if "COVER_LETTER" in prompt:
        return "Dear Hiring Manager,\n\nI am excited to apply for the Backend Developer role at Acme... (sample cover letter)\n"
    if "MATCH_EXPLAIN" in prompt:
        return json.dumps({"match_score":0.78, "reasons":["Matches 5/7 skills"], "gaps":["System design"]})
    # default
    return "[MOCK] " + (prompt[:400].replace("\n"," ") + " ...")

In [5]:
# Load jobs from jobs.json (you can replace content)
with open("data/jobs.json","r") as f:
    JOBS = json.load(f)
len(JOBS)

100

In [6]:
class Resume(BaseModel):
    name: str = None
    title: str = None
    emails: List[str] = []
    phones: List[str] = []
    skills: List[str] = []
    years_experience: int = 0
    experience: List[Dict[str, Any]] = []
    education: List[str] = []

class Job(BaseModel):
    id: str
    title: str
    company: str
    location: str
    remote: bool = False
    skills: List[str] = []
    seniority: str = "mid"
    salary: str = ""
    description: str = ""

In [7]:
#Resume Agent
def resume_text_to_json(resume_text: str) -> dict:
    # quick regex heuristics (email/phone), otherwise fallback to LLM
    emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", resume_text)
    phones = re.findall(r"\+?\d[\d \-]{7,}\d", resume_text)
    # try to extract skills heuristically: look for "Skills:" line
    skills = []
    m = re.search(r"Skills[:\s]+(.+)", resume_text, re.IGNORECASE)
    if m:
        skills = [s.strip() for s in re.split(r",|\||;", m.group(1)) if s.strip()]
    # If minimal data only, call LLM mock to get structured JSON
    if len(skills) < 2 or not emails:
        prompt = f"EXTRACT_RESUME_JSON\n\n{resume_text}"
        out = mock_call_gemini(prompt, temperature=0)
        parsed = json.loads(out)
        return parsed
    # otherwise build simple JSON
    parsed = {
        "name": resume_text.splitlines()[0].strip() if resume_text else "Unknown",
        "title": None,
        "emails": emails,
        "phones": phones,
        "skills": skills,
        "years_experience": 3,
        "experience": [],
        "education": []
    }
    return parsed

In [8]:
#Search Agent
def search_jobs(query: str="", filters: Dict[str,Any]=None, top_k: int=20) -> List[Dict]:
    filters = filters or {}
    q = query.lower()
    candidates = []
    for job in JOBS:
        score = 0
        # keyword match between query and job title/desc
        if q:
            if q in job["title"].lower() or q in job["company"].lower() or q in job["description"].lower():
                score += 2
        # simple filter checks
        if filters.get("remote") is True and job.get("remote"):
            score += 1
        if "location" in filters and filters["location"].lower() in job["location"].lower():
            score += 1
        candidates.append((score, job))
    # sort by score desc
    candidates = sorted(candidates, key=lambda x: x[0], reverse=True)
    return [c[1] for c in candidates[:top_k]]

In [9]:
#Matching Agent
def match_resume_to_jobs(resume_json: dict, candidate_jobs: List[dict], top_k=10) -> List[dict]:
    r_skills = set([s.lower() for s in resume_json.get("skills",[])])
    ranked = []
    for job in candidate_jobs:
        j_skills = set([s.lower() for s in job.get("skills",[])])
        if len(j_skills)==0:
            skill_score = 0.0
        else:
            skill_score = len(r_skills & j_skills) / len(j_skills)
        seniority_penalty = 0.0
        # simple seniority mapping
        if job.get("seniority","mid")=="senior" and resume_json.get("years_experience",0) < 5:
            seniority_penalty = 0.2
        score = skill_score - seniority_penalty
        rationale = f"skill_match={len(r_skills & j_skills)}/{len(j_skills)}; seniority_penalty={seniority_penalty}"
        ranked.append({"job":job, "score": round(float(score),3), "rationale":rationale})
    ranked = sorted(ranked, key=lambda x: x["score"], reverse=True)
    return ranked[:top_k]

In [10]:
#Tailor agent

def tailor_resume_bullets(resume_json: dict, job_json: dict) -> List[str]:
    prompt = f"TAILOR_BULLETS\nResumeJSON: {json.dumps(resume_json)}\nJobJSON: {json.dumps(job_json)}"
    out = mock_call_gemini(prompt, temperature=0)
    parsed = json.loads(out)
    return parsed.get("bullets", [])

def generate_cover_letter(resume_json: dict, job_json: dict) -> str:
    prompt = f"COVER_LETTER\nResumeJSON: {json.dumps(resume_json)}\nJobJSON: {json.dumps(job_json)}"
    return mock_call_gemini(prompt, temperature=0.7)

In [14]:
#Explainer Agent

In [11]:
def explain_match(resume_json: dict, job_json: dict) -> dict:
    prompt = f"MATCH_EXPLAIN\nResumeJSON: {json.dumps(resume_json)}\nJobJSON: {json.dumps(job_json)}"
    out = mock_call_gemini(prompt, temperature=0)
    return json.loads(out)

In [12]:
TRACKER_FILE = "data/tracker.json"
if not os.path.exists("data"): os.makedirs("data", exist_ok=True)
if not os.path.exists(TRACKER_FILE):
    with open(TRACKER_FILE,"w") as f:
        json.dump([], f)

def add_application(resume_name: str, job_id: str, artifacts: dict):
    with open(TRACKER_FILE,"r") as f:
        arr = json.load(f)
    rec = {
        "id": str(uuid.uuid4()),
        "applied_at": datetime.utcnow().isoformat(),
        "resume_name": resume_name,
        "job_id": job_id,
        "artifacts": artifacts
    }
    arr.append(rec)
    with open(TRACKER_FILE,"w") as f:
        json.dump(arr, f, indent=2)
    return rec

In [14]:
#End to End Demo run

# load a sample resume text (I'll provide 3 samples below)
sample_resume_text = open("data/resumes/resume_3.txt").read()
resume_json = resume_text_to_json(sample_resume_text)
print("Parsed resume:", resume_json)

candidates = search_jobs(query="Data Scientist", filters={"remote":True}, top_k=20)
ranked = match_resume_to_jobs(resume_json, candidates, top_k=5)
print("Top matches:")
for r in ranked:
    jid = r["job"]["id"]
    print(r["score"], r["job"]["title"], r["job"]["company"], "->", r["rationale"])
    # generate bullets for top1 only
    if r==ranked[0]:
        bullets = tailor_resume_bullets(resume_json, r["job"])
        letter = generate_cover_letter(resume_json, r["job"])
        print("Tailored bullets:", bullets)
        print("Cover letter (excerpt):", letter[:300])
        rec = add_application(resume_json.get("name","unknown"), jid, {"bullets":bullets, "cover_letter": letter[:200]})
        print("Application recorded:", rec["id"])

Parsed resume: {'name': 'Amit Sharma', 'title': None, 'emails': ['amit.sharma@example.com'], 'phones': [], 'skills': ['Node.js', 'Express.js', 'MongoDB', 'Redis', 'AWS Lambda'], 'years_experience': 3, 'experience': [], 'education': []}
Top matches:
0.75 Backend Developer - Node.js StackBridge -> skill_match=3/4; seniority_penalty=0.0
Tailored bullets: ['Designed and implemented RESTful APIs using Spring Boot, improving response time by 30%.', 'Built database schemas and optimized SQL queries to reduce average query latency by 25%.']
Cover letter (excerpt): Dear Hiring Manager,

I am excited to apply for the Backend Developer role at Acme... (sample cover letter)

Application recorded: 6b5077f3-9382-4db8-be35-94e4a090306b
0.667 Full Stack Engineer StackUp -> skill_match=2/3; seniority_penalty=0.0
0.0 Data Scientist DataSense -> skill_match=0/4; seniority_penalty=0.0
0.0 Backend Developer Acme Tech -> skill_match=0/4; seniority_penalty=0.0
0.0 Frontend Engineer BlueWave -> skill_match=0/

  "applied_at": datetime.utcnow().isoformat(),


In [15]:
#Test notebook cell

import json, glob
from src.agents.resume_agent import resume_to_json

results = {}

for file in sorted(glob.glob("data/resumes/*.txt")):
    text = open(file).read()
    parsed = resume_to_json(text)
    results[file] = parsed
    print(file, "->", parsed["skills"], parsed["emails"])

# save parsed outputs
with open("data/parsed_resumes.json", "w") as f:
    json.dump(results, f, indent=2)

data/resumes/resume_1.txt -> ['Java', 'Spring Boot', 'Microservices', 'REST APIs', 'MySQL', 'Docker', 'Kafka'] ['rahul.nair@example.com']
data/resumes/resume_10.txt -> ['Python', 'TensorFlow', 'PyTorch', 'NLP', 'Computer Vision', 'Docker'] ['divya.s@example.com']
data/resumes/resume_11.txt -> ['MLflow', 'Docker', 'Kubernetes', 'Python', 'Airflow'] ['vikram.s@example.com']
data/resumes/resume_12.txt -> ['Docker', 'Kubernetes', 'Terraform', 'AWS', 'CI/CD', 'Linux'] ['ganesh.prasad@example.com']
data/resumes/resume_13.txt -> ['AWS', 'GCP', 'Terraform', 'Jenkins', 'Kubernetes'] ['deepa.raj@example.com']
data/resumes/resume_14.txt -> ['Selenium', 'Python', 'PyTest', 'Postman', 'CI/CD'] ['kiran.k@example.com']
data/resumes/resume_15.txt -> ['Product Strategy', 'Roadmaps', 'PRDs', 'Data Analysis', 'User Research'] ['nisha.v@example.com']
data/resumes/resume_16.txt -> ['Figma', 'Wireframes', 'Prototyping', 'User Research', 'Visual Design'] ['harini.s@example.com']
data/resumes/resume_17.txt ->

In [16]:
#Test Job loader

from src.data.job_loader import load_jobs
jobs = load_jobs()
print("Total jobs loaded:", len(jobs))
print(jobs[0])

Total jobs loaded: 100
id='job-001' title='Backend Developer' company='Acme Tech' location='Bengaluru, India' remote=True skills=['Java', 'Spring Boot', 'REST', 'SQL'] seniority='mid' salary='12-18 LPA' description='Build backend microservices and REST APIs.'


In [17]:
# Test Search Agent
from src.data.job_loader import load_jobs
from src.agents.search_agent import SearchAgent
from src.utils.logger import log_search

jobs = load_jobs()
search_agent = SearchAgent(jobs)

In [18]:
results = search_agent.search(
    query="Android",
    filters={"remote": True},
    top_k=10
)

for r in results:
    print(r.id, r.title, r.skills)

job-013 Android Engineer - Jetpack ['Kotlin', 'Jetpack Compose', 'Coroutines']
job-012 Android Developer ['Kotlin', 'Android', 'MVVM']
job-001 Backend Developer ['Java', 'Spring Boot', 'REST', 'SQL']
job-004 Backend Developer - Node.js ['Node.js', 'Express', 'MongoDB', 'Redis']
job-006 Frontend Engineer ['JavaScript', 'React', 'CSS']
job-008 React Engineer ['React', 'TypeScript', 'Redux', 'Next.js']
job-009 Full Stack Engineer ['Node.js', 'React', 'MongoDB']
job-014 iOS Developer ['Swift', 'UIKit', 'SwiftUI']
job-018 Deep Learning Engineer ['PyTorch', 'CNNs', 'Transformers']
job-021 AI Engineer ['LLMs', 'Python', 'VectorDB']


In [19]:
#Test Log Search
log_search("backend", {"remote":True}, results)

In [20]:
log_search("Android", {"remote":True}, results)

In [21]:
from src.data.job_loader import load_jobs
from src.agents.search_agent import SearchAgent
from src.agents.matching_agent import MatchingAgent
from src.agents.resume_agent import resume_to_json
from src.utils.match_logger import log_match

jobs = load_jobs()
search_agent = SearchAgent(jobs)
matcher = MatchingAgent()


In [22]:
#Test Resume Agent and Job Matcher Agent
resume = open("data/resumes/resume_1.txt").read()
resume_json = resume_to_json(resume)

candidates = search_agent.search("backend")
ranked = matcher.match(resume_json, candidates, top_k=5)

for r in ranked:
    print(r["job"].id, r["job"].title, "->", r["score"], r["explanation"])

job-001 Backend Developer -> 0.3 {'skill_overlap': '2/4', 'skill_overlap_list': ['java', 'spring boot'], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}
job-005 Java Developer -> 0.3 {'skill_overlap': '2/4', 'skill_overlap_list': ['java', 'mysql'], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}
job-047 Backend Developer - Go -> 0.2 {'skill_overlap': '1/3', 'skill_overlap_list': ['microservices'], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}
job-084 Backend Engineer - PHP -> 0.2 {'skill_overlap': '1/3', 'skill_overlap_list': ['mysql'], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}
job-002 Software Engineer - Backend -> 0.15 {'skill_overlap': '1/4', 'skill_overlap_list': ['docker'], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}


In [23]:
resume = open("data/resumes/resume_4.txt").read()
resume_json = resume_to_json(resume)

candidates = search_agent.search("backend")
ranked = matcher.match(resume_json, candidates, top_k=5)

for r in ranked:
    print(r["job"].id, r["job"].title, "->", r["score"], r["explanation"])

job-006 Frontend Engineer -> 0.6 {'skill_overlap': '3/3', 'skill_overlap_list': ['react', 'javascript', 'css'], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}
job-007 Frontend Developer - Vue.js -> 0.3 {'skill_overlap': '3/4', 'skill_overlap_list': ['tailwind', 'javascript', 'html'], 'seniority_penalty': 0.5, 'location_score': 0, 'remote_score': 0}
job-011 Full Stack Engineer - MERN -> 0.15 {'skill_overlap': '1/4', 'skill_overlap_list': ['react'], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}
job-009 Full Stack Engineer -> 0.05 {'skill_overlap': '1/3', 'skill_overlap_list': ['react'], 'seniority_penalty': 0.5, 'location_score': 0, 'remote_score': 0}
job-069 Backend Intern -> 0.0 {'skill_overlap': '0/2', 'skill_overlap_list': [], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}


In [24]:
resume = open("data/resumes/resume_19.txt").read()
resume_json = resume_to_json(resume)

candidates = search_agent.search("backend")
ranked = matcher.match(resume_json, candidates, top_k=5)

for r in ranked:
    print(r["job"].id, r["job"].title, "->", r["score"], r["explanation"])

job-096 Backend Developer - .NET -> 0.05 {'skill_overlap': '1/3', 'skill_overlap_list': ['c#'], 'seniority_penalty': 0.5, 'location_score': 0, 'remote_score': 0}
job-069 Backend Intern -> 0.0 {'skill_overlap': '0/2', 'skill_overlap_list': [], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}
job-005 Java Developer -> 0.0 {'skill_overlap': '0/4', 'skill_overlap_list': [], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}
job-006 Frontend Engineer -> 0.0 {'skill_overlap': '0/3', 'skill_overlap_list': [], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}
job-011 Full Stack Engineer - MERN -> 0.0 {'skill_overlap': '0/4', 'skill_overlap_list': [], 'seniority_penalty': 0, 'location_score': 0, 'remote_score': 0}


In [25]:
#Test Personalization Agent

from src.agents.personalization_agent import PersonalizationAgent
from src.utils.personalization_logger import log_personalization

pers = PersonalizationAgent()

In [26]:
#Test with Resume

resume_text = open("data/resumes/resume_1.txt").read()
resume_json = resume_to_json(resume_text)

candidates = search_agent.search("backend")
matched = matcher.match(resume_json, candidates)

preferences = {
    "preferred_locations": ["Remote"],
    "preferred_roles": ["backend"],
    "min_salary": 12,
    "remote": True
}

personalized = pers.apply(matched, preferences)

for p in personalized[:5]:
    print(p["job"].id, p["job"].title, "->", p["personalized_score"], p["bonus"])


job-001 Backend Developer -> 0.95 0.65
job-004 Backend Developer - Node.js -> 0.8 0.8
job-047 Backend Developer - Go -> 0.65 0.45
job-003 Backend Engineer - Java -> 0.6 0.45
job-069 Backend Intern -> 0.55 0.55


In [32]:
from src.utils.llm import call_gemini
call_gemini("HELLO")

'{"response": "[MOCK RESPONSE]"}'

In [33]:
from src.agents.tailor_agent import TailorAgent
from src.utils.save_utils import save_tailored_resume

tailor = TailorAgent()


In [34]:
resume_json = resume_to_json(open("data/resumes/resume_1.txt").read())

candidates = search_agent.search("backend")
matched = matcher.match(resume_json, candidates)
job_json = matched[0]["job"].model_dump()  # best job


In [35]:
tailored = tailor.generate_bullets(resume_json, job_json)
print(tailored)


{'summary': 'Experienced developer skilled in delivering backend services.', 'bullets': ['Developed mock microservices using Python and Java.', 'Improved API performance through caching strategies.', 'Integrated SQL databases for faster data access.'], 'inserted_keywords': ['Python', 'SQL', 'microservices']}


In [37]:
from src.agents.application_agent import ApplicationAgent
from src.utils.application_utils import save_application_bundle
from src.utils.application_logger import log_application

app_agent = ApplicationAgent()


In [40]:
resume_json = resume_to_json(open("data/resumes/resume_1.txt").read())
candidates = search_agent.search("backend")
matched = matcher.match(resume_json, candidates)
job_json = matched[0]["job"].model_dump()

tailored = tailor.generate_bullets(resume_json, job_json)


In [41]:
cover_letter = app_agent.generate_cover_letter(resume_json, job_json, tailored)
print(cover_letter)


Dear Hiring Manager,

I am excited to apply for the role...



In [42]:
bundle_path = save_application_bundle("resume_1", job_json["id"], tailored, cover_letter)
bundle_path


'out/applications/resume_1_job-001.json'

In [43]:
log_application("resume_1", job_json["id"], cover_letter)


In [44]:
from src.agents.tracker_agent import ApplicationTrackerAgent
from src.utils.tracker_logger import log_tracker_event

tracker = ApplicationTrackerAgent()


In [45]:
record = tracker.add_application(
    resume_name="sample_resume_1",
    job_id=job_json["id"],
    tailored_resume=tailored,
    cover_letter=cover_letter
)

log_tracker_event("created", record)

record


{'id': '8a5273ab-5406-4013-869f-e21e991ec4a5',
 'timestamp': 1764138203.798825,
 'resume_name': 'sample_resume_1',
 'job_id': 'job-001',
 'status': 'applied',
 'tailored_resume': {'summary': 'Experienced developer skilled in delivering backend services.',
  'bullets': ['Developed mock microservices using Python and Java.',
   'Improved API performance through caching strategies.',
   'Integrated SQL databases for faster data access.'],
  'inserted_keywords': ['Python', 'SQL', 'microservices']},
 'cover_letter': 'Dear Hiring Manager,\n\nI am excited to apply for the role...\n'}

In [46]:
tracker.update_status(record["id"], "interview_scheduled")
log_tracker_event("update_status", {"id": record["id"], "new_status": "interview_scheduled"})


In [47]:
tracker.read_all()


[{'id': '8a5273ab-5406-4013-869f-e21e991ec4a5',
  'timestamp': 1764138203.798825,
  'resume_name': 'sample_resume_1',
  'job_id': 'job-001',
  'status': 'interview_scheduled',
  'tailored_resume': {'summary': 'Experienced developer skilled in delivering backend services.',
   'bullets': ['Developed mock microservices using Python and Java.',
    'Improved API performance through caching strategies.',
    'Integrated SQL databases for faster data access.'],
   'inserted_keywords': ['Python', 'SQL', 'microservices']},
  'cover_letter': 'Dear Hiring Manager,\n\nI am excited to apply for the role...\n'}]

In [48]:
from src.agents.tracker_reporter import TrackerReporter
reporter = TrackerReporter()
reporter.export_csv()


'out/application_report.csv'