In [26]:
from langchain_core.tools import tool
from utils.resume_parser import ResumeParser
import PyPDF2
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from langgraph.prebuilt import create_react_agent
from config import GROQ_API_KEY, LLM_MODEL

In [27]:
load_dotenv()

True

In [28]:
resume_parser = ResumeParser()

In [29]:
llm = ChatGroq(model="openai/gpt-oss-20b")

### Prompts:

In [30]:
RESUME_ANALYSIS_PROMPT= """
            From given Skills, Education and Experience, Analyze this resume information and provide specific, actionable suggestions 
            for improvement to make it more competitive in the job market.
            
            === RESUME DATA ===
            
            Skills: {skills}
            
            Education: {education}
            
            Experience: {experience}
            
            === ANALYSIS INSTRUCTIONS ===
            
            Provide a comprehensive analysis with the following clearly labeled sections:
            
            1. OVERALL ASSESSMENT
            • Strengths: Identify 3-5 strong aspects of the resume
            • Weaknesses: Point out 2-4 areas that need improvement
            • Industry fit: Based on the skills and experience, suggest 2-3 suitable industry sectors or job roles
            
            2. CONTENT IMPROVEMENTS
            • Achievements: Suggest how to better quantify results (provide 2-3 examples of how to reword vague statements)
            • Skills presentation: Advise on better organization or presentation of technical skills
            • Missing skills: Identify any critical skills that seem to be missing based on the experience described
            
            3. FORMAT SUGGESTIONS
            • Structure: Suggest optimal resume sections and ordering
            • Length: Advise on appropriate length based on experience level
            • Readability: Provide tips to improve scannability
            
            4. ATS OPTIMIZATION
            • Keywords: Suggest 5-7 additional keywords to include for better ATS matching
            • Formatting pitfalls: Identify any elements that could harm ATS parsing
            • File format recommendations
            
            Be extremely specific and actionable in your suggestions. Provide concrete examples where possible.
            Focus on transformative improvements rather than minor tweaks.
            """

### Tool-1: Agent for resume Analysis:

In [31]:
@tool
def analyze_resume(skills: str, education: str, experience: str):
    "This tool is use in case user is asking for resume analysis"
    # prompt = RESUME_ANALYSIS_PROMPT.format({"skills": skills, "education": education, "experience": experience})
    prompt = RESUME_ANALYSIS_PROMPT.format(skills=skills, education=education, experience=experience)

    try:
        analysis =  llm.invoke(prompt).content
    except:
        # Basic strengths analysis
        strengths = []
        if len(skills) >= 5:
            strengths.append("Good range of technical skills")
        if len(experience) >= 3:
            strengths.append("Solid work experience")
        if any("machine learning" in skill.lower() or "ai" in skill.lower() for skill in skills):
            strengths.append("Valuable AI/ML skills that are in high demand")
        
        analysis += "Strengths:\n"
        for strength in strengths or ["Resume contains some relevant skills"]:
            analysis += f"• {strength}\n"
        
        # Basic weaknesses analysis
        weaknesses = []
        if len(skills) < 5:
            weaknesses.append("Limited range of technical skills listed")
        if not any("python" in skill.lower() for skill in skills):
            weaknesses.append("Python (a widely used programming language) not explicitly listed")
        
        analysis += "\nWeaknesses:\n"
        for weakness in weaknesses or ["Consider adding more specific technical skills"]:
            analysis += f"• {weakness}\n"
        
        # Content improvements
        analysis += "\nCONTENT IMPROVEMENTS\n\n"
        analysis += "• Consider quantifying your achievements with specific metrics\n"
        analysis += "• Organize skills by category (programming languages, frameworks, tools)\n"
        analysis += "• Focus on highlighting relevant skills for your target roles\n"
        
        # Format suggestions
        analysis += "\nFORMAT SUGGESTIONS\n\n"
        analysis += "• Use a clean, ATS-friendly format with clear section headings\n"
        analysis += "• Ensure consistent formatting (bullet points, dates, etc.)\n"
        analysis += "• Keep resume to 1-2 pages maximum\n"
        
        # ATS optimization
        analysis += "\nATS OPTIMIZATION\n\n"
        analysis += "• Use keywords from job descriptions in your resume\n"
        analysis += "• Save your resume as a PDF to maintain formatting\n"
        analysis += "• Avoid tables, headers/footers, and images that can confuse ATS systems\n"
    
    return analysis

### Tool-2: Agent for Job Search:

In [32]:
from config import JOB_PLATFORMS
from utils.job_scraper import JobScraper
from utils.serp_api_searcher import SerpApiSearcher

In [33]:
@tool
def job_search_tool(keywords, location, platforms=None, count=5):
        """
        Search for jobs based on resume and keywords.
        
        Args:
            # resume_data (dict): The parsed resume data
            keywords (str): Search keywords or job title
            location (str): Job location
            platforms (list): List of job platforms to search
            count (int): Number of jobs per platform
            
        Returns:
            list: List of job dictionaries
        """
        job_scraper = JobScraper()
        serp_api_searcher = SerpApiSearcher()

        if not platforms:
            platforms = JOB_PLATFORMS
            
        # Try the SerpAPI approach first (this will have real links)
        api_jobs = []
        
        for platform in platforms:
            # Use SerpAPI to search for real jobs
            platform_jobs = serp_api_searcher.search_jobs(
                keywords, 
                location, 
                platform=platform, 
                count=count
            )
            api_jobs.extend(platform_jobs)
        
        # If we got results from SerpAPI, use those
        if api_jobs:
            return api_jobs
        
        # Fallback to the scraper if SerpAPI fails
        print("SerpAPI search returned no results. Falling back to scraper.")
        all_jobs = []
        for platform in platforms:
            platform_jobs = job_scraper.search_jobs(
                keywords, 
                location, 
                platform=platform, 
                count=count
            )
            all_jobs.extend(platform_jobs)
        
        return all_jobs

In [34]:
from pydantic import BaseModel, Field
from langchain.tools import StructuredTool

class JobMatchInput(BaseModel):
    resume_data: dict = Field(..., description="Parsed resume data as a dict")
    job_data: dict = Field(..., description="Job description data as a dict")

In [35]:
class JobAnalyzer:
    def __init__(self,api_key):
        self.api_key = api_key

    def __call__(self, resume_data, job_data):
        # You can replace this with your real logic
        return {
            "match_score": 82,
            "key_matches": ["Python", "Data Analysis"],
            "gaps": ["Cloud experience"],
            "recommendations": ["Add AWS project examples"]
        }
    
    def get_job_match_analysis(self, resume_data, job_data):
        """
        Analyze how well a resume matches a job description.
        
        Args:
            resume_data (dict): The parsed resume data
            job_data (dict): The job listing data
        
        Returns:
            dict: Match analysis with score and recommendations
        """
        if not self.api_key:
            return self._generate_basic_match_analysis(resume_data, job_data)
        
        try:
            # Initialize OpenAI client
            # client = OpenAI(api_key=self.api_key, model=self.model)
            
            # Extract relevant data
            skills = resume_data.get("skills", [])
            experience = resume_data.get("experience", [])
            job_title = job_data.get("title", "")
            job_description = job_data.get("description", "")
            
            # Create a prompt for matching analysis
            prompt = f"""
            Analyze how well this resume matches the job description and provide a detailed match analysis.
            
            === RESUME DATA ===
            Skills: {", ".join(skills)}
            
            Experience:
            {chr(10).join([f"- {exp}" for exp in experience])}
            
            === JOB DATA ===
            Title: {job_title}
            
            Description:
            {job_description}
            
            === ANALYSIS INSTRUCTIONS ===
            
            Provide a match analysis with the following components:
            
            1. MATCH SCORE: Calculate a percentage match (0-100%) based on how well the resume matches the job requirements.
            
            2. KEY MATCHES: List 3-5 specific skills or experiences from the resume that align well with the job requirements.
            
            3. GAPS: Identify 2-4 requirements in the job description that are not clearly demonstrated in the resume.
            
            4. RECOMMENDATIONS: Suggest 3-5 specific actions the candidate can take to better position themselves for this role.
            
            Format your response as a JSON with the following structure:
            {{
                "match_score": 85,
                "key_matches": ["match1", "match2", ...],
                "gaps": ["gap1", "gap2", ...],
                "recommendations": ["rec1", "rec2", ...]
            }}
            
            Ensure your analysis is specific, objective, and focused on the actual content in the resume and job description.
            """
            
            # Get analysis from OpenAI
            response = llm.create(
                model=self.model,
                prompt=prompt,
                max_tokens=1000,
                temperature=0.5
            )
            
            # Parse the response as JSON
            try:
                import json
                analysis = json.loads(response.choices[0].message.content.strip())
                return analysis
            except json.JSONDecodeError:
                # If JSON parsing fails, return the raw text
                return {"match_analysis": response.choices[0].message.content.strip()}
            
        except Exception as e:
            print(f"Error in job match analysis: {e}")
            return self._generate_basic_match_analysis(resume_data, job_data)
    
    def _generate_basic_match_analysis(self, resume_data, job_data):
        """Generate basic job match analysis when OpenAI is not available."""
        skills = resume_data.get("skills", [])
        job_description = job_data.get("description", "").lower()
        
        # Count matching skills
        matching_skills = [skill for skill in skills if skill.lower() in job_description]
        
        # Calculate a simple match score
        match_score = min(len(matching_skills) * 10, 100) if skills else 50
        
        return {
            "match_score": match_score,
            "key_matches": matching_skills[:5],
            "gaps": ["Unable to analyze gaps without AI processing"],
            "recommendations": [
                "Review the job description and identify key requirements",
                "Customize your resume to highlight relevant skills and experience",
                "Add any missing skills that you possess but aren't in your resume"
            ]
        }

In [36]:
# Wrap the callable class
job_match_tool = StructuredTool.from_function(
    func=JobAnalyzer(api_key=GROQ_API_KEY),
    name="job_match_analysis",
    description="Analyze how well a resume matches a given job description.",
    args_schema=JobMatchInput
)

In [37]:
# agent_executor = create_react_agent([analyze_resume],llm,agent="zero-shot-react-description", verbose=True)
agent_executor = create_react_agent(llm, [analyze_resume,job_search_tool,job_match_tool], debug=True)

In [38]:
temp_path  = r"U:\generative_ai_projects\job_search_assistant\atharva_mishra_resume.pdf"

In [39]:
def extract_text(temp_path):
    extracted_text = ""
    if temp_path.endswith('.pdf'):
        with open(temp_path, 'rb') as f:
            pdf_reader = PyPDF2.PdfReader(f)
            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                extracted_text += page.extract_text() + "\n"
    return extracted_text

In [40]:
extracted_text = extract_text(temp_path)

In [41]:
resume_data = resume_parser.parse_resume(extracted_text)

In [42]:
skills = ", ".join(resume_data.get("skills", []))

education_list = resume_data.get("education", [])
education = "\n".join([f"- {edu}" for edu in education_list])

experience_list = resume_data.get("experience", [])
experience = "\n".join([f"- {exp}" for exp in experience_list])

In [43]:
# result = analyze_resume.invoke({
#     "skills": skills,
#     "education": education,
#     "experience": experience
# })
# print(result)

In [44]:
### Testing:

In [45]:
#### Testing resume analysis tool:

In [46]:
input_message = {
    "role": "user",
    "content": f"Please analyze this resume with the following details:\n\nSkills: {skills}\n\nEducation:\n{education}\n\nExperience:\n{experience}"
}

response = agent_executor.invoke({"messages": [input_message]})

[1m[values][0m {'messages': [HumanMessage(content='Please analyze this resume with the following details:\n\nSkills: data science, scikit-learn, keras, html, Oriental Institute Of Science Of Technology, Pytorch, pandas, git, python, VScode, Python, tensorflow, computer vision, sql, numpy, Hindi, Genetic Variants, machine learning, mongodb, Spyder, Machine Learning, EDA, NLTK, pytorch, deep learning, Commputer Vision, iNeuron, css, flask, Matplotlib, Keras\n\nEducation:\n- Institute\n- h, Hindi.\nCertifications: Full Stack Data Science (iNeuron), Python(Durgasoft)  Mar 2013 - Mar 2015\nOriental Institute Of Science Of Technology (Bachelor Of Engineering)                                                    Aug 2015 - Oct 2020Khu\n\nExperience:\n- 15\nOriental Institute Of Science Of Technology (Bachelor Of Engineering)                                                    Aug 2015 - Oct 2020Khusbu Yadav Inter Collage (12th)Developed an Image Captioning Web Application that generates descri

In [47]:
type(response)

dict

In [48]:
from langchain.schema import AIMessage
from langchain_core.messages import ToolMessage

# Extract final content
if isinstance(response, dict) and "messages" in response:
    for msg in response["messages"]:
        if isinstance(msg, ToolMessage) or (
            isinstance(msg, dict) and msg.get("name") == "analyze_resume"
        ):
            print(msg["content"] if isinstance(msg, dict) else msg.content)


**1. OVERALL ASSESSMENT**

| Category | Findings |
|----------|----------|
| **Strengths** | 1. **Hands‑on project portfolio** – Image captioning web app & flight‑price prediction platform show end‑to‑end skills (data prep → model → deployment). <br>2. **Broad technical stack** – Includes deep‑learning libraries (PyTorch, TensorFlow, Keras), data‑science tools (pandas, NumPy, scikit‑learn), and web dev tech (Flask, HTML/CSS). <br>3. **Clear passion statement** – “Passionate about leveraging AI to solve real‑world problems” signals motivation and fit for data‑driven teams. |
| **Weaknesses** | 1. **Redundant & inconsistent skill listings** – “Python” appears twice; “commputer vision” typo; “pytorch” repeated. <br>2. **Lack of quantifiable impact** – Project descriptions are generic (“performed full EDA”) without metrics. <br>3. **Missing soft skills & certifications** – No mention of teamwork, communication, or relevant certificates (e.g., AWS AI/ML, Coursera Deep Learning). <br>4. **Ed

In [49]:
#### Testing job search tool:

In [50]:
# #job_search_tool(resume_data, keywords, location, platforms=None, count=5)
# keywords = "Agentic AI intern"
# result = job_search_tool.invoke({
#     "resume_data": resume_data,
#     "keywords": keywords,
#     "location": "Bangalore"
# })
# # print(result)

In [51]:
# import pandas as pd

In [52]:
# available_jobs = pd.DataFrame(result)

In [53]:
# available_jobs.head()

In [54]:
#job_search_tool(resume_data, keywords, location, platforms=None, count=5)
keywords = "Machine Learning Engineer"
location = "Bangalore"

In [55]:
input_message = {
    "role": "user",
    "content": f"Please search job with the following details:\n\keywords:\n{keywords}\n\location:\n{location}"
}

response = agent_executor.invoke({"messages": [input_message]})
# print(response)

[1m[values][0m {'messages': [HumanMessage(content='Please search job with the following details:\n\\keywords:\nMachine Learning Engineer\n\\location:\nBangalore', additional_kwargs={}, response_metadata={}, id='dd51fdcd-11e7-4567-9163-fced15cbfbce')]}
[1m[updates][0m {'agent': {'messages': [AIMessage(content='', additional_kwargs={'reasoning_content': 'We need to call job_search_tool. Provide arguments: keywords: "Machine Learning Engineer", location: "Bangalore". Count default 5. Use function.', 'tool_calls': [{'id': 'fc_6f81a2c6-baec-45f4-abea-846188fb184b', 'function': {'arguments': '{"count":5,"keywords":"Machine Learning Engineer","location":"Bangalore"}', 'name': 'job_search_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 68, 'prompt_tokens': 345, 'total_tokens': 413, 'completion_time': 0.066375762, 'prompt_time': 0.005356025, 'queue_time': 0.048983925, 'total_time': 0.071731787, 'prompt_tokens_details': {'cached_tokens': 256}}, 'model_n

In [56]:
import pprint

In [57]:
from langchain.schema import AIMessage
from langchain_core.messages import ToolMessage

# Extract final content
if isinstance(response, dict) and "messages" in response:
    for msg in response["messages"]:
        if isinstance(msg, ToolMessage) or (
            isinstance(msg, dict) and msg.get("name") == "analyze_resume"
        ):
            pprint.pprint(msg["content"] if isinstance(msg, dict) else msg.content)

('[{"title": "Machine Learning Engineer, Trilogy (Remote) - $100,000/year '
 'USD", "company": "Trilogy", "location": "Anywhere", "description": "Ready to '
 "leverage your mastery of LLMs to drive productivity? At Trilogy, we're "
 "opening doors to an exceptional tech career, welcoming those who've honed "
 "their AI skills to elevate their expertise in a dynamic environment. We're "
 'offering a rare chance where your primary focus will be to further expand '
 'your proficiency in LLMs.\\n\\nWhat You Will Be Doing\\n• Designing and '
 'building high-quality AI automations to streamline processes, enhance '
 'productivity, and deliver robust, scalable solutions across diverse '
 'applications\\n• Experimenting with state-of-the-art AI tools like GPT-4 '
 'Vision and Amazon CodeWhisperer, integrating them into our developmental '
 'process to assess and enhance their utility\\n• Evaluating and optimizing '
 'the implementation of AI solutions across various infrastructures, including 

In [None]:
import json
import pandas as pd
import streamlit as st

def clean_jobs_result():
    parsed_results = []

    if isinstance(response, dict) and "messages" in response:
        for msg in response["messages"]:
            # Identify tool output messages (like analyze_resume)
            if isinstance(msg, ToolMessage) or (
                isinstance(msg, dict) and msg.get("name") == "analyze_resume"
            ):
                # Extract the content field correctly
                content = msg["content"] if isinstance(msg, dict) else msg.content

                try:
                    # Try to parse JSON string content
                    data = json.loads(content)
                
                    # If it's a list of dicts → extend list
                    if isinstance(data, list):
                        parsed_results.extend(data)
                    # If it's a single dict → append directly
                    elif isinstance(data, dict):
                        parsed_results.append(data)
                    else:
                        print("⚠️ Skipped non-dict content:", type(data))
                except json.JSONDecodeError:
                    print("❌ Could not parse JSON content:", content[:200])
    
    if parsed_results:
        st.session_state.jobs = pd.DataFrame(parsed_results)
    else:
        st.write("No valid data found.")

In [58]:
import json
import pandas as pd
from langchain.schema import AIMessage
from langchain_core.messages import ToolMessage

# Create an empty list to hold parsed dicts
parsed_results = []

if isinstance(response, dict) and "messages" in response:
    for msg in response["messages"]:
        # Identify tool output messages (like analyze_resume)
        if isinstance(msg, ToolMessage) or (
            isinstance(msg, dict) and msg.get("name") == "analyze_resume"
        ):
            # Extract the content field correctly
            content = msg["content"] if isinstance(msg, dict) else msg.content

            try:
                # Try to parse JSON string content
                data = json.loads(content)
                
                # If it's a list of dicts → extend list
                if isinstance(data, list):
                    parsed_results.extend(data)
                # If it's a single dict → append directly
                elif isinstance(data, dict):
                    parsed_results.append(data)
                else:
                    print("⚠️ Skipped non-dict content:", type(data))
            except json.JSONDecodeError:
                print("❌ Could not parse JSON content:", content[:200])

# ✅ Convert final list to DataFrame
if parsed_results:
    df = pd.DataFrame(parsed_results)
    df.head()
else:
    print("No valid data found.")

In [59]:
df.head()

Unnamed: 0,title,company,location,description,url,apply_url,date_posted,platform,job_type,is_real_job
0,"Machine Learning Engineer, Trilogy (Remote) - ...",Trilogy,Anywhere,Ready to leverage your mastery of LLMs to driv...,https://in.linkedin.com/jobs/view/machine-lear...,https://in.linkedin.com/jobs/view/machine-lear...,23 hours ago,LinkedIn,Full-time,True
1,Freelance Trainer – GCP & Machine Learning Eng...,STEMForge Tech Labs,"Bengaluru, Karnataka, India",Job Title: Freelance Trainer – GCP Professiona...,https://in.linkedin.com/jobs/view/freelance-tr...,https://in.linkedin.com/jobs/view/freelance-tr...,1 day ago,LinkedIn,Part-time,True
2,Data Scientist ML Engineer,e-Hireo,"Bengaluru, Karnataka, India",JOB DESCRIPTION\n\nExperience : 8 - 10 Yrs\n\n...,https://in.linkedin.com/jobs/view/data-scienti...,https://in.linkedin.com/jobs/view/data-scienti...,1 day ago,LinkedIn,Full-time,True
3,Advanced Generative AI Engineer || India (imme...,Ampstek,"Bengaluru, Karnataka, India",(Only for Immediate Joiner and should be ready...,https://in.linkedin.com/jobs/view/advanced-gen...,https://in.linkedin.com/jobs/view/advanced-gen...,1 day ago,LinkedIn,Contractor,True
4,Lead Artificial Intelligence Engineer,OrbitronAI.com,"Bengaluru, Karnataka, India",OrbitronAI is on a mission to turn large langu...,https://in.linkedin.com/jobs/view/lead-artific...,https://in.linkedin.com/jobs/view/lead-artific...,1 day ago,LinkedIn,Full-time,True


In [60]:
resume_data  = resume_data
job_data = df.iloc[0:1]["description"][0]

In [61]:
input_message = {
    "role": "user",
    "content": f"Based on provided resume data and job description Analyze how well a resume matches a given job description.:\n\resume_data: {resume_data}\n\job_data:\n{job_data}"
}

response = agent_executor.invoke({"messages": [input_message]})
# print(response)

[1m[values][0m {'messages': [HumanMessage(content="Based on provided resume data and job description Analyze how well a resume matches a given job description.:\n\resume_data: {'raw_text': 'Languages: English, Hindi.\\nCertifications: Full Stack Data Science (iNeuron), Python(Durgasoft)  Mar 2013 - Mar 2015\\nOriental Institute Of Science Of Technology (Bachelor Of Engineering)                                                    Aug 2015 - Oct 2020Khusbu Yadav Inter Collage (12th)Developed an Image Captioning Web Application that generates descriptive captions for\\nuploaded images using deep learning and computer vision. The system leverages\\nVGG16 for feature extraction and an LSTM-based model for caption generation.\\nThe application is built using Flask for the backend, with a frontend developed using\\nHTML, CSS\\nIt includes user authentication (signup & login), image uploading, and user activity\\ntracking, with all data stored in MongoDB.\\nFlight Price Prediction\\nCreated a

In [62]:
from langchain.schema import AIMessage
from langchain_core.messages import ToolMessage

# Extract final content
if isinstance(response, dict) and "messages" in response:
    for msg in response["messages"]:
        if isinstance(msg, ToolMessage) or (
            isinstance(msg, dict) and msg.get("name") == "analyze_resume"
        ):
            print(msg["content"] if isinstance(msg, dict) else msg.content)

{"match_score": 82, "key_matches": ["Python", "Data Analysis"], "gaps": ["Cloud experience"], "recommendations": ["Add AWS project examples"]}
