We are going to use Adzuna to fetch jobs based on users' preference

In [1]:
import requests
import bs4
import json
import ollama
import os
from dotenv import load_dotenv

In [39]:
from langchain_ollama import ChatOllama

In [2]:
load_dotenv()
ADZUNA_API_KEY = os.getenv('ADZUNA_API_KEY')
ADZUNA_APP_ID = os.getenv('ADZUNA_APP_ID')
ADZUNA_BASE_URL = os.getenv('ADZUNA_BASE_URL')

In [3]:
user_preferences = {
    'job_role': 'ML Engineer',
    'preferred_location': 'Bangalore',
    'country': 'India',
    'full_time': 1,
    'permanent': 1,
    'country_code': 'in'    
}

In [4]:
page_num = 1

In [5]:
filters_to_apply = {
    "app_id": ADZUNA_APP_ID,
    "app_key": ADZUNA_API_KEY,
    "what": user_preferences.get('job_role', ''),
    "where": user_preferences.get('Bangalore', ''),
    "results_per_page": 5,  # Max 50 per page
    "full_time": user_preferences.get('full_time', 1),  # Only full-time jobs (1 = yes, 0 = no)
    "permanent": user_preferences.get('permanent', 1),  # Only permanent positions
    "max_days_old": 60  # Jobs posted in last 7 days
}

search_url = f"{ADZUNA_BASE_URL}{user_preferences.get('country_code')}/search/{page_num}"
job_res = requests.get(url=search_url, params=filters_to_apply)
job_res_json = job_res.json()

In [6]:
job_res_json

{'count': 89,
 'mean': 2300000,
 '__CLASS__': 'Adzuna::API::Response::JobSearchResults',
 'results': [{'created': '2026-01-16T09:55:45Z',
   'redirect_url': 'https://www.adzuna.in/land/ad/5587360588?se=LqigxO0B8RGrs-9NXR-jVg&utm_medium=api&utm_source=70a709d9&v=5B84F4C44ADD61EE546027ADA0CDC65213F93495',
   'contract_time': 'full_time',
   'longitude': 78.50806,
   'title': 'AI/ML Engineer',
   '__CLASS__': 'Adzuna::API::Response::Job',
   'adref': 'eyJhbGciOiJIUzI1NiJ9.eyJzIjoiTHFpZ3hPMEI4Ukdycy05TlhSLWpWZyIsImkiOiI1NTg3MzYwNTg4In0.-Glfqjru2Vy2EqHV3uKdAz7PG58f4X6YSb75wHjmSNI',
   'contract_type': 'permanent',
   'location': {'area': ['India', 'Telangana', 'Hyderabad'],
    'display_name': 'Hyderabad, Telangana',
    '__CLASS__': 'Adzuna::API::Response::Location'},
   'id': '5587360588',
   'company': {'__CLASS__': 'Adzuna::API::Response::Company',
    'display_name': 'K&K Global Talent Solutions INC.'},
   'category': {'label': 'IT Jobs',
    'tag': 'it-jobs',
    '__CLASS__': 'Adzuna:

In [7]:
job_listing = list()

In [38]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate

In [None]:
def get_structured_data(data):
    try:
        parser = JsonOutputParser()
        user_prompt = f"""I have a an HTML snippet taken from a job posting. 
            Your task is to convert prase that string, extract HTML content from it, and provide me important information from it. The text in question is {data}
            The information that I want:
            - Company name: name of the company that posted the job listing
            - Company description: about the company
            - roles & responsibilities
            - skills required
            - skills good to have
            - what will the candidate do
            - any additional notes or benefits or perks, and categorise them
            - salary, if provided

            Provide the response in the form of valid json only
        """

        input_list = [
            {
                "role": "system",
                "content": "You are an expert at extracting meaningful information from unstructured text that was written in html, but was stringified."
            },{
            "role": "user",
            "content": user_prompt}
        ]

        prompt = ChatPromptTemplate.from_template(user_prompt)

        ollama_model = "qwen3:8b"

        llm = ChatOllama(model=ollama_model, temperature=0.0)
        response_ollama = ollama.chat(
            model = ollama_model,
            messages=input_list,
            options={"temperature": 0.0},
        )

        final_res = json.loads(response_ollama.message.content)
        # chain = prompt | llm | parser
        # final_res = chain.invoke()
        return final_res

    except Exception as e:
        print(type(e))
        print(f"Error in getting structured data: {e}")

In [22]:
failed = list()
def fetch_job_requirements(job_data):
    job_details_to_show = dict()
    try:
        company_name = job_data.get('display_name', 'NA')
        job_title = job_data.get('title', 'NA')
        description = job_data.get('description', 'NA')
        employment_type = job_data.get('contract_time', 'NA')
        job_location = job_data.get('location', {}).get('display_name', 'NA')
        posted_at = job_data.get('createdAt', 'NA')
        contract_type = job_data.get('contract_type', 'NA')
        more_details_url = job_data.get('redirect_url', 'NA')

        job_details_to_show = {
            'company_1':company_name,
            'job_title': job_title,
            'job_location': job_location,
            'posted_at': posted_at,
            'employment_type': employment_type,
            'contract_type': contract_type,
        }

        job_from_link = requests.get(url=more_details_url)
        print(job_from_link)
        if job_from_link.status_code == 403:
            failed.append({more_details_url: job_from_link})
            return None

        job_html = bs4.BeautifulSoup(job_from_link.text, features='html.parser')
        job_details = job_html.find_all('section', class_='adp-body')
        if len(job_details) == 0:
            failed.append({more_details_url: job_from_link})
            return job_details_to_show
        
        job_requirements = get_structured_data(job_details[0])
        print("job req: ", job_requirements)


        

        if job_requirements is not None:
            for (k,v) in job_requirements.items():
                job_details_to_show[k] = v

        return job_details_to_show

        

    except Exception as e:
        print(type(e))
        print(f"Exception in fetching job requirements: ", e)
        return job_details_to_show


In [23]:
for res in job_res_json.get("results"):
    if res is None:
        continue
    print(fetch_job_requirements(res))
    print("-"*150)

<Response [403]>
None
------------------------------------------------------------------------------------------------------------------------------------------------------
<Response [403]>
None
------------------------------------------------------------------------------------------------------------------------------------------------------
<Response [403]>
None
------------------------------------------------------------------------------------------------------------------------------------------------------
<Response [200]>
job req:  {'company_name': 'Not provided', 'company_description': 'Not provided', 'roles_and_responsibilities': ['Build time-series demand forecasting models using rolling statistics, classical ML, and deep learning', 'Develop spatial / geospatial models using hexagonal or grid-based representations (e.g., H3)', 'Implement classification models (e.g., logistic regression, tree-based models) for risk and state detection', 'Design and optimize scoring and rankin

In [None]:
failed

[{'https://www.adzuna.in/land/ad/5587360588?se=LqigxO0B8RGrs-9NXR-jVg&utm_medium=api&utm_source=70a709d9&v=5B84F4C44ADD61EE546027ADA0CDC65213F93495': <Response [403]>},
 {'https://www.adzuna.in/land/ad/5578965637?se=LqigxO0B8RGrs-9NXR-jVg&utm_medium=api&utm_source=70a709d9&v=B032A3EC3E284D892A2C8CA45FBE2347A3393657': <Response [403]>},
 {'https://www.adzuna.in/land/ad/5578965185?se=LqigxO0B8RGrs-9NXR-jVg&utm_medium=api&utm_source=70a709d9&v=C731FA0E08E9311719834BF85E19D67F1FC5E688': <Response [403]>}]

In [85]:
from langchain import agents
from langchain.agents import create_agent
from langchain.tools import tool

from langchain.agents.middleware import wrap_tool_call
from langchain.messages import ToolMessage, SystemMessage, HumanMessage, AIMessage

In [27]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

# Initialize the model, ensure the model name matches what you pulled with Ollama
llm = ChatOllama(model="qwen3:8b")

In [None]:
agent = create_agent(
    model=ChatOllama(model="qwen3:8b"),
    system_prompt=SystemMessage(
        content=[
            {
                "type": "text",
                "text": "You are an assistant that has the knowledge of the entire world"
            }
        ]
    )
)
text = "I am looking for a role of machine learning engineer in Hyderabad"
agent_res = agent.invoke({
    "messages": [HumanMessage(f"""Given an unstructured text, extract the following entites: 
                              - job_title,
                              - location,
                              - country,
                              -country_short_code
                              From the location, find out country and country code as well
                              Return the answer in the form of a valid JSON response only
                              text: {text}""")]
})

In [33]:
agent_res

{'messages': [HumanMessage(content='Given an unstructured test, extract the following entites: \n                              - job_title,\n                              - location,\n                              - country,\n                              -country_short_code\n                              From the location, find out country and country code as well\n                              Return the answer in the form of a valid JSON response only\n                              text: I am looking for a role of machine learning engineer in Hyderabad', additional_kwargs={}, response_metadata={}, id='5c9a10f3-e789-4c3b-9e94-a6508e45d02d'),
  AIMessage(content='{\n  "job_title": "machine learning engineer",\n  "location": "Hyderabad",\n  "country": "India",\n  "country_short_code": "IN"\n}', additional_kwargs={}, response_metadata={'model': 'qwen3:8b', 'created_at': '2026-02-04T18:05:05.7432947Z', 'done': True, 'done_reason': 'stop', 'total_duration': 5186980600, 'load_duration': 56

In [34]:
def fetch_jobs(location, title, country_code):
    job_list = list()
    try:
        filters_to_apply = {
        "app_id": ADZUNA_APP_ID,
        "app_key": ADZUNA_API_KEY,
        "what": title,
        "where": location,
        "results_per_page": 5,  # Max 50 per page
        "full_time": user_preferences.get('full_time', 1),  # Only full-time jobs (1 = yes, 0 = no)
        "permanent": user_preferences.get('permanent', 1),  # Only permanent positions
        "max_days_old": 60  # Jobs posted in last 7 days
        }

        search_url = f"{ADZUNA_BASE_URL}{country_code}/search/{page_num}"
        job_res = requests.get(url=search_url, params=filters_to_apply)
        job_res_json = job_res.json()
        for res in job_res_json.get("results"):
            if res is None:
                continue
            job = fetch_job_requirements(res)
            if job is None:
                continue
            job_list.append(job)
            return job_list
    except Exception as e:
        print(type(e))
        print(f"Exception in fetching jobs: {e}")
        return job_list
    


In [35]:
from typing import TypedDict, List, Optional

class JobSearchState(TypedDict):
    user_query: str
    title: Optional[str]
    location: Optional[str]
    country: Optional[str]
    country_code: Optional[str]
    jobs: Optional[List[dict]]


In [36]:
from langchain_core.output_parsers import JsonOutputParser

parser = JsonOutputParser()

In [62]:
def extract_entities(state: JobSearchState):
    parser = JsonOutputParser()
    prompt = ChatPromptTemplate.from_template(f"""Given an unstructured text, extract the following entites: 
                              - title: title of the job,
                              - location,
                              - country,
                              - country_code
                              From the location, find out country and country code as well
                              Return the answer in the form of a valid JSON response only
                              text: {text}""")
    llm = ChatOllama(model="qwen3:8b", temperature=0.0)
    extraction_chain = prompt | llm 

    result = extraction_chain.invoke({"query": state['user_query']})
    print("res is: ", result)
    res_json = json.loads(result.content)
    # return result
    return {
        "title": res_json.get("title"),
        "location": res_json.get("location"),
        "country": res_json.get("country"),
        "country_code": res_json.get("country_code"),
    }
    


In [102]:
def search_jobs(state: JobSearchState):
    job_list = list()
    try:
        filters_to_apply = {
        "app_id": ADZUNA_APP_ID,
        "app_key": ADZUNA_API_KEY,
        "what": state['title'],
        "where": state['location'],
        "results_per_page": 50,  # Max 50 per page
        "full_time": user_preferences.get('full_time', 1),  # Only full-time jobs (1 = yes, 0 = no)
        "permanent": user_preferences.get('permanent', 1),  # Only permanent positions
        "max_days_old": 60  # Jobs posted in last 7 days
        }

        search_url = f"{ADZUNA_BASE_URL}{state.get('country_code', '').lower()}/search/{page_num}"
        print(search_url)
        
        job_res = requests.get(url=search_url, params=filters_to_apply)
        job_res_json = job_res.json()
        print("count: ", job_res_json.get("count"))
        for res in job_res_json.get("results"):
            if res is None:
                continue
            job = fetch_job_requirements(res)
            if job is None:
                continue
            job_list.append(job)
            return {"jobs": job_list}
    except Exception as e:
        print(type(e))
        print(f"Exception in fetching jobs: {e}")
        return {"jobs": job_list}

In [103]:
def enrich_data(state: JobSearchState):
    enriched = []
    parser = JsonOutputParser()
    for job in state["jobs"]:
        print("job is: ", job)
        
        enrichment_agent = create_agent(
            model=ChatOllama(model="qwen3:8b"),
            system_prompt=SystemMessage(
                content=[
                    {
                        "type": "text",
                        "text": "You are an expert in JSON data structure"
                    }
                ]
            )
        )
        
        result = enrichment_agent.invoke({
            'messages': [HumanMessage(f"""
                        You are given a JSON data, with multiple fields. 
                        Clean, and restructure the json, and return the result in valid JSON format only.
                        input: {job}
                        """)]
        })
        print("result of en: ", result.get('messages', {})[-1].content)
        valid_json = json.loads(result.get('messages', {})[-1].content)
        enriched.append(valid_json)
    return {"jobs": enriched}


In [41]:
from langgraph.graph import StateGraph, END

In [104]:
graph = StateGraph(JobSearchState)

graph.add_node("extract_entities", extract_entities)
graph.add_node("search_jobs", search_jobs)
graph.add_node("enrich_jobs", enrich_data)

graph.set_entry_point("extract_entities")

graph.add_edge("extract_entities", "search_jobs")
graph.add_edge("search_jobs", "enrich_jobs")

graph.add_edge("enrich_jobs", END)

job_agent = graph.compile()

In [105]:
entities = job_agent.invoke({"user_query": "I am looking for a machine learning engineer role in Hyderabad"})
print("entities are: ", entities)


res is:  content='{\n  "title": "machine learning engineer",\n  "location": "Hyderabad",\n  "country": "India",\n  "country_code": "IN"\n}' additional_kwargs={} response_metadata={'model': 'qwen3:8b', 'created_at': '2026-02-04T19:11:44.1410571Z', 'done': True, 'done_reason': 'stop', 'total_duration': 3142131900, 'load_duration': 55344000, 'prompt_eval_count': 88, 'prompt_eval_duration': 26175900, 'eval_count': 240, 'eval_duration': 3055985200, 'logprobs': None, 'model_name': 'qwen3:8b', 'model_provider': 'ollama'} id='lc_run--019c2a11-45c6-74c0-aa83-38555d6adb2e-0' tool_calls=[] invalid_tool_calls=[] usage_metadata={'input_tokens': 88, 'output_tokens': 240, 'total_tokens': 328}
https://api.adzuna.com/v1/api/jobs/in/search/1
count:  13
<Response [200]>
job req:  {'company_name': 'SciTech Patent Art', 'company_description': 'At SciTech Patent Art, we leverage cutting-edge technology to deliver innovative solutions in intellectual property and research domains. Our AI/ML team works on rea

In [106]:
entities.get("jobs")

[{'job_title': 'Machine Learning Engineer',
  'job_location': 'Hyderabad, Telangana',
  'employment_type': 'full_time',
  'contract_type': 'permanent',
  'company_name': 'SciTech Patent Art',
  'company_description': 'At SciTech Patent Art, we leverage cutting-edge technology to deliver innovative solutions in intellectual property and research domains. Our AI/ML team works on real-world applications of Natural Language Processing (NLP), Large Language Models (LLMs), and Generative AI—driving impactful change across industries.',
  'roles_and_responsibilities': ['Design, develop, and optimize machine learning models for NLP, computer vision, and predictive analytics.',
   'Apply prompt engineering and prompt chaining to improve AI performance.',
   'Work with tools like Langflow for AI orchestration.',
   'Handle large-scale text data—perform data preprocessing, feature engineering, and augmentation.',
   'Research, evaluate, and implement the latest ML algorithms and frameworks.',
   

In [67]:
entities.get("jobs")

[{'company_1': 'NA',
  'job_title': 'Machine Learning Engineer',
  'job_location': 'Hyderabad, Telangana',
  'posted_at': 'NA',
  'employment_type': 'full_time',
  'contract_type': 'permanent',
  'company_name': 'SciTech Patent Art',
  'company_description': 'At SciTech Patent Art, we leverage cutting-edge technology to deliver innovative solutions in intellectual property and research domains. Our AI/ML team works on real-world applications of Natural Language Processing (NLP), Large Language Models (LLMs), and Generative AI—driving impactful change across industries.',
  'roles_and_responsibilities': ['Design, develop, and optimize machine learning models for NLP, computer vision, and predictive analytics.',
   'Apply prompt engineering and prompt chaining to improve AI performance.',
   'Work with tools like Langflow for AI orchestration.',
   'Handle large-scale text data—perform data preprocessing, feature engineering, and augmentation.',
   'Research, evaluate, and implement the 

In [86]:
p = {
    "messages": [
        HumanMessage(
            content="\n                        You are given a JSON data, with multiple fields. \n                        Clean, and restructure the json, and return the result in valid JSON format only.\n                        input: {'company_1': 'NA', 'job_title': 'Machine Learning Engineer', 'job_location': 'Hyderabad, Telangana', 'posted_at': 'NA', 'employment_type': 'full_time', 'contract_type': 'permanent', 'company_name': 'SciTech Patent Art', 'company_description': 'At SciTech Patent Art, we leverage cutting-edge technology to deliver innovative solutions in intellectual property and research domains. Our AI/ML team works on real-world applications of Natural Language Processing (NLP), Large Language Models (LLMs), and Generative AI—driving impactful change across industries.', 'roles_and_responsibilities': ['Design, develop, and optimize machine learning models for NLP, computer vision, and predictive analytics.', 'Apply prompt engineering and prompt chaining to improve AI performance.', 'Work with tools like Langflow for AI orchestration.', 'Handle large-scale text data—perform data preprocessing, feature engineering, and augmentation.', 'Research, evaluate, and implement the latest ML algorithms and frameworks.', 'Collaborate with cross-functional teams to turn business requirements into technical solutions.', 'Deploy AI models to cloud environments (AWS, Azure, GCP) for production use.'], 'skills_required': ['Bachelor’s or Master’s in Computer Science, Machine Learning, AI, or related field.', '2–3 years of hands-on experience in AI/ML, with a strong focus on NLP and LLM-based projects.', 'Proficiency in Python and popular ML libraries (TensorFlow, PyTorch, Hugging Face Transformers, etc.).', 'Strong skills in prompt engineering/prompt chaining.', 'Experience with Langflow or similar AI workflow tools.', 'Strong understanding of statistical concepts, ML algorithms, and model evaluation techniques.', 'Experience with cloud platforms and model deployment.', 'Excellent problem-solving, analytical, and communication skills.'], 'skills_good_to_have': ['Familiarity with web crawling and scraping tools.'], 'what_will_candidate_do': 'Collaborate with data scientists, software engineers, and domain experts to design, develop, and deploy AI-powered solutions. Work on large datasets, build advanced NLP pipelines, and experiment with state-of-the-art LLMs to create real-world impact.', 'additional_notes_or_perks': ['Innovative AI Work – Be part of projects at the forefront of NLP and Generative AI.', 'Real-World Impact – Your solutions will directly shape client outcomes and business decisions.', 'Collaborative Environment – Work alongside experts in AI, data science, and domain research.', 'Continuous Growth – Access to training, research resources, and the latest AI tools.'], 'salary': None}\n                        ",
            additional_kwargs={},
            response_metadata={},
            id="57268df9-d059-4a5d-9a38-484c19aba305",
        ),
        AIMessage(
            content='{\n  "job_title": "Machine Learning Engineer",\n  "job_location": "Hyderabad, Telangana",\n  "employment_type": "full_time",\n  "contract_type": "permanent",\n  "company_name": "SciTech Patent Art",\n  "company_description": "At SciTech Patent Art, we leverage cutting-edge technology to deliver innovative solutions in intellectual property and research domains. Our AI/ML team works on real-world applications of Natural Language Processing (NLP), Large Language Models (LLMs), and Generative AI—driving impactful change across industries.",\n  "roles_and_responsibilities": [\n    "Design, develop, and optimize machine learning models for NLP, computer vision, and predictive analytics.",\n    "Apply prompt engineering and prompt chaining to improve AI performance.",\n    "Work with tools like Langflow for AI orchestration.",\n    "Handle large-scale text data—perform data preprocessing, feature engineering, and augmentation.",\n    "Research, evaluate, and implement the latest ML algorithms and frameworks.",\n    "Collaborate with cross-functional teams to turn business requirements into technical solutions.",\n    "Deploy AI models to cloud environments (AWS, Azure, GCP) for production use."\n  ],\n  "skills_required": [\n    "Bachelor’s or Master’s in Computer Science, Machine Learning, AI, or related field.",\n    "2–3 years of hands-on experience in AI/ML, with a strong focus on NLP and LLM-based projects.",\n    "Proficiency in Python and popular ML libraries (TensorFlow, PyTorch, Hugging Face Transformers, etc.).",\n    "Strong skills in prompt engineering/prompt chaining.",\n    "Experience with Langflow or similar AI workflow tools.",\n    "Strong understanding of statistical concepts, ML algorithms, and model evaluation techniques.",\n    "Experience with cloud platforms and model deployment.",\n    "Excellent problem-solving, analytical, and communication skills."\n  ],\n  "skills_good_to_have": [\n    "Familiarity with web crawling and scraping tools."\n  ],\n  "what_will_candidate_do": "Collaborate with data scientists, software engineers, and domain experts to design, develop, and deploy AI-powered solutions. Work on large datasets, build advanced NLP pipelines, and experiment with state-of-the-art LLMs to create real-world impact.",\n  "additional_notes_or_perks": "Innovative AI Work – Be part of projects at the forefront of NLP and Generative AI.\\nReal-World Impact – Your solutions will directly shape client outcomes and business decisions.\\nCollaborative Environment – Work alongside experts in AI, data science, and domain research.\\nContinuous Growth – Access to training, research resources, and the latest AI tools.",\n  "salary": null\n}',
            additional_kwargs={},
            response_metadata={
                "model": "qwen3:8b",
                "created_at": "2026-02-04T19:02:48.6178847Z",
                "done": True,
                "done_reason": "stop",
                "total_duration": 14859009900,
                "load_duration": 57191900,
                "prompt_eval_count": 600,
                "prompt_eval_duration": 111810700,
                "eval_count": 1152,
                "eval_duration": 14674541600,
                "logprobs": None,
                "model_name": "qwen3:8b",
                "model_provider": "ollama",
            },
            id="lc_run--019c2a08-ec1e-7bc2-8c6d-e0759a978dcf-0",
            tool_calls=[],
            invalid_tool_calls=[],
            usage_metadata={
                "input_tokens": 600,
                "output_tokens": 1152,
                "total_tokens": 1752,
            },
        ),
    ]
}

In [92]:
p.get("messages")[-1].content

'{\n  "job_title": "Machine Learning Engineer",\n  "job_location": "Hyderabad, Telangana",\n  "employment_type": "full_time",\n  "contract_type": "permanent",\n  "company_name": "SciTech Patent Art",\n  "company_description": "At SciTech Patent Art, we leverage cutting-edge technology to deliver innovative solutions in intellectual property and research domains. Our AI/ML team works on real-world applications of Natural Language Processing (NLP), Large Language Models (LLMs), and Generative AI—driving impactful change across industries.",\n  "roles_and_responsibilities": [\n    "Design, develop, and optimize machine learning models for NLP, computer vision, and predictive analytics.",\n    "Apply prompt engineering and prompt chaining to improve AI performance.",\n    "Work with tools like Langflow for AI orchestration.",\n    "Handle large-scale text data—perform data preprocessing, feature engineering, and augmentation.",\n    "Research, evaluate, and implement the latest ML algorith