In [41]:
# Load variables from a local .env file into the process environment before any
# model client is created (presumably this is where the Groq API key comes from
# -- confirm against the project's .env).
from dotenv import load_dotenv
load_dotenv()

True

In [42]:

# Patch asyncio so that event loops can be nested. Presumably required because
# the notebook kernel already runs a loop and the agents below are driven with
# the blocking run_sync() call -- confirm before removing.
import nest_asyncio
nest_asyncio.apply()

from rich import print as pprint 

In [43]:
from pydantic_ai import Agent 

# Smoke test: a minimal agent with only a static system prompt (no tools, no
# dependencies, no structured result type).
agent = Agent(model="groq:llama-3.2-1b-preview",system_prompt="You are world class cook. Provide recipes based on user request")

# run_sync() is the blocking entry point; the model's answer is read from
# result.data and rendered with rich's print.
result = agent.run_sync("Suggest me 3 reciepe for today's breakfast")
pprint(result.data)

--What is given. 

1. Fetch the job data from the provided URL.
2. Use the job posting to draft an email.

In [44]:
# Scraping the job data from given URL 

import requests
from bs4 import BeautifulSoup
from datetime import datetime
import html2text
import re

def clean_text(text):
    """Normalise blank-line runs in extracted text.

    Leading and trailing whitespace is stripped first; afterwards every run of
    two or more line breaks (optionally separated by whitespace-only lines) is
    collapsed into exactly one empty line.

    Args:
        text: The raw extracted text.

    Returns:
        The trimmed text with blank-line runs collapsed.
    """
    blank_run = re.compile(r'\n\s*\n')
    trimmed = text.strip()
    return blank_run.sub('\n\n', trimmed)

def scrape_website(url, selector=None, timeout=30):
    """Download a web page, convert it to Markdown, save it and return it.

    The page is fetched with a browser-like User-Agent, stripped of
    script/style/nav/footer/header elements and converted to Markdown with
    html2text. The cleaned Markdown is also written to a timestamped
    ``scraped_content_<YYYYmmdd_HHMMSS>.md`` file in the working directory.

    Args:
        url: Address of the page to scrape.
        selector: Optional CSS selector. When given, only the matching
            elements are converted (joined by blank lines); otherwise the
            whole document body is converted.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The cleaned Markdown text, or "" if anything went wrong (the error is
        printed, not raised).
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        # Without an explicit timeout, requests waits indefinitely on an
        # unresponsive host and the notebook cell never returns.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove unwanted elements
        for element in soup.select('script, style, nav, footer, header'):
            element.decompose()

        # Convert to Markdown
        h = html2text.HTML2Text()
        h.ignore_links = False
        h.ignore_images = False
        h.body_width = 0  # 0 disables html2text's hard line wrapping

        if selector:
            elements = soup.select(selector)
            content = '\n\n'.join(h.handle(str(element)) for element in elements)
        else:
            # soup.body is None when the document has no <body> tag; str(None)
            # would otherwise leak the literal text "None" into the result.
            root = soup.body if soup.body is not None else soup
            content = h.handle(str(root))

        # Clean and save content
        content = clean_text(content)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        with open(f'scraped_content_{timestamp}.md', 'w', encoding='utf-8') as file:
            file.write(content)

        return content

    except Exception as e:
        # Best-effort contract: callers receive "" on any failure.
        print(f"Error: {e}")
        return ""

In [53]:
from typing import List
from dataclasses import dataclass
from pydantic import BaseModel, Field
from pydantic_ai import RunContext

# Agent dependencies: passed as `deps=` when the agent is run and read inside
# the tool through `ctx.deps`.
@dataclass
class JobInformationFetch:
    """Run-time input for the job description agent."""
    # URL of the job posting that the tool should scrape.
    job_post_url: str 
    
# Tool output: the value the scraping tool returns to the agent.
@dataclass
class JobDescriptionAgentDependencies:
    """Container for the scraped job posting handed back to the agent."""
    # Scraped posting content as produced by scrape_website (Markdown text).
    job_posting_information: str
    
class JobDescriptionAgentResult(BaseModel):
    # Structured result the parser agent must produce (used as its result_type).
    # NOTE(review): the Field descriptions are part of the schema and are
    # presumably shown to the model -- treat them as prompt text, not as
    # ordinary comments, when editing.
    
    role: str = Field(
    description="The job title or role position being described (e.g., 'Senior Software Engineer', 'Product Manager')"
    )
    company_name: str = Field(description="The Company which posted job")
    experience: str = Field(
        description="Required years and type of experience for the position (e.g., '5+ years of software development')"
    )
    skills: List[str] = Field(
        description="List of specific technical skills, tools, or competencies required for the role (e.g., ['Python', 'AWS', 'Machine Learning'])"
    )
    description: str = Field(
        description="Detailed overview of the job responsibilities, requirements, and expectations"
    )
    
    
# Agent that converts a job posting into a structured JobDescriptionAgentResult.
# It receives the posting URL through its dependencies (JobInformationFetch) and
# is expected to obtain the posting text by calling its registered tool.
job_description_parser_agent = Agent(
    model = 'groq:llama-3.3-70b-versatile',
    deps_type = JobInformationFetch,
    result_type = JobDescriptionAgentResult,

    # The tool name in the prompt must match the function registered with
    # @job_description_parser_agent.tool (get_job_details_internet); a
    # mismatched name points the model at a tool that does not exist.
    system_prompt =

    """  
    You are a specialized HR assistant focused on analyzing and structuring job descriptions. Your primary responsibilities are:
    1. Use the get_job_details_internet tool to retrieve job posting information
    2. Extract and categorize key components including:
    - Core role/position title
    - Name of the company that posted the job
    - Required experience level
    - Essential skills and qualifications
    - Detailed role description and responsibilities
    Format all outputs according to the JobDescription schema. Be precise and consistent in your categorization. When analyzing skills:
    - Focus on specific technical and professional competencies
    - Separate distinct skills into individual items
    - Standardize skill names (e.g., "Python" not "python programming")
    If job details are ambiguous or incomplete, make reasonable inferences based on industry standards while maintaining accuracy.
    
    """
)

In [54]:

@job_description_parser_agent.tool
def get_job_details_internet(ctx: RunContext[JobInformationFetch]) -> JobDescriptionAgentDependencies:
    
    """
    Retrieves and extracts job posting information
    """
    # NOTE(review): the docstring above is presumably what the model sees as
    # the tool description, so maintainer notes are kept in comments instead.

    # The URL is not a tool argument chosen by the model; it comes from the
    # dependencies supplied by the caller of run_sync().
    job_post_url = ctx.deps.job_post_url
    job_posting_information = scrape_website(url=job_post_url)

    # scrape_website signals failure by returning "". Passing an empty posting
    # to the model invites a fabricated job description, so fail loudly here.
    if not job_posting_information.strip():
        raise RuntimeError(
            f"Could not retrieve any job posting content from {job_post_url}"
        )

    return JobDescriptionAgentDependencies(
        job_posting_information=job_posting_information
    )

In [55]:
# Run the parser agent. The posting URL is supplied through `deps` (not in the
# prompt text) and is read by the scraping tool via ctx.deps.job_post_url.
job_description_agent_result = job_description_parser_agent.run_sync(
    "please extract job descriptuon for provided url",
    deps = JobInformationFetch(job_post_url="https://boards.greenhouse.io/anthropic/jobs/4461687008"), 
)

# .data holds the structured result returned by the agent.
print(job_description_agent_result.data)

role='Software Engineer' company_name='ABC Corporation' experience='5+ years' skills=['Python', 'Java', 'AWS'] description='Design, develop, and test software applications'
