In [1]:
%load_ext autoreload
%autoreload 2

## The job search recruitment agency!
1. Profiler
2. Recruiter
3. Matcher
4. Cover Letter Writer

In [2]:
import warnings
import os
from dotenv import load_dotenv, find_dotenv
import yaml
from crewai import Crew, Process, Agent, Task
from crewai.tools import tool

In [3]:
_ = load_dotenv(find_dotenv())
warnings.filterwarnings("ignore")
os.environ['OPENAI_MODEL_NAME'] = 'gpt-4o-mini'

Instantiate LLM Observability

In [4]:
from phoenix.otel import register

tracer_provider = register(
    project_name = "crewai",
    endpoint="http://localhost:6006/v1/traces"
)

🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: crewai
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: http://localhost:6006/v1/traces
|  Transport: HTTP
|  Transport Headers: {'api_key': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [5]:
from openinference.instrumentation.crewai import CrewAIInstrumentor
from openinference.instrumentation.litellm import LiteLLMInstrumentor

CrewAIInstrumentor().instrument(tracer_provider=tracer_provider)
LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)

### Loading tasks and agents from yaml files

In [6]:
from typing import Dict, Tuple
def load_configs(files: Dict[str, str]) -> Tuple[dict, dict]:
    """Loads yml config files from a dictionary of filepaths

    Args:
        files (Dict[str, str]): Dictionary of filepaths containing yml files
        For example:
        {
            'agents': '../config/agents.yml',
            'tasks': '../config/tasks.yml'
        }
    """
    configs = {}
    for config_type, file_path in files.items():
        with open(file_path, 'r') as file:
            configs[config_type] = yaml.safe_load(file)
    return configs['agents'], configs['tasks']

In [7]:
job_search_agents_config, job_search_tasks_config = load_configs(
    {
        'agents': '../config/job_search_agents.yml',
        'tasks': '../config/job_search_tasks.yml',
    }
)

## Create Pydantic Models for Structured Output

In [8]:
from pydantic import BaseModel, Field
from typing import Literal, List

class WorkExperience(BaseModel):
    project_title: str = Field(..., description="Name of project")
    summary_of_experience: str = Field(..., description="Short summary of experience")
    skills_exhibited: List[str] = Field(..., description="Skills exhibited during this project")

class CandidateProfile(BaseModel):
    name: str = Field(..., description="Candidate's name")
    highest_qualification: Literal["Bachelor's Degree", "Masters Degree", "PhD"] = Field(..., description="Candidate's highest qualification")
    field_of_study: str = Field(..., description="Candidate's field of study for highest qualification")
    institution_of_study: str = Field(..., description="Candidate's institution of study for highest qualification")
    technical_skills: List[str] = Field(..., description="List of candidate's technical skills. For example 'machine learning'.")
    non_technical_skills: List[str] = Field(..., description="List of candidate's non-technical skills. For example 'strategic planning'.")
    years_of_experience: int = Field(..., description="Candidate's years of experience")
    current_job_title: str = Field(..., description="Candidate's current job title")
    work_experiences: List[WorkExperience] = Field(..., description="Candidate's job experience")

class JobListing(BaseModel):
    company_name: str = Field(..., description="Company name")
    job_title: str = Field(..., description="Job title")
    job_description: str = Field(..., description="Job description")
    required_technical_skills: List[str] = Field(..., description="List of required technical skills. For example 'machine learning'.")
    required_non_technical_skills: List[str] = Field(..., description="List of required non-technical skills. For example 'strategic planning'.")
    years_of_experience_required: int = Field(..., description="Years of experience required")
    reason_for_candidate_fit: str = Field(..., description="Reason for candidate fit to job.")

class JobListings(BaseModel):
    job_listings: List[JobListing] = Field(..., description="List of job listings")

class ScoredListing(BaseModel):
    job_listing: JobListing = Field(..., description="Job listing of interest")
    score: float = Field(..., description="Score of job listing fit to candidate profile on a scale of 1-10.")
    justfication: str = Field(..., description="Justification for score")

class ScoredListings(BaseModel):
    scored_listings: List[ScoredListing] = Field(..., description="List of scored job listings")

class CoverLetter(BaseModel):
    scored_listing: ScoredListing = Field(..., description="Scored job listing of interest")
    cover_letter: str = Field(..., description="Cover letter for scored listing")

class CoverLetters(BaseModel):
    cover_letters: List[CoverLetter] = Field(..., description="List of cover letters")

## Writing a custom tool to load in the resume!

In [9]:
from crewai.tools import BaseTool
from llama_parse import LlamaParse
from typing import Any
import nest_asyncio
nest_asyncio.apply()

class ResumeParser(BaseTool):
    name: str = "Resume reader"
    description: str = "Parses a pdf resume file neatly for LLM processing"
    parser: LlamaParse = LlamaParse(
        api_key = os.environ["LLAMA_CLOUD_API_KEY"],
        use_vendor_multimodal_model=True,
        vendor_multimodal_name="anthropic-sonnet-3.5",
        result_type="markdown"
    )
    
    def find_resume(self) -> str:
        """Method to search 2 levels up for resume folder"""
        
        __curdir__ = os.getcwd()
        depth = 0
        resume_dir_path = ""
        
        while (depth<3):    
            if "Resume" not in os.listdir(__curdir__):
                __curdir__ = os.path.abspath(os.path.join(__curdir__, os.path.pardir))
                depth += 1
            else:
                resume_dir_path = os.path.abspath(os.path.join(__curdir__, "Resume"))
                break
        
        return resume_dir_path
    
    def _run(self) -> Dict[str, Any]:
        """Returns the entire resume in string format."""
        
        file_path = self.find_resume()
        resume = os.path.join(
            file_path,
            os.listdir(file_path)[0]
        )
        docs = self.parser.load_data(file_path=resume)
        return {
            "resume": "\n\n".join([doc.text for doc in docs])
        }

##### Creating LlamaIndex Tavily Tool

In [10]:
from crewai_tools import LlamaIndexTool
from llama_index.tools.tavily_research import TavilyToolSpec

tavily_spec = TavilyToolSpec(api_key=os.environ["TAVILY_API_KEY"])
tavily_tool_list = tavily_spec.to_tool_list()
tavily_tools = [LlamaIndexTool.from_tool(tool) for tool in tavily_tool_list]

#### Creating our Crew, Agents and Tasks

In [11]:
profiler_agent = Agent(
    config = job_search_agents_config['profiler'],
    tools = [ResumeParser()]
)
recruiter = Agent(
    config=job_search_agents_config['recruiter'],
    tools = [*tavily_tools] 
)
matcher = Agent(
    config = job_search_agents_config['matcher'],
)
reporter = Agent(
    config=job_search_agents_config['reporter'],
)

In [12]:
profiling_task = Task(
    config = job_search_tasks_config['candidate_profiling_task'],
    agent = profiler_agent,
    output_pydantic = CandidateProfile,
    expected_output = "A clear candidate profile"
)
research_task = Task(
    config = job_search_tasks_config['research_job_listings_task'],
    agent = recruiter,
    context = [profiling_task],
    output_pydantic = JobListings,
    expected_output = "A list of job listings that fit the candidate profile"
)
scoring_task = Task(
    config = job_search_tasks_config['match_and_score_listings_task'],
    agent = matcher,
    context = [research_task, profiling_task],
    output_pydantic=ScoredListings,
    expected_output = "A list of top 3 job listings ranked with justification"
)
cover_letter_task = Task(
    config = job_search_tasks_config['cover_letter_task'],
    agent = reporter,
    context = [scoring_task, profiling_task],
    output_pydantic=CoverLetters,
    expected_output="A list of cover letters for the top 3 job listings"
)

In [13]:
job_crew = Crew(
    agents = [
        profiler_agent,
        recruiter,
        matcher,
        reporter
    ],
    tasks = [
        profiling_task,
        research_task,
        scoring_task,
        cover_letter_task
    ],
    verbose = True,
)

results = job_crew.kickoff()



[1m[95m# Agent:[00m [1m[92mLead Human Resource Manager[00m
[95m## Task:[00m [92mAnalyze the candidate's resume to identify their main strengths and qualifications. Highlight the candidate's technical and non-technical abilities to determine their suitability  for specific job roles.
[00m
Started parsing the file under job_id b3b6277c-bacc-4086-bb93-0f2a9f8759de






[1m[95m# Agent:[00m [1m[92mLead Human Resource Manager[00m
[95m## Using tool:[00m [92mResume reader[00m
[95m## Tool Input:[00m [92m
"{\"file_path\": \"path_to_resume.pdf\"}"[00m
[95m## Tool Output:[00m [92m
{'resume': "# LIM HSIEN YONG (“Ttrus”)\n\ntituslhy@gmail.com · +65 9092 6178  \nlinkedin.com/itituslim · github.com/itituslhy · https://itituslim.onrender.com/\n\n## EDUCATION\n\n### SINGAPORE MANAGEMENT UNIVERSITY  \n**Master of IT in Business (Artificial Intelligence Track)**  \nDean’s List, GPA: 3.98 / 4.0  \n- Awarded the SMU AI Talent Development Grant and SMU MITB Scholarship  \n*Aug 2021 - Dec 2022*\n\n### NATIONAL UNIVERSITY OF SINGAPORE  \n**Bachelor of Engineering in Chemical Engineering (2nd Upper Honors)**  \n- Awarded the NUS Undergraduate Scholarship, Barco-Santander Scholarship and IE Singapore Young Talent Program Market Immersion Award  \n*Aug 2011 - Jul 2015*\n\n## EXPERIENCE\n\n### Illumina – Singapore  \n**Senior Data Scientist**  \n*Jan 2023 –



[1m[95m# Agent:[00m [1m[92mJob Openings Researcher[00m
[95m## Task:[00m [92mConduct thorough research to find potential job listings for the candidate. Utilize various online resources and databases to gather a comprehensive list of potential listings. Ensure that the candidates meet the job requirements provided.
[00m






[1m[95m# Agent:[00m [1m[92mJob Openings Researcher[00m
[95m## Thought:[00m [92mI need to search for job listings that fit the candidate profile of Lim Hsien Yong, who is a Senior Data Scientist with extensive experience in machine learning, AI, and data analytics.[00m
[95m## Using tool:[00m [92msearch[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"Senior Data Scientist jobs with a focus on machine learning, AI, and data analytics\", \"max_results\": 6}"[00m
[95m## Tool Output:[00m [92m
[Document(id_='a91cf7f6-be30-4a1d-8b70-bf6b273640f9', embedding=None, metadata={'url': 'https://www.indeed.com/q-senior-data-scientist-ml-ai-jobs.html'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='234 Senior Data Scientist Ml Ai jobs available on Indeed.com. Apply to Senior Data Scientist, Senior Ai/ml Engineer, Ai/ml Engi





[1m[95m# Agent:[00m [1m[92mJob Openings Researcher[00m
[95m## Final Answer:[00m [92m
{
  "job_listings": [
    {
      "company_name": "Indeed",
      "job_title": "Senior Data Scientist",
      "job_description": "The Senior Data Scientist plays a critical role in leading analytical projects and developing advanced models that solve complex business challenges. Candidates should bring 5-10 years of experience in data science, with deep expertise in statistical analysis, machine learning, and big data technologies.",
      "required_technical_skills": ["Machine Learning", "Statistical Analysis", "Big Data Technologies"],
      "required_non_technical_skills": ["Project Management", "Analytical Thinking", "Problem Solving"],
      "years_of_experience_required": 5,
      "reason_for_candidate_fit": "Lim has 8 years of experience as a Senior Data Scientist, with expertise in machine learning and analytical projects."
    },
    {
      "company_name": "LinkedIn",
      "job_ti





[1m[95m# Agent:[00m [1m[92mJob Listings matcher and scorer[00m
[95m## Final Answer:[00m [92m
{
  "scored_listings": [
    {
      "job_listing": {
        "company_name": "Indeed",
        "job_title": "Senior Data Scientist",
        "job_description": "The Senior Data Scientist plays a critical role in leading analytical projects and developing advanced models that solve complex business challenges. Candidates should bring 5-10 years of experience in data science, with deep expertise in statistical analysis, machine learning, and big data technologies.",
        "required_technical_skills": ["Machine Learning", "Statistical Analysis", "Big Data Technologies"],
        "required_non_technical_skills": ["Project Management", "Analytical Thinking", "Problem Solving"],
        "years_of_experience_required": 5,
        "reason_for_candidate_fit": "Lim has 8 years of experience as a Senior Data Scientist, with expertise in machine learning and analytical projects."
      },
   





[1m[95m# Agent:[00m [1m[92mCover Letter Writing Specialist[00m
[95m## Final Answer:[00m [92m
{
  "cover_letters": [
    {
      "scored_listing": {
        "job_listing": {
          "company_name": "Indeed",
          "job_title": "Senior Data Scientist",
          "job_description": "The Senior Data Scientist plays a critical role in leading analytical projects and developing advanced models that solve complex business challenges. Candidates should bring 5-10 years of experience in data science, with deep expertise in statistical analysis, machine learning, and big data technologies.",
          "required_technical_skills": ["Machine Learning", "Statistical Analysis", "Big Data Technologies"],
          "required_non_technical_skills": ["Project Management", "Analytical Thinking", "Problem Solving"],
          "years_of_experience_required": 5,
          "reason_for_candidate_fit": "Lim has 8 years of experience as a Senior Data Scientist, with expertise in machine learnin

In [14]:
from IPython.display import display, Markdown

display(Markdown(results.pydantic.cover_letters[0].cover_letter))

Dear Hiring Manager,

I am writing to express my interest in the Senior Data Scientist position at Indeed. With over eight years of experience in data science, I have developed a robust skill set that aligns seamlessly with the requirements of this role. My extensive expertise in machine learning and statistical analysis has enabled me to lead successful analytical projects that tackle complex business challenges. 

In my current role as a Senior Data Scientist, I have pioneered advanced models that not only drive business insights but also enhance operational efficiencies. One such project involved leading an initiative to create a national forecasting model that significantly influenced policy-making and resource planning. My technical proficiency, paired with strong project management and analytical thinking skills, ensures that I can contribute effectively to your team.

Furthermore, I believe in a collaborative approach to problem-solving, which has resulted in productive interactions across interdisciplinary teams. I am eager to bring my background in big data technologies, machine learning, and my passion for continuous learning to Indeed. I look forward to discussing how my experience can help support the ambitious projects at your organization.

Thank you for considering my application. I am excited about the opportunity to contribute to Indeed and look forward to your positive response.

Sincerely,
Lim Hsien Yong

In [15]:
import pandas as pd

costs = 0.150 * (job_crew.usage_metrics.prompt_tokens + job_crew.usage_metrics.completion_tokens) / 1_000_000
print(f"Total costs: ${costs:.4f}")

# Convert UsageMetrics instance to a DataFrame
df_usage_metrics = pd.DataFrame([job_crew.usage_metrics.dict()])
df_usage_metrics

Total costs: $0.0033


/var/folders/zb/r15p7t_d62d8m2s0623s22gh0000gn/T/ipykernel_16772/1113274036.py:7: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  df_usage_metrics = pd.DataFrame([job_crew.usage_metrics.dict()])


Unnamed: 0,total_tokens,prompt_tokens,cached_prompt_tokens,completion_tokens,successful_requests
0,22186,16978,6144,5208,9


## This is not the end though!

Clearly we can do more with the cover letter!