In [1]:
%load_ext autoreload
%autoreload 2

## The job search recruitment agency!
1. Profiler
2. Recruiter
3. Matcher
4. Cover Letter Writer

In [2]:
import warnings
import os
from dotenv import load_dotenv, find_dotenv
import yaml
from crewai import Crew, Process, Agent, Task
from crewai.tools import tool

In [3]:
_ = load_dotenv(find_dotenv())
warnings.filterwarnings("ignore")
os.environ['OPENAI_MODEL_NAME'] = 'gpt-4o-mini'

Instantiate LLM Observability

In [4]:
from phoenix.otel import register

tracer_provider = register(
    project_name = "crewai",
    endpoint="http://localhost:6006/v1/traces"
)

🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: crewai
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: http://localhost:6006/v1/traces
|  Transport: HTTP
|  Transport Headers: {'api_key': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [5]:
from openinference.instrumentation.crewai import CrewAIInstrumentor

CrewAIInstrumentor().instrument(tracer_provider=tracer_provider)

### Loading tasks and agents from yaml files

In [6]:
from typing import Dict, Tuple
def load_configs(files: Dict[str, str]) -> Tuple[dict, dict]:
    """Loads yml config files from a dictionary of filepaths

    Args:
        files (Dict[str, str]): Dictionary of filepaths containing yml files
        For example:
        {
            'agents': '../config/agents.yml',
            'tasks': '../config/tasks.yml'
        }
    """
    configs = {}
    for config_type, file_path in files.items():
        with open(file_path, 'r') as file:
            configs[config_type] = yaml.safe_load(file)
    return configs['agents'], configs['tasks']

In [7]:
job_search_agents_config, job_search_tasks_config = load_configs(
    {
        'agents': '../config/job_search_agents.yml',
        'tasks': '../config/job_search_tasks.yml',
    }
)

## Create Pydantic Models for Structured Output

In [8]:
from pydantic import BaseModel, Field
from typing import Literal, List

class WorkExperience(BaseModel):
    project_title: str = Field(..., description="Name of project")
    summary_of_experience: str = Field(..., description="Short summary of experience")
    skills_exhibited: List[str] = Field(..., description="Skills exhibited during this project")

class CandidateProfile(BaseModel):
    name: str = Field(..., description="Candidate's name")
    highest_qualification: Literal["Bachelor's Degree", "Masters Degree", "PhD"] = Field(..., description="Candidate's highest qualification")
    field_of_study: str = Field(..., description="Candidate's field of study for highest qualification")
    institution_of_study: str = Field(..., description="Candidate's institution of study for highest qualification")
    technical_skills: List[str] = Field(..., description="List of candidate's technical skills. For example 'machine learning'.")
    non_technical_skills: List[str] = Field(..., description="List of candidate's non-technical skills. For example 'strategic planning'.")
    years_of_experience: int = Field(..., description="Candidate's years of experience")
    current_job_title: str = Field(..., description="Candidate's current job title")
    work_experiences: List[WorkExperience] = Field(..., description="Candidate's job experience")

class JobListing(BaseModel):
    company_name: str = Field(..., description="Company name")
    job_title: str = Field(..., description="Job title")
    job_description: str = Field(..., description="Job description")
    required_technical_skills: List[str] = Field(..., description="List of required technical skills. For example 'machine learning'.")
    required_non_technical_skills: List[str] = Field(..., description="List of required non-technical skills. For example 'strategic planning'.")
    years_of_experience_required: int = Field(..., description="Years of experience required")
    reason_for_candidate_fit: str = Field(..., description="Reason for candidate fit to job.")

class JobListings(BaseModel):
    job_listings: List[JobListing] = Field(..., description="List of job listings")

class ScoredListing(BaseModel):
    job_listing: JobListing = Field(..., description="Job listing of interest")
    score: float = Field(..., description="Score of job listing fit to candidate profile on a scale of 1-10.")
    justfication: str = Field(..., description="Justification for score")

class ScoredListings(BaseModel):
    scored_listings: List[ScoredListing] = Field(..., description="List of scored job listings")

class CoverLetter(BaseModel):
    scored_listing: ScoredListing = Field(..., description="Scored job listing of interest")
    cover_letter: str = Field(..., description="Cover letter for scored listing")

class CoverLetters(BaseModel):
    cover_letters: List[CoverLetter] = Field(..., description="List of cover letters")

## Writing a custom tool to load in the resume!

In [9]:
from crewai.tools import BaseTool
from llama_parse import LlamaParse
from typing import Any
import nest_asyncio
nest_asyncio.apply()

class ResumeParser(BaseTool):
    name: str = "Resume reader"
    description: str = "Parses a pdf resume file neatly for LLM processing"
    parser: LlamaParse = LlamaParse(
        api_key = os.environ["LLAMA_CLOUD_API_KEY"],
        # use_vendor_multimodal_model=True,
        # vendor_multimodal_name="anthropic-sonnet-3.5",
        result_type="markdown"
    )
    
    def find_resume(self) -> str:
        """Method to search 2 levels up for resume folder"""
        
        __curdir__ = os.getcwd()
        depth = 0
        resume_dir_path = ""
        
        while (depth<3):    
            if "Resume" not in os.listdir(__curdir__):
                __curdir__ = os.path.abspath(os.path.join(__curdir__, os.path.pardir))
                depth += 1
            else:
                resume_dir_path = os.path.abspath(os.path.join(__curdir__, "Resume"))
                break
        
        return resume_dir_path
    
    def _run(self) -> Dict[str, Any]:
        """Returns the entire resume in string format."""
        
        file_path = self.find_resume()
        resume = os.path.join(
            file_path,
            os.listdir(file_path)[0]
        )
        docs = self.parser.load_data(file_path=resume)
        return {
            "resume": "\n\n".join([doc.text for doc in docs])
        }

##### Creating LlamaIndex Tavily Tool

In [10]:
from crewai_tools import LlamaIndexTool
from llama_index.tools.tavily_research import TavilyToolSpec

tavily_spec = TavilyToolSpec(api_key=os.environ["TAVILY_API_KEY"])
tavily_tool_list = tavily_spec.to_tool_list()
tavily_tools = [LlamaIndexTool.from_tool(tool) for tool in tavily_tool_list]

#### Creating our Crew, Agents and Tasks

In [11]:
profiler_agent = Agent(
    config = job_search_agents_config['profiler'],
    tools = [ResumeParser()]
)
recruiter = Agent(
    config=job_search_agents_config['recruiter'],
    tools = [*tavily_tools] 
)
matcher = Agent(
    config = job_search_agents_config['matcher'],
)
reporter = Agent(
    config=job_search_agents_config['reporter'],
)

In [12]:
profiling_task = Task(
    config = job_search_tasks_config['candidate_profiling_task'],
    agent = profiler_agent,
    output_pydantic = CandidateProfile,
    expected_output = "A clear candidate profile"
)
research_task = Task(
    config = job_search_tasks_config['research_job_listings_task'],
    agent = recruiter,
    context = [profiling_task],
    output_pydantic = JobListings,
    expected_output = "A list of job listings that fit the candidate profile"
)
scoring_task = Task(
    config = job_search_tasks_config['match_and_score_listings_task'],
    agent = matcher,
    context = [research_task, profiling_task],
    output_pydantic=ScoredListings,
    expected_output = "A list of top 3 job listings ranked with justification"
)
cover_letter_task = Task(
    config = job_search_tasks_config['cover_letter_task'],
    agent = reporter,
    context = [scoring_task, profiling_task],
    output_pydantic=CoverLetters,
    expected_output="A list of cover letters for the top 3 job listings"
)

In [13]:
job_crew = Crew(
    agents = [
        profiler_agent,
        recruiter,
        matcher,
        reporter
    ],
    tasks = [
        profiling_task,
        research_task,
        scoring_task,
        cover_letter_task
    ],
    verbose = True,
)

results = job_crew.kickoff(
    inputs = {
        'job_requirements': 'Generative AI related data scientist jobs or management positions.'
    }
)



[1m[95m# Agent:[00m [1m[92mLead Human Resource Manager[00m
[95m## Task:[00m [92mAnalyze the candidate's resume to identify their main strengths and qualifications. Highlight the candidate's technical and non-technical abilities to determine their suitability  for specific job roles.
[00m
Started parsing the file under job_id cc43dfe5-417b-4d18-8c61-1e59f2dd331a






[1m[95m# Agent:[00m [1m[92mLead Human Resource Manager[00m
[95m## Using tool:[00m [92mResume reader[00m
[95m## Tool Input:[00m [92m
"{}"[00m
[95m## Tool Output:[00m [92m
{'resume': "# LIM HSIEN YONG (“TITUS”)\n\ntituslhy@gmail.com ∙ +65 9092 6178\n\nlinkedin.com/tituslim ∙ https://github.com/tituslhy ∙ https://tituslim.onrender.com/\n\n# EDUCATION\n\n# SINGAPORE MANAGEMENT UNIVERSITY\n\nAug 2021 - Dec 2022\n\nMaster of IT in Business (Artificial Intelligence Track)\n\nDean’s List, GPA: 3.98 / 4.0\n\n- Awarded the SMU AI Talent Development Grant and SMU MITB Scholarship\n\n# NATIONAL UNIVERSITY OF SINGAPORE\n\nAug 2011 - Jul 2015\n\nBachelor of Engineering in Chemical Engineering (2nd Upper Honors)\n\n- Awarded the NUS Undergraduate Scholarship, Barco-Santander Scholarship and IE Singapore Young Talent Program Market Immersion Award\n\n# EXPERIENCE\n\n# Illumina – Singapore\n\nJan 2023 – Present\n\n# Senior Data Scientist\n\n- Prototyped instance segmentation model us



[1m[95m# Agent:[00m [1m[92mJob Openings Researcher[00m
[95m## Task:[00m [92mConduct thorough research to find potential job listings for the candidate. Utilize various online resources and databases to gather a comprehensive list of potential listings. Ensure that you meet the candidate's preferred job requirements.
Job Requirements: Generative AI related data scientist jobs or management positions.
[00m






[1m[95m# Agent:[00m [1m[92mJob Openings Researcher[00m
[95m## Thought:[00m [92mI need to find job listings that match the candidate's requirements for generative AI-related data scientist jobs or management positions.[00m
[95m## Using tool:[00m [92msearch[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"Generative AI data scientist OR management positions\", \"max_results\": 6}"[00m
[95m## Tool Output:[00m [92m
[Document(id_='8a1577e9-7f09-4ce4-a41e-e30c61210a60', embedding=None, metadata={'url': 'https://www.indeed.com/q-generative-ai-data-science-jobs.html'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='2,910 Generative Ai Data Science jobs available on Indeed.com. Apply to Data Scientist, Data Science Manager, Staff Scientist and more!', path=None, url=None, mimetype=None), image_resource=None, audio_reso





[1m[95m# Agent:[00m [1m[92mJob Openings Researcher[00m
[95m## Final Answer:[00m [92m
{
  "job_listings": [
    {
      "company_name": "Honeywell",
      "job_title": "Generative AI and Data Scientist",
      "job_description": "Join Honeywell's team as a Generative AI and Data Scientist. Apply today on AI Careers!",
      "required_technical_skills": ["Generative AI", "Data Science", "Machine Learning"],
      "required_non_technical_skills": ["Communication", "Collaboration"],
      "years_of_experience_required": 5,
      "reason_for_candidate_fit": "The candidate has experience as a Senior Data Scientist and strong technical expertise in AI and Machine Learning."
    },
    {
      "company_name": "Amazon",
      "job_title": "Data Scientist, Generative AI Innovation Center",
      "job_description": "Are you looking to work at the forefront of Machine Learning and AI? Would you be excited to apply cutting edge Generative AI algorithms to solve real-world problems with s





[1m[95m# Agent:[00m [1m[92mJob Listings matcher and scorer[00m
[95m## Final Answer:[00m [92m
{
  "scored_listings": [
    {
      "job_listing": {
        "company_name": "Generative AI Careers",
        "job_title": "Lead Data Scientist – Generative AI",
        "job_description": "We're looking for a lead data scientist specializing in Generative AI to help drive innovative projects.",
        "required_technical_skills": ["Data Science", "Generative AI", "Python"],
        "required_non_technical_skills": ["Leadership", "Focus on Business Impact"],
        "years_of_experience_required": 5,
        "reason_for_candidate_fit": "The candidate's leadership experience and technical mastery in data science makes them an ideal candidate."
      },
      "score": 9.5,
      "justfication": "The candidate has strong experience in data science, relevant technical skills including Python, and has demonstrated leadership capabilities in their current role."
    },
    {
      "job_l





[1m[95m# Agent:[00m [1m[92mCover Letter Writing Specialist[00m
[95m## Final Answer:[00m [92m
{
  "cover_letters": [
    {
      "scored_listing": {
        "job_listing": {
          "company_name": "Generative AI Careers",
          "job_title": "Lead Data Scientist – Generative AI",
          "job_description": "We're looking for a lead data scientist specializing in Generative AI to help drive innovative projects.",
          "required_technical_skills": ["Data Science", "Generative AI", "Python"],
          "required_non_technical_skills": ["Leadership", "Focus on Business Impact"],
          "years_of_experience_required": 5,
          "reason_for_candidate_fit": "The candidate's leadership experience and technical mastery in data science makes them an ideal candidate."
        },
        "score": 9.5,
        "justfication": "The candidate has strong experience in data science, relevant technical skills including Python, and has demonstrated leadership capabilities in 

In [None]:
from IPython.display import display, Markdown


display(Markdown(results.pydantic.cover_letters[0].cover_letter))

Lim Hsien Yong
[Your Address]
[City, State, Zip]
[Email]
[Phone Number]
[Date]

Hiring Manager
Generative AI Careers
[Company Address]
[City, State, Zip]

Dear Hiring Manager,

I am writing to express my interest in the Lead Data Scientist position at Generative AI Careers, as advertised. With a Master of IT in Business (Artificial Intelligence Track) and over five years of hands-on experience in data science, I am excited about the opportunity to leverage my expertise in Generative AI to drive innovative projects at your esteemed company.

In my most recent role as a Senior Data Scientist, I led the project for developing a Defect Detection system in Flowcells. This involved designing and prototyping advanced instance segmentation models, utilizing a Siamese Network architecture to deliver precise defect identification, resulting in significant cost savings and enhanced image matching efficiency. This project honed my technical skills in Python and reinforced my ability to implement impactful data-driven solutions.

Another project I successfully spearheaded was the development of a Generative BI Assistant Application. This application enabled users to extract insights through natural language queries, showcasing my strong expertise in Natural Language Processing and application development. This initiative not only improved operational efficiency but also provided valuable insights tailored to user needs, demonstrating my commitment to prioritizing business impact.

I believe my technical depth in data science and AI, coupled with my leadership abilities, aligns perfectly with the requirements of the Lead Data Scientist role. I am passionate about harnessing the power of Generative AI to unlock new potential and drive meaningful change within organizations.

Thank you for considering my application. I look forward to the possibility of discussing how my experience and vision can contribute to the exciting projects at Generative AI Careers.

Sincerely,
Lim Hsien Yong

In [28]:
import pandas as pd

costs = 0.150 * (job_crew.usage_metrics.prompt_tokens + job_crew.usage_metrics.completion_tokens) / 1_000_000
print(f"Total costs: ${costs:.4f}")

# Convert UsageMetrics instance to a DataFrame
df_usage_metrics = pd.DataFrame([job_crew.usage_metrics.dict()])
df_usage_metrics

Total costs: $0.0029


/var/folders/zb/r15p7t_d62d8m2s0623s22gh0000gn/T/ipykernel_24720/1113274036.py:7: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  df_usage_metrics = pd.DataFrame([job_crew.usage_metrics.dict()])


Unnamed: 0,total_tokens,prompt_tokens,cached_prompt_tokens,completion_tokens,successful_requests
0,19507,14607,4096,4900,9


## This is not the end though!

Clearly we can do more with the cover letter!