In [8]:
import json
import os
from typing import List

import instructor
from anthropic import Anthropic
from openai import OpenAI
from pydantic import BaseModel

# Load sample jobs from JSON file.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so the read
# does not depend on the platform's default locale encoding.
with open('sample_jobs.json', 'r', encoding='utf-8') as f:
    job_listings = json.load(f)

# One categorized job posting, as extracted from the model's structured reply.
# NOTE(review): documented with `#` comments rather than a class docstring
# because pydantic/instructor are expected to copy class docstrings into the
# schema sent to the model — confirm before converting these to docstrings.
class JobListing(BaseModel):
    # Identifier of the posting being categorized (the prompt's "[Job ID]").
    id: str
    # Category label; the prompt asks for one of: Ideal Match, Strong Potential,
    # Worth Considering, Not Suitable. Not validated here — any string is accepted.
    category: str
    # Brief justification for the chosen category.
    explanation: str

# Top-level response container: the full set of categorized postings for one
# request. Used as the `response_model` in categorize_jobs.
class CategorizedListings(BaseModel):
    # One entry per job posting the model chose to report.
    listings: List[JobListing]

def get_prompt(job_listings_json: str) -> str:
    """Build the categorization prompt for a batch of job listings.

    The prompt consists of a fixed instruction section (candidate profile,
    the four-category rubric and the requested output layout), followed by
    the supplied listings wrapped in ``<job_listings>`` tags and a closing
    instruction line.

    Args:
        job_listings_json: The job listings, already serialized to a JSON
            string. It is inserted verbatim; no escaping is applied.

    Returns:
        The complete prompt text.
    """
    # Fixed instructions; contains no placeholders, so a plain string suffices.
    preamble = """You are tasked with categorizing job listings based on a specific job requirement profile. Here's the profile and categorization system:

Job Requirement Profile:
- Ideal Job Title: Senior Software Engineer (Frontend, Backend, or Full Stack)
- Key Technical Skills: Web development (senior level), Full stack development (senior level)
- Work Arrangement: Fully remote (required)
- Experience Requirement: At least 5 years of experience (indicative of senior level)
- Flexible Criteria: No specific requirements for salary, industry, or company size

Categorization System:

1. Ideal Match:
   - Matches job title (Senior Software Engineer, including frontend/backend/full stack)
   - Requires senior-level web or full stack development skills
   - Fully remote
   - Requires at least 5 years of experience

2. Strong Potential:
   - Similar job title (e.g., Lead Developer, Software Architect)
   - Requires senior-level development skills
   - Fully remote
   - Requires at least 5 years of experience

3. Worth Considering:
   - Related job title
   - Requires web or full stack development skills (not necessarily senior level)
   - Fully remote
   - May not explicitly state 5 years of experience, but implies senior-level experience

4. Not Suitable:
   - Unrelated job title
   - Does not require web or full stack development skills
   - Not fully remote
   - Requires less than 5 years of experience or entry-level position

Instructions:
1. You will be provided with a list of job listings in JSON format.
2. For each job listing, analyze the information provided and categorize it according to the system above.
3. If a listing doesn't explicitly mention all criteria, make a reasonable inference based on the available information.
4. Create a summary for each listing, including:
   - Job title
   - Company
   - Category (Ideal Match, Strong Potential, Worth Considering, or Not Suitable)
   - Brief explanation for the categorization

5. Present your results in the following format:

<categorized_listings>
<listing>
<id>[Job ID]</id>
<category>[Category]</category>
<explanation>[Brief explanation for categorization]</explanation>
</listing>
[Repeat for each job listing]
</categorized_listings>

Here are the job listings to categorize:

"""
    # The only variable part: the caller's JSON, wrapped in its own tag pair.
    payload = f"<job_listings>\n{job_listings_json}\n</job_listings>\n\n"
    closing = "Analyze each listing and provide your categorization as instructed above."
    return preamble + payload + closing




In [26]:
import litellm
import instructor
import json

def track_cost_callback(kwargs, completion_response, start_time, end_time):
    """Success callback that prints cost, latency and token usage for a call.

    Registered through ``litellm.success_callback``. Reporting is best-effort:
    a failure while printing never propagates to the caller, but it is
    reported on stdout instead of being silently discarded.

    Args:
        kwargs: Call metadata; ``"model"`` and ``"response_cost"`` are read.
        completion_response: The response object. If it exposes a ``usage``
            attribute, its token counts are printed.
        start_time: Start of the call; must support ``end_time - start_time``
            yielding an object with ``total_seconds()``.
        end_time: End of the call (see ``start_time``).

    Returns:
        None.
    """
    try:
        # `or 0` also covers a key that is present but None, which would
        # otherwise make the `.6f` format spec raise and lose the whole report.
        response_cost = kwargs.get("response_cost") or 0
        print(f"Cost for {kwargs.get('model', 'unknown')}: ${response_cost:.6f}")
        duration = end_time - start_time
        print(f"Duration: {duration.total_seconds():.2f} seconds")
        print(f"Response: {completion_response}")
        # Duck-typed: any response carrying usage information is reported.
        usage = getattr(completion_response, "usage", None)
        if usage is not None:
            print(f"Completion tokens: {usage.completion_tokens}")
            print(f"Prompt tokens: {usage.prompt_tokens}")
            print(f"Total tokens: {usage.total_tokens}")
    except Exception as exc:
        # Stay non-fatal for the completion call, but make the failure visible.
        print(f"track_cost_callback failed: {exc!r}")

# Register the reporter as the only litellm success callback. This assignment
# replaces any callbacks that were previously in the list.
litellm.success_callback = [track_cost_callback]

def categorize_jobs(job_listings, model, max_tokens=1000):
    """Categorize job listings with the given model and return structured results.

    The listings are serialized to JSON, embedded in the prompt built by
    ``get_prompt`` and sent through an instructor-wrapped ``litellm.completion``
    with ``CategorizedListings`` as the response model.

    Args:
        job_listings: JSON-serializable collection of job listings.
        model: Model identifier passed through to litellm
            (e.g. ``"gpt-4o-mini"``).
        max_tokens: Upper bound on generated tokens. The default keeps the
            previous fixed value of 1000. Raise it for larger batches: when the
            structured output is cut off at this limit, the call fails with
            ``IncompleteOutputException`` instead of returning partial results.

    Returns:
        A ``CategorizedListings`` instance parsed from the model's reply.
    """
    client = instructor.from_litellm(litellm.completion)
    job_listings_json = json.dumps(job_listings)

    response = client.chat.completions.create(
        model=model,
        max_tokens=max_tokens,
        temperature=0,  # keep runs as repeatable as the provider allows
        messages=[{"role": "user", "content": get_prompt(job_listings_json)}],
        response_model=CategorizedListings,
    )

    return response

def pretty_print_results(results) -> None:
    """Print every categorized listing as a short, blank-line-separated block.

    Args:
        results: Object exposing a ``listings`` iterable whose items have
            ``id``, ``category`` and ``explanation`` attributes.
    """
    for entry in results.listings:
        # The leading newline separates consecutive entries.
        print(f"\nJob ID: {entry.id}")
        print(f"Category: {entry.category}")
        print(f"Explanation: {entry.explanation}")

# Usage examples:
# anthropic_results = categorize_jobs(job_listings, model="claude-3-sonnet-20240229")
# openai_results = categorize_jobs(job_listings, model="gpt-4-turbo-preview")

In [14]:
# Categorize the sample jobs with Claude 3.5 Sonnet; the result is kept for the
# side-by-side comparison done by compare_results.
anthropic_results = categorize_jobs(job_listings, model="claude-3-5-sonnet-20240620")

Cost for claude-3-5-sonnet-20240620: $0.053391


TypeError: pretty_print_results() missing 1 required positional argument: 'model_name'

In [24]:
# Categorize the same sample jobs with gpt-4o-mini; the result is kept for the
# side-by-side comparison done by compare_results.
openai_mini_results = categorize_jobs(job_listings, model="gpt-4o-mini")

Cost for gpt-4o-mini: $0.002116
Start time: 2024-07-18 18:01:38.528116
End time: 2024-07-18 18:01:49.249765
Duration: 0:00:10.721649 seconds


In [25]:
# Cost/latency comparison across OpenAI models on the same input. The figures
# come from the success callback's printed output. The return values of the
# first three calls are discarded; only the last expression is rendered as the
# cell's output.
categorize_jobs(job_listings, model="gpt-4o-mini")
categorize_jobs(job_listings, model="gpt-4o")
categorize_jobs(job_listings, model="gpt-3.5-turbo")
categorize_jobs(job_listings, model="gpt-4-turbo")

Cost for gpt-4o-mini: $0.002113
Start time: 2024-07-18 18:10:45.959514
End time: 2024-07-18 18:10:55.900048
Duration: 0:00:09.940534 seconds
Cost for gpt-4o: $0.068175
Start time: 2024-07-18 18:10:55.908175
End time: 2024-07-18 18:11:10.907508
Duration: 0:00:14.999333 seconds
Cost for gpt-3.5-turbo: $0.006744
Start time: 2024-07-18 18:11:10.916806
End time: 2024-07-18 18:11:21.295298
Duration: 0:00:10.378492 seconds
Cost for gpt-4-turbo: $0.135310
Start time: 2024-07-18 18:11:21.301942
End time: 2024-07-18 18:11:46.372139
Duration: 0:00:25.070197 seconds


CategorizedListings(listings=[JobListing(id='dice_5b997e14-1e17-4bce-a78d-60b460279f35', category='Not Suitable', explanation="The job title 'Materials Program Manager' and the focus on materials planning and manual MRP do not match the required software engineering skills or the remote work criteria specified in the ideal profile."), JobListing(id='dice_6f2250fb-5309-4308-ac8d-acc93878faf3', category='Ideal Match', explanation="The job title 'Senior Software Engineer' with a focus on front-end development using React and TypeScript, along with the fully remote work arrangement, perfectly matches the ideal job profile."), JobListing(id='dice_3ccf16bc-be70-4f7d-b382-b641084fb3b3', category='Worth Considering', explanation="The job title 'Software Engineer' and the focus on CAD and Three.JS development, while not directly web or full stack, suggest related technical skills. The remote work arrangement aligns with the profile, but the seniority and exact experience requirements are unclea

In [38]:
import litellm
import json

# Load the local JSON file. The encoding is pinned to UTF-8 (the JSON
# interchange encoding) so the read does not depend on the platform's default
# locale encoding.
with open('sample_jobs.json', 'r', encoding='utf-8') as file:
    job_data = json.load(file)

# Re-serialize with json.dumps, the same way categorize_jobs builds its
# payload, so the count reflects the JSON that is actually sent.
job_data_str = json.dumps(job_data)

# Count tokens for the JSON payload only; the instruction text that
# get_prompt wraps around it is not included in this number.
token_count = litellm.token_counter(model="gpt-4o-mini", text=job_data_str)

print(f"Number of tokens in sample_jobs.json: {token_count}")

Number of tokens in sample_jobs.json: 10479


In [40]:
# Stress test: send the sample listings repeated 10x in a single request.
# With categorize_jobs' limit of 1000 output tokens the structured reply is cut
# off (finish_reason='length'), which surfaces as the IncompleteOutputException
# shown in this cell's output.
categorize_jobs(job_listings * 10, model="gpt-4o-mini")

Cost for gpt-4o-mini: $0.016404
Duration: 47.41 seconds
Response: ModelResponse(id='chatcmpl-9mVp9O1BZ4UaGCiB2LVQEshBDTyGV', choices=[Choices(finish_reason='length', index=0, message=Message(content=None, role='assistant', tool_calls=[ChatCompletionMessageToolCall(function=Function(arguments='{"listings":[{"id":"dice_6f2250fb-5309-4308-ac8d-acc93878faf3","category":"Ideal Match","explanation":"The job title is \'Senior Software Engineer\', requires senior-level web development skills (React, TypeScript), is fully remote, and requires at least 5 years of experience."},{"id":"dice_3ccf16bc-be70-4f7d-b382-b641084fb3b3","category":"Strong Potential","explanation":"The job title is \'Software Engineer (CAD, Three.JS)\', which is similar to the ideal title. It requires senior-level development skills, is fully remote, and requires at least 5 years of experience."},{"id":"dice_c81dad63-c31e-4173-812a-146dc92f470d","category":"Ideal Match","explanation":"The job title is \'Sr Software Developm

IncompleteOutputException: The output is incomplete due to a max_tokens length limit.

In [18]:
def compare_results(anthropic_results, openai_mini_results):
    """Print both models' categorizations side by side, paired by job ID.

    Listings are matched on ``id`` rather than on list position, so the
    comparison stays correct when the two models return listings in a
    different order or when one model omits a listing. When both results hold
    the same IDs in the same order, the output is the same as with positional
    pairing.

    Args:
        anthropic_results: Object with a ``listings`` iterable of items having
            ``id``, ``category`` and ``explanation`` attributes.
        openai_mini_results: Same shape as ``anthropic_results``.

    Returns:
        None. If an ID occurs more than once in ``openai_mini_results``, the
        last occurrence is the one shown.
    """
    missing = "(no result returned for this job ID)"

    def _print_side(title, listing):
        # Print one model's verdict, or a placeholder when it has none.
        print(title)
        if listing is None:
            print(missing)
        else:
            print(f"Category: {listing.category}")
            print(f"Explanation: {listing.explanation}")

    openai_by_id = {listing.id: listing for listing in openai_mini_results.listings}
    anthropic_ids = set()

    # Walk the Anthropic listings in their own order, looking up the partner.
    for a_listing in anthropic_results.listings:
        anthropic_ids.add(a_listing.id)
        print(f"\nJob ID: {a_listing.id}")
        _print_side("Anthropic Results:", a_listing)
        _print_side("\nOpenAI Mini Results:", openai_by_id.get(a_listing.id))
        print("-" * 50)

    # Then report listings that only the OpenAI model returned.
    for o_listing in openai_mini_results.listings:
        if o_listing.id in anthropic_ids:
            continue
        print(f"\nJob ID: {o_listing.id}")
        _print_side("Anthropic Results:", None)
        _print_side("\nOpenAI Mini Results:", o_listing)
        print("-" * 50)

# Print the Claude 3.5 Sonnet and gpt-4o-mini categorizations side by side.
# Requires the cells that assign anthropic_results and openai_mini_results to
# have been run first.
compare_results(anthropic_results, openai_mini_results)


Job ID: dice_5b997e14-1e17-4bce-a78d-60b460279f35
Anthropic Results:
Category: Not Suitable
Explanation: This is a Materials Program Manager role, not a Software Engineering position. It does not match the required technical skills or job title.

OpenAI Mini Results:
Category: Not Suitable
Explanation: The job title 'Materials Program Manager' is unrelated to software engineering, and it does not require web or full stack development skills. It is not fully remote as it mentions a location in Taiwan.
--------------------------------------------------

Job ID: dice_6f2250fb-5309-4308-ac8d-acc93878faf3
Anthropic Results:
Category: Ideal Match
Explanation: Senior Software Engineer position with focus on frontend development (React), fully remote, and requires 5+ years of experience. Matches all key criteria.

OpenAI Mini Results:
Category: Ideal Match
Explanation: The job title 'Senior Software Engineer' matches the ideal job title. It requires senior-level web development skills (React,