In [15]:
import json
import pandas as pd
from sklearn.model_selection import train_test_split
import requests
import httpx
import tqdm

In [16]:
# Calibration results (one row per candidate) and the job description text,
# both read from the local data/ directory.
calibration_results_urls = pd.read_csv('data/serval_calibration_results_with_urls.csv')
# Use a context manager so the file handle is closed as soon as the text is read,
# instead of relying on garbage collection of the anonymous file object.
with open('data/serval_description.txt') as description_file:
    serval_description = description_file.read()

# Seeded 70/30 split of the calibration rows, so re-running the cell gives the same partition.
train, test = train_test_split(calibration_results_urls, test_size=0.3, random_state=42)

In [17]:
async def get_styx_evaluation(job_description: str, calibrations: list[dict], candidate_url: str = None, candidate_dict: dict = None):
    """POST one candidate to the local ``/headless_evaluate`` endpoint and return its JSON reply.

    Args:
        job_description: Text sent under the ``job_description`` key.
        calibrations: List of calibration dicts sent under the ``calibrations`` key.
        candidate_url: Value sent under the ``url`` key (``None`` when omitted).
        candidate_dict: Value sent under the ``candidate`` key (``None`` when omitted).

    Returns:
        The decoded JSON body of the response.

    Raises:
        httpx.HTTPStatusError: If the service answers with a 4xx/5xx status.
        httpx.HTTPError: On transport failures, including the 200 second timeout.
    """
    payload = {
        'job_description': job_description,
        'calibrations': calibrations,
        'url': candidate_url,
        'candidate': candidate_dict
    }
    async with httpx.AsyncClient() as client:
        response = await client.post(
            'http://127.0.0.1:8000/headless_evaluate',
            json=payload,
            timeout=200.0
        )
        # Fail loudly on error statuses instead of handing an error body back
        # to the caller as if it were a valid evaluation.
        response.raise_for_status()
        return response.json()

In [18]:
def candidate_paraform_to_styx(candidate_id):
    """Fetch a candidate from the Paraform API and reshape it into the evaluator's candidate dict.

    Only truthy source fields are copied, so the result contains just the keys
    for which the API returned a value. Source fields that are absent from the
    payload are treated the same as empty ones.

    Args:
        candidate_id: Identifier sent as the ``candidate_id`` query parameter.

    Returns:
        A dict that may contain ``full_name``, ``experiences``, ``occupation``,
        ``education``, ``headline``, ``city`` and ``public_identifier``.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On transport failures or timeout.
    """
    # `params=` lets requests URL-encode the id; the timeout keeps one stalled
    # request from hanging the whole notebook run.
    response = requests.get(
        'https://www.paraform.com/api/candidate/find_candidate',
        params={'candidate_id': candidate_id},
        timeout=60,
    )
    response.raise_for_status()
    profile = response.json()

    def copy_truthy(source, target, key_pairs):
        """Copy source[src] to target[dst] for every pair whose source value is truthy."""
        for source_key, target_key in key_pairs:
            value = source.get(source_key)
            if value:
                target[target_key] = value

    styx_candidate = {}
    copy_truthy(profile, styx_candidate, [('name', 'full_name')])

    experiences = profile.get('experiences')
    if experiences:
        styx_candidate['experiences'] = []
        for experience in experiences:
            styx_experience = {}
            copy_truthy(experience, styx_experience, [('role_title', 'title')])
            company = experience.get('company') or {}
            if company.get('name'):
                styx_experience['company'] = company['name']
            copy_truthy(experience, styx_experience, [
                ('description', 'description'),
                ('start_date', 'starts_at'),
                ('end_date', 'ends_at'),
                ('location', 'location'),
            ])
            styx_candidate['experiences'].append(styx_experience)

        # The source field is `role_title` (see above); the previous lookup of
        # `title` on the raw experience meant occupation was never populated.
        # `title` is still honoured as a fallback for payloads that carry it.
        first_experience = experiences[0]
        occupation = first_experience.get('role_title') or first_experience.get('title')
        if occupation:
            styx_candidate['occupation'] = occupation

    education_entries = profile.get('education')
    if education_entries:
        styx_candidate['education'] = []
        for education in education_entries:
            styx_education = {}
            school = education.get('school') or {}
            copy_truthy(education, styx_education, [('degree', 'degree_name')])
            if school.get('name'):
                styx_education['school'] = school['name']
            copy_truthy(education, styx_education, [
                ('start_date', 'starts_at'),
                ('end_date', 'ends_at'),
            ])
            if school.get('logo_src'):
                styx_education['logo_url'] = school['logo_src']
            styx_candidate['education'].append(styx_education)

    copy_truthy(profile, styx_candidate, [
        ('one_liner', 'headline'),
        ('location', 'city'),
    ])
    # Copied whenever the key is present, even if its value is empty.
    if 'linkedin_user' in profile:
        styx_candidate['public_identifier'] = profile['linkedin_user']

    return styx_candidate

In [50]:
async def run_experiment(data: pd.DataFrame, calibrations: pd.DataFrame, company: str, job_description: str, experiment_name: str, use_url: bool = True):
    """Evaluate every row of ``data`` and write the predictions to a CSV under ``results/``.

    For each row the evaluator is called with the job description, the
    calibration examples and the candidate (a LinkedIn URL when ``use_url`` is
    true, otherwise a dict built by ``candidate_paraform_to_styx``). The
    returned ``value`` and ``evaluation`` are stored in the ``fit_score`` and
    ``evaluation`` columns of a copy of ``data``. Rows whose evaluation raises
    are reported and left without a score.

    Args:
        data: Rows to evaluate; needs ``linkedin_url`` or ``candidate_id``
            depending on ``use_url``.
        calibrations: Calibration rows; needs ``calibration_result`` plus
            ``linkedin_url`` or ``candidate_id`` depending on ``use_url``.
        company: Used in the output file name.
        job_description: Job description text forwarded to the evaluator.
        experiment_name: Used in the output file name.
        use_url: Send LinkedIn URLs (True) or Paraform-derived candidate dicts (False).

    Returns:
        None. The predictions are written to
        ``results/{company}_predictions_{experiment_name}.csv``.
    """
    import os  # local import: only needed to prepare the output directory

    output_path = f'results/{company}_predictions_{experiment_name}.csv'
    # Create the output directory up front so a long run cannot fail at the
    # final write just because the directory is missing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    predictions = data.copy()
    # The calibration payload does not depend on the row being scored, so it is
    # built once and reused. Previously it was rebuilt for every row, costing
    # one Paraform request per calibration per row when use_url is False.
    styx_calibrations = None
    for idx, row in tqdm.tqdm(predictions.iterrows(), total=len(predictions)):
        try:
            if styx_calibrations is None:
                built_calibrations = []
                for calibration in calibrations.to_dict(orient='records'):
                    styx_calibration = {}
                    if use_url:
                        styx_calibration['url'] = calibration['linkedin_url']
                    else:
                        styx_calibration['candidate'] = candidate_paraform_to_styx(calibration['candidate_id'])
                    styx_calibration['calibration_result'] = calibration['calibration_result']
                    built_calibrations.append(styx_calibration)
                # Assigned only after a complete build: if building fails, this
                # row is skipped and the next row retries from scratch.
                styx_calibrations = built_calibrations

            if use_url:
                response = await get_styx_evaluation(job_description, styx_calibrations, candidate_url=row['linkedin_url'])
            else:
                response = await get_styx_evaluation(job_description, styx_calibrations, candidate_dict=candidate_paraform_to_styx(row['candidate_id']))
            predictions.at[idx, 'fit_score'] = response['value']
            predictions.at[idx, 'evaluation'] = response['evaluation']
        except Exception as e:
            # Best-effort: report which row failed and why, then keep going.
            print(f'row {idx}: {type(e).__name__}: {e}')
            continue

    predictions.to_csv(output_path, index=False)


In [66]:
# await run_experiment(calibration_results_urls.iloc[10:], calibration_results_urls.head(3), 'serval', serval_description, '3_calibration_base_paraform_3', use_url=False)
await run_experiment(calibration_results_urls.iloc[10:], calibration_results_urls.head(10), 'serval', serval_description, '10_calibration_base_paraform_3', use_url=False)
# await run_experiment(calibration_results_urls.iloc[10:], pd.DataFrame(), 'serval', serval_description, 'no_calibration_base_paraform_3', use_url=False)

  2%|▏         | 2/83 [00:12<08:07,  6.02s/it]


CancelledError: 

In [67]:
def calculate_metrics(predictions: pd.DataFrame, threshold: float = 2):
    """Compute binary classification metrics for scored predictions.

    A row is predicted positive when its ``fit_score`` is strictly greater than
    ``threshold``. It is actually positive when its ``calibration_result`` is
    ``'GOOD_FIT'`` or ``'MAYBE'``. Rows with a missing ``fit_score`` are ignored.

    Args:
        predictions: Frame with ``fit_score`` and ``calibration_result`` columns.
        threshold: Score above which a row counts as a predicted good fit.

    Returns:
        Tuple ``(accuracy, precision, recall, f1_score)``. Any metric whose
        denominator is zero is reported as 0, including accuracy when no row
        has a score.
    """
    if len(predictions) == 0:
        return 0.0, 0.0, 0.0, 0.0

    scored = predictions[predictions['fit_score'].notna()]
    total = len(scored)
    # Accuracy used to divide by `total` unguarded and raised ZeroDivisionError
    # when no row had a score (e.g. every evaluation failed).
    if total == 0:
        return 0.0, 0.0, 0.0, 0.0

    predicted_good_fit = scored['fit_score'] > threshold
    actual_good_fit = scored['calibration_result'].isin(['GOOD_FIT', 'MAYBE'])

    true_positives = int((predicted_good_fit & actual_good_fit).sum())
    false_positives = int((predicted_good_fit & ~actual_good_fit).sum())
    false_negatives = int((~predicted_good_fit & actual_good_fit).sum())
    true_negatives = total - true_positives - false_positives - false_negatives

    predicted_positives = true_positives + false_positives
    actual_positives = true_positives + false_negatives

    accuracy = (true_positives + true_negatives) / total
    precision = true_positives / predicted_positives if predicted_positives > 0 else 0.0
    recall = true_positives / actual_positives if actual_positives > 0 else 0.0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
    return accuracy, precision, recall, f1_score

In [68]:
# Metrics for the saved "no_calibration" predictions file.
search_results_predictions = pd.read_csv('results/serval_predictions_no_calibration_base_paraform_2.csv')
accuracy, precision, recall, f1_score = calculate_metrics(search_results_predictions)
# calculate_metrics counts GOOD_FIT and MAYBE together as the positive class;
# the label previously claimed MAYBE was grouped with BAD_FIT.
print(f"Accuracy (treating GOOD_FIT and MAYBE as same class): {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1 Score: {f1_score:.3f}")

Accuracy (treating MAYBE and BAD_FIT as same class): 0.783
Precision: 0.641
Recall: 0.862
F1 Score: 0.735


In [69]:
# Metrics for the saved "3_calibration" predictions file.
search_results_predictions = pd.read_csv('results/serval_predictions_3_calibration_base_paraform_2.csv')
accuracy, precision, recall, f1_score = calculate_metrics(search_results_predictions)
# calculate_metrics counts GOOD_FIT and MAYBE together as the positive class;
# the label previously claimed MAYBE was grouped with BAD_FIT.
print(f"Accuracy (treating GOOD_FIT and MAYBE as same class): {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1 Score: {f1_score:.3f}")

Accuracy (treating MAYBE and BAD_FIT as same class): 0.747
Precision: 0.591
Recall: 0.897
F1 Score: 0.712


In [70]:
# Metrics for the saved "10_calibration" predictions file.
search_results_predictions = pd.read_csv('results/serval_predictions_10_calibration_base_paraform_2.csv')
accuracy, precision, recall, f1_score = calculate_metrics(search_results_predictions)
# calculate_metrics counts GOOD_FIT and MAYBE together as the positive class;
# the label previously claimed MAYBE was grouped with BAD_FIT.
print(f"Accuracy (treating GOOD_FIT and MAYBE as same class): {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1 Score: {f1_score:.3f}")

Accuracy (treating MAYBE and BAD_FIT as same class): 0.711
Precision: 0.556
Recall: 0.862
F1 Score: 0.676
