In [66]:
%load_ext autoreload
%autoreload 2
    
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from wordcloud import WordCloud

from tenacity import retry, stop_after_attempt, wait_exponential

from openai_utils import get_gpt_tokens
from openai_utils import get_batch_completion_tasks
from openai_utils import generate_jsonl_file
from openai_utils import upload_file_to_openai
from openai_utils import create_batch_completion_job
from openai_utils import get_batch_job_status
from openai_utils import get_batch_job_results
from openai_utils import save_batch_results_to_jsonl
from openai_utils import map_jsonl_batch_completion_results_to_df
from openai_utils import generate_openai_fine_tuning_json_from_df
from openai_utils import upload_fine_tuning_file_to_openai
from openai_utils import fine_tune_gpt_model
from openai_utils import check_fine_tuning_job_status
from openai_utils import get_fine_tuned_model_details

from utils.dataframe_utils import read_pandas_csv_clean_columns_names
from utils.dataframe_utils import get_model_cross_validation
from utils.utils import remove_pii

from pprint import pprint
import time
import json
import re
import os

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [67]:
current_dir = os.getcwd()
datasets_dir = os.path.join(current_dir, '..', '..', 'datasets')
dataset_path = os.path.abspath(os.path.join(datasets_dir, 'contact_sales_inbound_segmentation_dataset_original.csv'))
print(dataset_path)

/Users/nathaniel.maymon/Downloads/Share_to_drive/text_classification_model_comparison/datasets/contact_sales_inbound_segmentation_dataset_original.csv


In [68]:
df = read_pandas_csv_clean_columns_names(dataset_path)

print(df.columns, '\n')
df.head()

Index(['gpt_decision', 'unqualify_sub_reason', 'sales_type',
       'unqualified_reason', 'contact_us_segmentation', 'sql_sql_name', 'lead',
       'status', 'contact_us_form_description', 'country_code', 'country',
       'prior_country', 'qualified_for_cvr_report', 'scale_won_for_cvr_report',
       'won_sale_for_cvr_report', 'won_opp_net_new_arr_amount',
       'scale_meeting', 'days_since_last_engagement'],
      dtype='object') 



Unnamed: 0,gpt_decision,unqualify_sub_reason,sales_type,unqualified_reason,contact_us_segmentation,sql_sql_name,lead,status,contact_us_form_description,country_code,country,prior_country,qualified_for_cvr_report,scale_won_for_cvr_report,won_sale_for_cvr_report,won_opp_net_new_arr_amount,scale_meeting,days_since_last_engagement
0,support,,Self Service,,,A-270959,Venu Koritala,Qualified,Looking for an individual plan,,India,,1,0,0,0.0,0,165.0
1,sales,,Scale,,,A-270963,,Qualified,I'd like to know if you integrate with Talisma...,United Kingdom,United Kingdom,,1,1,1,6700.32,1,111.0
2,sales,,,Auto Closure,,A-270966,Emilie Lefevre,Unqualified,Hello - would like to learn how Lusha can help...,,United Kingdom,,0,0,0,0.0,0,
3,sales,,,Auto Closure,Monthly,A-270968,,Unqualified,I want to know all your plans and pricing and ...,India,India,,0,0,0,0.0,0,165.0
4,support,,,Support Request,,A-270969,,Unqualified,Im trying to make my account but i get the err...,Netherlands,Netherlands,,0,0,0,0.0,0,


In [69]:
current_dir = os.getcwd()
datasets_dir = os.path.join(current_dir, '..', '..', 'datasets')
dataset_path = os.path.abspath(os.path.join(datasets_dir, 'contact_sales_fine_tuning_set.csv'))

df_ft = read_pandas_csv_clean_columns_names(dataset_path)
df_ft['target'] = df_ft['target'].apply(lambda x: x.lower())
df_ft.head()

Unnamed: 0,submission_id,contact_us_form_description,target
0,3836,product or technical question -,support
1,4959,other - What's the API pricing?,sales
2,4413,Looking for SIP to integrate with Hubspot.,sales
3,7494,upgrade plan - I need to add 1 additional seat.,sales
4,2077,I am a fleet owner of cargo vans and would lik...,irrelevant


In [70]:
current_dir = os.getcwd()
datasets_dir = os.path.join(current_dir, '..', '..', 'datasets')
dataset_path = os.path.abspath(os.path.join(datasets_dir, 'contact_us_form_submissions_notes.csv'))

df_act = read_pandas_csv_clean_columns_names(dataset_path)
df_act.head()

Unnamed: 0,contact_us_form_description,gpt_decision,target
0,Looking for an individual plan,support,sales
1,I'd like to know if you integrate with Talisma...,sales,sales
2,Hello - would like to learn how Lusha can help...,sales,sales
3,I want to know all your plans and pricing and ...,sales,sales
4,Im trying to make my account but i get the err...,support,support


In [128]:
df_act.shape

(8068, 3)

In [71]:
sum(df['contact_us_form_description'].isna())

20

In [72]:
df['unqualified_reason'].value_counts()

unqualified_reason
Auto Closure                      4348
Irrelevant Contact / Company       635
Not Interested                     515
Support Request                    279
Duplicate SQL                      118
Non Responsive                     108
Compliance                          62
Self-Service Plan                   37
Timing/Future                       36
Missing Feature                      5
Account restricted by CSM / AE       3
Fraud                                3
Name: count, dtype: int64

In [73]:
df_samples = df[
    (~df['unqualified_reason'].isin(['Duplicate SQL', 'Account restricted by CSM / AE', 'Fraud'])) &
    (
        (
            (df['gpt_decision'] == 'sales') &
            (df['unqualified_reason'].isin(['Irrelevant Contact / Company', 'Support Request', 'Not Interested']))
        ) |
        (df['gpt_decision'].isin(['support', 'irrelevant']))
    ) &
    (~df['contact_us_form_description'].isna())
]
df_samples.shape[0]

3463

In [74]:
df_samples.gpt_decision.value_counts()

gpt_decision
support       2125
sales          861
irrelevant     477
Name: count, dtype: int64

In [75]:
df_samples = df_samples[['contact_us_form_description', 'gpt_decision']].groupby('gpt_decision').sample(n=400, random_state=12345)
df_samples['gpt_decision'].value_counts()

gpt_decision
irrelevant    400
sales         400
support       400
Name: count, dtype: int64

In [76]:
df_samples.head()

Unnamed: 0,contact_us_form_description,gpt_decision
8653,Add to your database,irrelevant
6369,"Dear Potential Sponsor,\n\nWe are excited to i...",irrelevant
6422,"Hello Friend,\n\nMy name is Alex Smith from C...",irrelevant
260,call on +91 7702924763,irrelevant
2952,Dear sir\nwe are large Trading Firm in Banglad...,irrelevant


In [77]:
idx = 8653
col = 'contact_us_form_description'
print(df.loc[idx, col])
print(df_samples.loc[idx, col])
print(df_ft[df_ft['submission_id']==idx][col])

Add to your database
Add to your database
697    Add to your database
Name: contact_us_form_description, dtype: object


In [78]:
df = df.reset_index()
df = pd.merge(df, df_ft[['submission_id', 'target']], how='left', left_on='index', right_on='submission_id')
df.head()

Unnamed: 0,index,gpt_decision,unqualify_sub_reason,sales_type,unqualified_reason,contact_us_segmentation,sql_sql_name,lead,status,contact_us_form_description,...,country,prior_country,qualified_for_cvr_report,scale_won_for_cvr_report,won_sale_for_cvr_report,won_opp_net_new_arr_amount,scale_meeting,days_since_last_engagement,submission_id,target
0,0,support,,Self Service,,,A-270959,Venu Koritala,Qualified,Looking for an individual plan,...,India,,1,0,0,0.0,0,165.0,,
1,1,sales,,Scale,,,A-270963,,Qualified,I'd like to know if you integrate with Talisma...,...,United Kingdom,,1,1,1,6700.32,1,111.0,,
2,2,sales,,,Auto Closure,,A-270966,Emilie Lefevre,Unqualified,Hello - would like to learn how Lusha can help...,...,United Kingdom,,0,0,0,0.0,0,,,
3,3,sales,,,Auto Closure,Monthly,A-270968,,Unqualified,I want to know all your plans and pricing and ...,...,India,,0,0,0,0.0,0,165.0,,
4,4,support,,,Support Request,,A-270969,,Unqualified,Im trying to make my account but i get the err...,...,Netherlands,,0,0,0,0.0,0,,,


In [79]:
df['ft'] = df.apply(lambda r: True if pd.notna(r['target']) else np.nan, axis=1)
df.head()

Unnamed: 0,index,gpt_decision,unqualify_sub_reason,sales_type,unqualified_reason,contact_us_segmentation,sql_sql_name,lead,status,contact_us_form_description,...,prior_country,qualified_for_cvr_report,scale_won_for_cvr_report,won_sale_for_cvr_report,won_opp_net_new_arr_amount,scale_meeting,days_since_last_engagement,submission_id,target,ft
0,0,support,,Self Service,,,A-270959,Venu Koritala,Qualified,Looking for an individual plan,...,,1,0,0,0.0,0,165.0,,,
1,1,sales,,Scale,,,A-270963,,Qualified,I'd like to know if you integrate with Talisma...,...,,1,1,1,6700.32,1,111.0,,,
2,2,sales,,,Auto Closure,,A-270966,Emilie Lefevre,Unqualified,Hello - would like to learn how Lusha can help...,...,,0,0,0,0.0,0,,,,
3,3,sales,,,Auto Closure,Monthly,A-270968,,Unqualified,I want to know all your plans and pricing and ...,...,,0,0,0,0.0,0,165.0,,,
4,4,support,,,Support Request,,A-270969,,Unqualified,Im trying to make my account but i get the err...,...,,0,0,0,0.0,0,,,,


In [80]:
df.drop(['index', 'submission_id'], axis=1, inplace=True)
df.head()

Unnamed: 0,gpt_decision,unqualify_sub_reason,sales_type,unqualified_reason,contact_us_segmentation,sql_sql_name,lead,status,contact_us_form_description,country_code,country,prior_country,qualified_for_cvr_report,scale_won_for_cvr_report,won_sale_for_cvr_report,won_opp_net_new_arr_amount,scale_meeting,days_since_last_engagement,target,ft
0,support,,Self Service,,,A-270959,Venu Koritala,Qualified,Looking for an individual plan,,India,,1,0,0,0.0,0,165.0,,
1,sales,,Scale,,,A-270963,,Qualified,I'd like to know if you integrate with Talisma...,United Kingdom,United Kingdom,,1,1,1,6700.32,1,111.0,,
2,sales,,,Auto Closure,,A-270966,Emilie Lefevre,Unqualified,Hello - would like to learn how Lusha can help...,,United Kingdom,,0,0,0,0.0,0,,,
3,sales,,,Auto Closure,Monthly,A-270968,,Unqualified,I want to know all your plans and pricing and ...,India,India,,0,0,0,0.0,0,165.0,,
4,support,,,Support Request,,A-270969,,Unqualified,Im trying to make my account but i get the err...,Netherlands,Netherlands,,0,0,0,0.0,0,,,


In [81]:
# Target "sales"
def sales_class(row):
    res = 0
    
    if row['status'] in ('Qualified', 'In Contact') or \
       row['sales_type'] in ('Self Service', 'Scale') or \
       row['won_sale_for_cvr_report'] > 0:
        res = 1

    if res == 0 and isinstance(row['contact_us_form_description'], str) and \
        (row['contact_us_form_description'].lower() == 'upgrade plan -' or \
        'demo -' in row['contact_us_form_description'].lower()):
        res = 1
    
    return res

df['sales_class'] = df.apply(sales_class, axis=1)

# Target "support"
def support_class(row):
    res = 0

    if row['unqualified_reason'] == 'Support Request' or \
      (row['days_since_last_engagement'] is None and row['gpt_decision'] not in ('irrelevant', 'sales')):
        res = 1

    return res

df['support_class'] = df.apply(support_class, axis=1)

# Target Irelevant
def irrelevant_class(row):
    res = 0

    if row['gpt_decision'] == 'irrelevant':
        res = 1

    return res

df['irrelevant_class'] = df.apply(irrelevant_class, axis=1)

# Create Target Column
def get_traget_col(row):
    res = None

    if row['ft'] == True: return row['target']
    
    if   row['support_class'] == 1:    res = 'support'   
    elif row['sales_class'] == 1:      res = 'sales'        
    elif row['irrelevant_class'] == 1: res = 'irrelevant'
    else:                              res = row['gpt_decision']

    return res

df['target'] = df.apply(get_traget_col, axis=1)

In [83]:
df.loc[0, 'target'] = 'support'

In [84]:
df.head()

Unnamed: 0,gpt_decision,unqualify_sub_reason,sales_type,unqualified_reason,contact_us_segmentation,sql_sql_name,lead,status,contact_us_form_description,country_code,...,scale_won_for_cvr_report,won_sale_for_cvr_report,won_opp_net_new_arr_amount,scale_meeting,days_since_last_engagement,target,ft,sales_class,support_class,irrelevant_class
0,support,,Self Service,,,A-270959,Venu Koritala,Qualified,Looking for an individual plan,,...,0,0,0.0,0,165.0,support,,1,0,0
1,sales,,Scale,,,A-270963,,Qualified,I'd like to know if you integrate with Talisma...,United Kingdom,...,1,1,6700.32,1,111.0,sales,,1,0,0
2,sales,,,Auto Closure,,A-270966,Emilie Lefevre,Unqualified,Hello - would like to learn how Lusha can help...,,...,0,0,0.0,0,,sales,,0,0,0
3,sales,,,Auto Closure,Monthly,A-270968,,Unqualified,I want to know all your plans and pricing and ...,India,...,0,0,0.0,0,165.0,sales,,0,0,0
4,support,,,Support Request,,A-270969,,Unqualified,Im trying to make my account but i get the err...,Netherlands,...,0,0,0.0,0,,support,,0,1,0


In [85]:
df.to_csv('contact_sales_inbound_segmentation_dataset_original_ft.csv')

In [106]:
df['status'].value_counts()

status
Unqualified           6107
Qualified             2536
In Contact             940
Attempting             416
SAL                     58
New                     20
Additional Request       6
Re-Open                  1
Name: count, dtype: int64

In [88]:
df['unqualified_reason'].value_counts()

unqualified_reason
Auto Closure                      4348
Irrelevant Contact / Company       635
Not Interested                     515
Support Request                    279
Duplicate SQL                      118
Non Responsive                     108
Compliance                          62
Self-Service Plan                   37
Timing/Future                       36
Missing Feature                      5
Account restricted by CSM / AE       3
Fraud                                3
Name: count, dtype: int64

In [129]:
df_filtered = df[
    ~df['unqualified_reason'].isin(['Duplicate SQL', 'Account restricted by CSM / AE', 'Fraud']) &
    ~df['status'].isin(['Additional Request', 'Re-Open']) &
    ~df['contact_us_form_description'].isna() &
    ~df['contact_us_form_description'].isin(['']) &
    ~df['contact_us_form_description'].str.contains("RE:", na=False) &
    ~df['target'].isna()
]
df_filtered.shape

(9933, 23)

In [130]:
df_filtered = df_filtered[['sql_sql_name', 'contact_us_form_description', 'target', 'status', 'sales_type', 'ft']]

In [132]:
df_filtered.to_csv('contanct_sales_form_notes.csv', index=False)

In [6]:
df['text_len'] = df['contact_us_form_description'].apply(lambda x: len(x))
df['input_pii_removed'] = df['contact_us_form_description'].apply(remove_pii)

In [7]:
def correct_input(text: str) -> str:
    return fr'''Here's a prospect's input for a sales form to the question "how can we help?", help me classify and route it:
    {text}'''
    
df['corrected_input'] = df['input_pii_removed'].apply(correct_input)

In [8]:
system_prompt = f'''You're a sales rep at Lusha, and an expert in classifying incoming prospects.

In Lusha, we have prospects that connect by submitting a sales form, and leave notes. Many times through the form notes, it's possible to know how to route the prospect internally.

If a prospect wants to follow up on the offer or is genuinely interested, or wants to substantially upgrade their account such as adding multiple users \ seats e.g “I want to add 3 users to my account” and not “I want to add a user to my account” etc.(as having more than two users can indicate on a potential enterprise lead), or massively increase credits, or asks about meeting or suggests time to meet, or is interested in a proof of concept or a trial, or they inquire about intent\ technographic \ job-change filters, Integrations, API features, or ask about\mention a competitor, it should probably go to the sales team.

On the other hand, if the prospect is having an issue with the product (e.g. something not working, bugs, payment issues etc.) or has a general question like "what's a credit?" etc., or is asking about a pro\professional monthly plan, or wants to add one user to their account (e.g. I want to add a user to my account), it's probably more relevant for support.

Please note: If the input says something that is entirely irrelevant, or the prospect is trying to promote\sell\market\advertise\offer-services to us\you it's irrelevant for our sales and support reps (e.g. "I'd love to discuss how your team can leverage LinkedIn Navigator and Insights to complement your current efforts and exceed your revenue targets. Would you be available for a quick chat next week?" etc.) . Same case if the message is a generic system message (e.g. DMARC report, cooking advice, or generally completely off topic.
This is true in cases where the form notes don’t provide any additional context or clear intent.

Use your best judgment as a sales representative.

These are the rules:
  - If the prospect's input indicates it's relevant for the sales team, reply: "sales"
  - If the prospect's input indicates the input is irrelevant, reply: "irrelevant"
  - If the prospect's input indicates they are trying to promote or market to us, reply: "support"
  - In any other case, reply: "support"

Please respond in one word based on the rules provided above. Do not provide an explanation or additional context.'''

In [9]:
# Create a new batch processing file on the error set
gpt_4o_file_name = 'gpt_4o_test_validation_set'

gpt_4o_batch_data = get_batch_completion_tasks(
    df=df,
    user_prompt_col='corrected_input',
    model='gpt-4o-2024-08-06',
    temperature=0,
    system_prompt=system_prompt,
    max_tokens=3
)

generate_jsonl_file(gpt_4o_file_name, gpt_4o_batch_data)

# Upload batch set to OpenAI
gpt_4o_batch_file_id = upload_file_to_openai(f'{gpt_4o_file_name}.jsonl', 'batch')
print(gpt_4o_batch_file_id)

# Create Batch Job for Error File
gpt_4o_batch_job = create_batch_completion_job(
    gpt_4o_batch_file_id,
    "/v1/chat/completions",
    {"description": f"Get GPT-4o model results on test set batch file {gpt_4o_batch_file_id}"}
)
gpt_4o_batch_job_id = gpt_4o_batch_job.id
print(gpt_4o_batch_job_id)

# Monitor Batch job Status
batch_job_output_file_id = None
batch_error_file_id      = None
job_status = None

while job_status not in ['completed', 'failed', 'cancelled']:
    # Check status every 5 minutes
    time.sleep(60*5)
    
    batch_job_status         = get_batch_job_status(gpt_4o_batch_job_id)
    job_status               = batch_job_status.status
    batch_job_output_file_id = batch_job_status.output_file_id
    batch_error_file_id      = batch_job_status.error_file_id

    print(batch_job_status, '\n')

# Save and map results back to original DF. 
gpt_4o_results_file_name = 'gpt_4o_validation_model_results_records'
output_file = get_batch_job_results(batch_job_output_file_id)
save_batch_results_to_jsonl(output_file, gpt_4o_results_file_name)

file-ijucwcZ6XYiIYeVrjLO0ZIqa
batch_YgDOHO5QFrnKICgrR0pOkrSU
Batch(id='batch_YgDOHO5QFrnKICgrR0pOkrSU', completion_window='24h', created_at=1726994163, endpoint='/v1/chat/completions', input_file_id='file-ijucwcZ6XYiIYeVrjLO0ZIqa', object='batch', status='in_progress', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1727080563, failed_at=None, finalizing_at=None, in_progress_at=1726994167, metadata={'description': 'Get GPT-4o model results on test set batch file file-ijucwcZ6XYiIYeVrjLO0ZIqa'}, output_file_id=None, request_counts=BatchRequestCounts(completed=2720, failed=0, total=8068), _request_id='req_74ab6dc7a32a1b41e4e92166791bafe7', __exclude_fields__={'_request_id', '__exclude_fields__'}) 

Batch(id='batch_YgDOHO5QFrnKICgrR0pOkrSU', completion_window='24h', created_at=1726994163, endpoint='/v1/chat/completions', input_file_id='file-ijucwcZ6XYiIYeVrjLO0ZIqa', object='batch', status='in_progress', cancelled_at=N

In [10]:
df['gpt4o_validation_results'] = None
map_jsonl_batch_completion_results_to_df(df, gpt_4o_results_file_name, 'gpt4o_validation_results')

get_model_cross_validation(df, 'target', 'gpt4o_validation_results')

{'accuracy': 0.6197322756569162,
 'f1_macro': 0.5022988269823848,
 'f1_micro': 0.6197322756569162,
 'f1_weighted': 0.6931657758777192,
 'precision_macro': 0.4907279476292877,
 'precision_micro': 0.6197322756569162,
 'precision_weighted': 0.8882163414951358,
 'recall_macro': 0.7739748795030444,
 'recall_micro': 0.6197322756569162,
 'recall_weighted': 0.6197322756569162}

In [11]:
# Create a new batch processing file on the error set
gpt_4o_file_name = 'gpt_4_turbo_test_validation_set'

gpt_4o_batch_data = get_batch_completion_tasks(
    df=df,
    user_prompt_col='corrected_input',
    model='gpt-4-turbo',
    temperature=0,
    system_prompt=system_prompt,
    max_tokens=3
)

generate_jsonl_file(gpt_4o_file_name, gpt_4o_batch_data)

# Upload batch set to OpenAI
gpt_4o_batch_file_id = upload_file_to_openai(f'{gpt_4o_file_name}.jsonl', 'batch')
print(gpt_4o_batch_file_id)

# Create Batch Job for Error File
gpt_4o_batch_job = create_batch_completion_job(
    gpt_4o_batch_file_id,
    "/v1/chat/completions",
    {"description": f"Get GPT-4o model results on test set batch file {gpt_4o_batch_file_id}"}
)
gpt_4o_batch_job_id = gpt_4o_batch_job.id
print(gpt_4o_batch_job_id)

# Monitor Batch job Status
batch_job_output_file_id = None
batch_error_file_id      = None
job_status = None

while job_status not in ['completed', 'failed', 'cancelled']:
    # Check status every 5 minutes
    time.sleep(60*5)
    
    batch_job_status         = get_batch_job_status(gpt_4o_batch_job_id)
    job_status               = batch_job_status.status
    batch_job_output_file_id = batch_job_status.output_file_id
    batch_error_file_id      = batch_job_status.error_file_id

    print(batch_job_status, '\n')

# Save and map results back to original DF. 
gpt_4o_results_file_name = 'gpt_4_turbo_validation_model_results_records'
output_file = get_batch_job_results(batch_job_output_file_id)
save_batch_results_to_jsonl(output_file, gpt_4o_results_file_name)

file-HcfUif77AT9YidUqZDmBnB15
batch_yA3D47eIaeBR55J6sUawL1Bf
Batch(id='batch_yA3D47eIaeBR55J6sUawL1Bf', completion_window='24h', created_at=1726996045, endpoint='/v1/chat/completions', input_file_id='file-HcfUif77AT9YidUqZDmBnB15', object='batch', status='in_progress', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1727082445, failed_at=None, finalizing_at=None, in_progress_at=1726996051, metadata={'description': 'Get GPT-4o model results on test set batch file file-HcfUif77AT9YidUqZDmBnB15'}, output_file_id=None, request_counts=BatchRequestCounts(completed=2719, failed=0, total=8068), _request_id='req_d711d0932f89c7d68de291346232ddfe', __exclude_fields__={'_request_id', '__exclude_fields__'}) 

Batch(id='batch_yA3D47eIaeBR55J6sUawL1Bf', completion_window='24h', created_at=1726996045, endpoint='/v1/chat/completions', input_file_id='file-HcfUif77AT9YidUqZDmBnB15', object='batch', status='in_progress', cancelled_at=N

In [12]:
df['gpt_4_turbo_validation_results'] = None
map_jsonl_batch_completion_results_to_df(df, gpt_4o_results_file_name, 'gpt_4_turbo_validation_results')

get_model_cross_validation(df, 'target', 'gpt_4_turbo_validation_results')

{'accuracy': 0.5333415964303421,
 'f1_macro': 0.4960153803168628,
 'f1_micro': 0.5333415964303421,
 'f1_weighted': 0.6183040880225604,
 'precision_macro': 0.5233416423361245,
 'precision_micro': 0.5333415964303421,
 'precision_weighted': 0.9017689567065,
 'recall_macro': 0.7534930818134044,
 'recall_micro': 0.5333415964303421,
 'recall_weighted': 0.5333415964303421}