In [12]:
# NOTE: Adjust `checkpoints_out_dir`, `dataset_subset` and `predictions_out_dir`
# below to match the run you want to evaluate.
import torch

# dataset: name and configuration handed to `load_dataset`
dataset_name = 'clinc_oos'
dataset_subset = 'small'

# model: checkpoint directory handed to `pipeline(model=...)`
checkpoints_out_dir = '../checkpoints/clinc_small/checkpoint-15200'

# device: use the first GPU when CUDA is available, otherwise fall back to CPU
# so the notebook still runs on machines without a GPU
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# pipeline
pipeline_task = 'text-classification'

# predictions dir: despite the name, this is the path of the CSV *file* that
# the per-label report is written to
predictions_out_dir = '../predictions/clinc_small.csv'

In [13]:
# Load the test split and rename the "intent" column to "label".
from datasets import load_dataset

test_data = load_dataset(
    dataset_name,
    dataset_subset,
    split='test',
).rename_column("intent", "label")

# Display the dataset summary (features and row count).
test_data

Found cached dataset clinc_oos (/work/pi_adrozdov_umass_edu/vpamidimukka_umass_edu/hf_cache/datasets/clinc_oos/small/1.0.0/abcc41d382f8137f039adc747af44714941e8196e845dfbdd8ae7a7e020e6ba1)


Dataset({
    features: ['text', 'label'],
    num_rows: 5500
})

In [14]:
# predict test dataset
from transformers import pipeline
from sklearn.metrics import classification_report

# Build the inference pipeline from the saved checkpoint on the configured device.
classifier = pipeline(pipeline_task, model=checkpoints_out_dir, device=device)

# Make predictions on the testing dataset.
# truncation=True clips any utterance that exceeds the model's maximum input
# length instead of letting inference fail on it.
predictions = classifier(test_data['text'], batch_size=16, truncation=True)

# Convert the predictions to a list of label names, one per test example.
predicted_labels = [p['label'] for p in predictions]

# Translate the gold integer ids into label names through the model config so
# both lists are expressed in the same vocabulary.
# NOTE(review): assumes the checkpoint's id2label uses the same id ordering as
# the dataset's label column — confirm.
id2label = classifier.model.config.id2label
true_labels = [id2label[label] for label in test_data['label']]

In [15]:
# Score the predictions: per-label metrics plus overall accuracy and averaged F1.
from sklearn.metrics import classification_report

# Dict form of the report; the aggregate entries are read from it below.
report = classification_report(true_labels, predicted_labels, output_dict=True)

# Aggregate entries used here: 'accuracy', 'macro avg' and 'weighted avg'.
accuracy = report['accuracy']
macro_avg_f1_score, weighted_avg_f1_score = (
    report[average]['f1-score'] for average in ('macro avg', 'weighted avg')
)

# One line per headline metric; accuracy is shown as a percentage.
print(
    'Macro Average F1 score: {:.2f}'.format(macro_avg_f1_score),
    'Weighted Average F1 score: {:.2f}'.format(weighted_avg_f1_score),
    'Accuracy: {:.2f}%'.format(accuracy * 100),
    sep='\n',
)

Macro Average F1 score: 0.91
Weighted Average F1 score: 0.85
Accuracy: 86.60%


In [16]:
import pandas as pd
from pathlib import Path

# Build a per-label metrics table from the classification report and save it
# as CSV, sorted so the labels with the lowest f1-score come first.
label2id = classifier.model.config.label2id

# Aggregate rows that classification_report may add next to the per-label rows.
summary_rows = {'accuracy', 'micro avg', 'macro avg', 'weighted avg', 'samples avg'}

df = pd.DataFrame(report).transpose()
df = df.reset_index().rename(columns={'index': 'label'})
# Drop the aggregate rows by name rather than by position, so the table stays
# correct even if the number or order of summary rows changes.
df = df[~df['label'].isin(summary_rows)].copy()
# Place the model's integer id for each label right after the label name.
# A plain lookup is used so an unknown label raises instead of yielding NaN.
df.insert(df.columns.get_loc('label') + 1, 'label_index', [label2id[name] for name in df['label']])
df_sorted = df.sort_values(by='f1-score')

# Create the destination folder if needed; to_csv does not create missing
# parent directories on its own.
out_path = Path(predictions_out_dir)
out_path.parent.mkdir(parents=True, exist_ok=True)
df_sorted.to_csv(out_path, index = False)