In [1]:
from transformers import pipeline
import pandas as pd
import os
import matplotlib.pyplot as plt
import torch
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
device = 0 if torch.cuda.is_available() else -1

In [2]:
model_checkpoint = '../../GoEmotions/DiracGiraf/roberta-base-goemotions'
classifier = pipeline("sentiment-analysis", model=model_checkpoint, device=device, return_all_scores=True)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [3]:
classifier("A little fall of rain can hardly hurt me now.")

[[{'label': 'admiration', 'score': 0.0018051706720143557},
  {'label': 'amusement', 'score': 0.0029350074473768473},
  {'label': 'anger', 'score': 0.007007139269262552},
  {'label': 'annoyance', 'score': 0.03936273977160454},
  {'label': 'approval', 'score': 0.08013195544481277},
  {'label': 'caring', 'score': 0.052060361951589584},
  {'label': 'confusion', 'score': 0.0024660772178322077},
  {'label': 'curiosity', 'score': 0.002276606857776642},
  {'label': 'desire', 'score': 0.0024517029523849487},
  {'label': 'disappointment', 'score': 0.0689345970749855},
  {'label': 'disapproval', 'score': 0.049655284732580185},
  {'label': 'disgust', 'score': 0.0033764310646802187},
  {'label': 'embarrassment', 'score': 0.0032214014790952206},
  {'label': 'excitement', 'score': 0.0012938411673530936},
  {'label': 'fear', 'score': 0.00445200689136982},
  {'label': 'gratitude', 'score': 0.0016569423023611307},
  {'label': 'grief', 'score': 0.0034752271603792906},
  {'label': 'joy', 'score': 0.004112

In [4]:
df_test = pd.read_csv('../reformat_data/test_wide.csv')
pred = classifier(df_test['Text'].tolist())

In [5]:
scores = [[pred[i][j]['score'] for j in range(28)] for i in range(len(pred))]

In [6]:
emos = [pred[0][j]['label'] for j in range(28)]
pred_df = pd.DataFrame(scores, columns = emos)
pred_df.head()

Unnamed: 0,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,disappointment,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,0.013935,0.006744,0.007678,0.008139,0.028923,0.036836,0.006633,0.015746,0.008729,0.033192,...,0.392426,0.005554,0.017093,0.00198,0.011743,0.004885,0.531624,0.331093,0.006865,0.030206
1,0.576938,0.005429,0.014492,0.037871,0.025921,0.002733,0.006187,0.006927,0.00606,0.034375,...,0.016771,0.004805,0.010493,0.006595,0.010148,0.004013,0.00286,0.00759,0.044751,0.029417
2,0.13392,0.006544,0.001953,0.004801,0.178036,0.063661,0.005623,0.022432,0.010005,0.00128,...,0.003557,0.001182,0.252231,0.004944,0.008963,0.008246,0.000626,0.000777,0.00607,0.091112
3,0.01811,0.013819,0.002696,0.008705,0.014585,0.003039,0.010673,0.004767,0.001613,0.00434,...,0.004364,0.000577,0.003663,0.001313,0.035554,0.003385,0.002351,0.002924,0.011143,0.017955
4,0.001799,0.00307,0.003995,0.022705,0.033099,0.003817,0.003728,0.002551,0.001496,0.007557,...,0.000685,0.000649,0.009692,0.00053,0.0268,0.001224,0.000623,0.002557,0.001933,0.924905


In [7]:
def multi_label_metrics(predictions, y_true, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    binary_pred = (predictions >= threshold).astype(int)
    f1_micro_average = f1_score(y_true=y_true, y_pred=binary_pred, average='micro')
    roc_auc = roc_auc_score(y_true, binary_pred, average = 'micro')
    accuracy = accuracy_score(y_true, binary_pred)
    # return as dictionary
    metrics = {'f1': f1_micro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics
print('The micro average scores are')
print(multi_label_metrics(pred_df, df_test[emos]))

The micro average scores are
{'f1': 0.5987443946188341, 'roc_auc': 0.7586149711578436, 'accuracy': 0.49659111848166576}


In [8]:
def individual_label_metrics(predictions, y_true, label, threshold=0.5):
    binary_pred = (predictions[label] >= threshold).astype(int)
    y_true_one_label = y_true[label]
    f1 = f1_score(y_true=y_true_one_label, y_pred=binary_pred)
    roc_auc = roc_auc_score(y_true_one_label, binary_pred)
    accuracy = accuracy_score(y_true_one_label, binary_pred)
    # return as dictionary
    metrics = {'f1': f1,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics


individual_metrics = [individual_label_metrics(pred_df, df_test, emo, threshold=0.5) for emo in emos]
print('The metrics for individual emotion categories:')
pd.DataFrame(individual_metrics, index = emos)

The metrics for individual emotion categories:


Unnamed: 0,f1,roc_auc,accuracy
admiration,0.687842,0.801859,0.947485
amusement,0.839041,0.956752,0.982679
anger,0.509202,0.705293,0.970518
annoyance,0.138889,0.537594,0.942878
approval,0.331839,0.603345,0.945089
caring,0.222222,0.562868,0.978073
confusion,0.462151,0.68575,0.975124
curiosity,0.494118,0.712109,0.95246
desire,0.456693,0.673295,0.987286
disappointment,0.142012,0.539166,0.973282
