In [59]:
from transformers import pipeline

In [56]:
pip install accelerate

Collecting accelerateNote: you may need to restart the kernel to use updated packages.

  Downloading accelerate-0.19.0-py3-none-any.whl (219 kB)
     ---------------------------------------- 0.0/219.1 kB ? eta -:--:--
     -------------------------------------  215.0/219.1 kB 6.6 MB/s eta 0:00:01
     -------------------------------------- 219.1/219.1 kB 4.4 MB/s eta 0:00:00
Installing collected packages: accelerate
Successfully installed accelerate-0.19.0


In [98]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'matplotlib'

In [61]:
# Build the text-classification pipeline from the
# 'monologg/bert-base-cased-goemotions-original' checkpoint.
classifier = pipeline(
    task="sentiment-analysis",
    model='monologg/bert-base-cased-goemotions-original',
)

In [62]:
# Smoke-test the classifier on one sentence; the displayed result is a list
# holding a single {'label', 'score'} dict.
sample_sentence = "Looks like you can't even identify sarcasm."
classifier(sample_sentence)

[{'label': 'annoyance', 'score': 0.9783640503883362}]

In [41]:
# Load the test split: a header-less TSV whose three columns are named here as
# the comment text, a comma-separated list of class ids, and an id per row.
df_test = pd.read_csv(
    './data/test.tsv',
    sep='\t',
    header=None,
    names=['Text', 'Class', 'ID'],
    # Force string dtype: pandas would otherwise infer integers whenever every
    # row happens to carry a single label, and splitting on ',' would then fail.
    dtype={'Class': str},
)
# Expand "3,10"-style entries into a list of class-id strings per row.
df_test['List of classes'] = df_test['Class'].str.split(',')
# Number of gold labels attached to each row.
df_test['Len of classes'] = df_test['List of classes'].str.len()

In [42]:
# Preview the first five rows of the parsed test set.
df_test.head(n=5)

Unnamed: 0,Text,Class,ID,List of classes,Len of classes
0,I’m really sorry about your situation :( Altho...,25,eecwqtt,[25],1
1,It's wonderful because it's awful. At not with.,0,ed5f85d,[0],1
2,"Kings fan here, good luck to you guys! Will be...",13,een27c3,[13],1
3,"I didn't know that, thank you for teaching me ...",15,eelgwd1,[15],1
4,They got bored from haunting earth for thousan...,27,eem5uti,[27],1


In [93]:
# Map each emotion name to its class id. The id is the zero-based line number
# of the label in emotions.txt, stored as a string so it can be compared with
# the string ids parsed from the test set's 'Class' column.
with open('./data/emotions.txt', 'r', encoding='utf-8') as f:
    emo2idx = {
        line.strip(): str(line_no)
        for line_no, line in enumerate(f)
        if line.strip()  # ignore blank lines (e.g. a trailing empty line)
    }
emo2idx

{'admiration': '0',
 'amusement': '1',
 'anger': '2',
 'annoyance': '3',
 'approval': '4',
 'caring': '5',
 'confusion': '6',
 'curiosity': '7',
 'desire': '8',
 'disappointment': '9',
 'disapproval': '10',
 'disgust': '11',
 'embarrassment': '12',
 'excitement': '13',
 'fear': '14',
 'gratitude': '15',
 'grief': '16',
 'joy': '17',
 'love': '18',
 'nervousness': '19',
 'optimism': '20',
 'pride': '21',
 'realization': '22',
 'relief': '23',
 'remorse': '24',
 'sadness': '25',
 'surprise': '26',
 'neutral': '27'}

In [83]:
%%time
pd.DataFrame(classifier(df_test['Text'].head().tolist()))

CPU times: total: 25.1 s
Wall time: 10.5 s


Unnamed: 0,label,score
0,sadness,0.756309
1,disgust,0.697608
2,optimism,0.64005
3,gratitude,0.999892
4,neutral,0.999953


In [75]:
# Number of rows in the test set.
num_sample = len(df_test)

In [82]:
# Run the classifier over the whole test set in fixed-size batches, writing
# each batch's predictions to a CSV file as soon as it is available.
file_name = 'monologg_predictions'
batch_size = 100
num_batches = int(np.ceil(num_sample/batch_size))
print('Making predictions for test set.')
print(num_batches,'batches of size',batch_size)

for i in range(num_batches):
    print('Processing batch',i+1,'of',num_batches,end='\r')
    # Slice by position so the batching does not depend on df_test's index labels.
    batch_texts = df_test['Text'].iloc[i*batch_size:(i+1)*batch_size].tolist()
    preds = classifier(batch_texts)
    # The first batch (re)creates the file with a header row; later batches append.
    is_first_batch = i == 0
    pd.DataFrame(preds).to_csv(
        file_name,
        mode='w' if is_first_batch else 'a',
        header=is_first_batch,
        index=False,
    )
# Terminate the '\r' progress line so the final message is not overprinted on it.
print()
print('Complete!')

Making predictions for test set.
55 batches of size 100
Complete!g batch 55 of 55


In [85]:
# Reload the predictions written to `file_name` and show their dimensions
# followed by the first five rows.
pred_df = pd.read_csv(filepath_or_buffer=file_name)
print((len(pred_df.index), len(pred_df.columns)))
pred_df.head(n=5)

(5427, 2)


Unnamed: 0,label,score
0,sadness,0.756309
1,disgust,0.697608
2,optimism,0.64005
3,gratitude,0.999892
4,neutral,0.999953


In [94]:
# Translate each predicted label into its class-id string via a dictionary
# lookup. A label missing from emo2idx becomes NaN rather than silently
# keeping the label text in the class-id column.
pred_df['Class'] = pred_df['label'].map(emo2idx)

In [95]:
# Preview the first five predictions.
pred_df.head(n=5)

Unnamed: 0,label,score,Class
0,sadness,0.756309,25
1,disgust,0.697608,11
2,optimism,0.64005,20
3,gratitude,0.999892,15
4,neutral,0.999953,27


In [103]:
# A prediction counts as correct when the predicted class id is one of the
# gold class ids of the same row; rows are paired by position.
if len(pred_df) != len(df_test):
    # Keep a row-count mismatch a hard error instead of silently truncating.
    raise ValueError('pred_df and df_test must have the same number of rows')
pred_df['Correct'] = [
    predicted in gold
    for predicted, gold in zip(pred_df['Class'], df_test['List of classes'])
]

In [112]:
# Copy the prediction columns from pred_df into df_test under descriptive
# names, then show the first ten rows.
prediction_columns = {
    "Predicted class": 'Class',
    "Predicted label": 'label',
    "Prediction probability": 'score',
    "Prediction correct": 'Correct',
}
for target_col, source_col in prediction_columns.items():
    df_test[target_col] = pred_df[source_col]
df_test.head(10)

Unnamed: 0,Text,Class,ID,List of classes,Len of classes,Predicted class,Predicted label,Prediction probability,Prediction correct
0,I’m really sorry about your situation :( Altho...,25,eecwqtt,[25],1,25,sadness,0.756309,True
1,It's wonderful because it's awful. At not with.,0,ed5f85d,[0],1,11,disgust,0.697608,False
2,"Kings fan here, good luck to you guys! Will be...",13,een27c3,[13],1,20,optimism,0.64005,False
3,"I didn't know that, thank you for teaching me ...",15,eelgwd1,[15],1,15,gratitude,0.999892,True
4,They got bored from haunting earth for thousan...,27,eem5uti,[27],1,27,neutral,0.999953,True
5,Thank you for asking questions and recognizing...,15,ef2nq7i,[15],1,15,gratitude,0.999883,True
6,You’re welcome,15,efdbh17,[15],1,15,gratitude,0.999563,True
7,100%! Congrats on your job too!,15,ef0ec3b,[15],1,13,excitement,0.467985,False
8,I’m sorry to hear that friend :(. It’s for the...,24,ee8utmi,[24],1,24,remorse,0.82645,True
9,"Girlfriend weak as well, that jump was pathetic.",25,eeni74k,[25],1,10,disapproval,0.493144,False


In [111]:
# Accuracy is the fraction of rows flagged True in pred_df['Correct'].
accuracy = pred_df['Correct'].mean()
print('The accuracy score of the monologg model is', accuracy)

The accuracy score of the monologg model is 0.603464160678091
