In [1]:
import pandas as pd
import numpy as np
import os
import json
import torch
from transformers import BertTokenizer
from model import BertForMultiLabelClassification
from multilabel_pipeline import MultiLabelPipeline

In [2]:
# Label taxonomy to evaluate; one of: 'original', 'group', 'ekman'.
# The value selects both the data directory and the fine-tuned model used below.
taxonomy = 'original'

In [3]:
# Read the train / test / dev splits of the selected taxonomy from headerless TSV files.
# Column 1 is forced to str so the comma-separated label ids are kept verbatim
# (they are split on ',' later in the notebook).
df_train, df_test, df_dev = (
    pd.read_csv(f'./data/{taxonomy}/{split}.tsv', sep="\t", header=None, converters={1: str})
    for split in ('train', 'test', 'dev')
)

In [4]:
# Show the full text of every cell instead of truncating long strings.
pd.set_option('display.max_colwidth', None)
# Preview the dev split: column 0 is the text, column 1 the raw label ids;
# column 2 looks like a per-row identifier (not used elsewhere in this notebook).
df_dev

Unnamed: 0,0,1,2
0,Is this in New Orleans?? I really feel like this is New Orleans.,27,edgurhb
1,"You know the answer man, you are programmed to capture those codes they send you, don’t avoid them!",427,ee84bjg
2,I've never been this sad in my life!,25,edcu99z
3,"The economy is heavily controlled and subsidized by the government. In any case, I was poking at the lack of nuance in US politics today",427,edc32e2
4,He could have easily taken a real camera from a legitimate source and change the price in Word/Photoshop and then print it out.,20,eepig6r
...,...,...,...
5421,It's pretty dangerous when the state decides which fictional deity is legitimate. For example the [NAME].....,14,edyrazk
5422,I filed for divorce this morning. Hoping he moves out in the next day or so.,20,edi2z3y
5423,"The last time it happened I just said, ""No"" and closed the door",10,eewbqtx
5424,I can’t stand this arrogant prick he’s no better thenFord in Ontario and that guy is a buffoon,3,eefx57m


In [5]:
# Column 0 of each split holds the input texts, column 1 the raw label strings.
train_x, test_x, dev_x = (frame[0].to_list() for frame in (df_train, df_test, df_dev))
train_y, test_y, dev_y = (frame[1].to_list() for frame in (df_train, df_test, df_dev))

In [6]:
# Directory of the fine-tuned checkpoint for each supported taxonomy.
model_dir_dict = {
    'original': 'model/bert-base-uncased-goemotions-original-finetuned',
    'ekman': 'model/bert-base-uncased-goemotions-ekman-finetuned',
    'group': 'model/bert-base-uncased-goemotions-group-finetuned',
}

# Checkpoint that matches the taxonomy selected at the top of the notebook.
model_dir = model_dir_dict[taxonomy]

In [7]:
# Confirm which checkpoint directory was resolved for the selected taxonomy.
model_dir

'model/bert-base-uncased-goemotions-original-finetuned'

In [8]:
# Load the fine-tuned tokenizer and classifier from the selected checkpoint directory.
tokenizer = BertTokenizer.from_pretrained(model_dir)
model = BertForMultiLabelClassification.from_pretrained(model_dir)

# A threshold of 0.0 makes the pipeline report a score for every label, so the
# decision threshold can be applied (and varied) later in the notebook.
# device: first CUDA GPU when available, otherwise CPU (-1).
goemotions = MultiLabelPipeline(
    model=model,
    tokenizer=tokenizer,
    threshold=0.0,
    device=0 if torch.cuda.is_available() else -1,
)

# labels.txt holds one label name per line; the position of a name in
# label_list is used as its class index by the evaluation cells.
labels_path = os.path.join("data", taxonomy, "labels.txt")
with open(labels_path, "r", encoding="utf-8") as f:
    label_list = [line.rstrip() for line in f]

In [9]:
# Inspect the label names; a label's position in this list is its class index.
label_list

['admiration',
 'amusement',
 'anger',
 'annoyance',
 'approval',
 'caring',
 'confusion',
 'curiosity',
 'desire',
 'disappointment',
 'disapproval',
 'disgust',
 'embarrassment',
 'excitement',
 'fear',
 'gratitude',
 'grief',
 'joy',
 'love',
 'nervousness',
 'optimism',
 'pride',
 'realization',
 'relief',
 'remorse',
 'sadness',
 'surprise',
 'neutral']

In [10]:
# Split to evaluate: 'train', 'test' or 'dev'.
data_type = 'test'

# Texts and raw label strings of every split, keyed by split name.
data_x_dict = {
    'train': train_x,
    'test': test_x,
    'dev': dev_x,
}
data_y_dict = {
    'train': train_y,
    'test': test_y,
    'dev': dev_y,
}

data_x, data_y = data_x_dict[data_type], data_y_dict[data_type]

In [11]:
from sklearn.preprocessing import MultiLabelBinarizer

# Evaluation texts, copied so later cells never alias the per-split list.
testing_data_x = list(data_x)

# Each raw label string is a comma-separated list of class indices.
label_id_sets = [
    tuple(int(label_id) for label_id in raw_labels.split(','))
    for raw_labels in data_y
]

# Pin the class set to every index of label_list so that column j always means
# label_list[j]. Without `classes`, the binarizer only creates columns for ids
# that occur in this split, which silently shifts the columns (and therefore the
# per-label metrics) whenever a label is absent from the evaluated data.
multilabelencoder = MultiLabelBinarizer(classes=list(range(len(label_list))))
testing_data_y = multilabelencoder.fit_transform(label_id_sets)
testing_data_y

array([[0, 0, 0, ..., 1, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1]])

In [12]:
# Sanity check: number of evaluation examples (rows of the binarized label matrix).
len(testing_data_y)

5427

In [13]:
# Run the pipeline over the evaluation texts in chunks of `batch_size` texts per
# call, collecting one prediction dict per text in `predicts` (same order as
# testing_data_x). The start/end offsets of each chunk are printed as progress.
# An empty input produces no pipeline call at all.
predicts = []
batch_size = 1000
data_size = len(testing_data_x)
for count_start in range(0, data_size, batch_size):
    # The last chunk may be shorter than batch_size.
    count_end = min(count_start + batch_size, data_size)
    print(count_start, count_end)
    predicts += goemotions(testing_data_x[count_start:count_end])

0 1000
1000 2000
2000 3000
3000 4000
4000 5000
5000 5427


In [14]:
# Sanity check: there should be one prediction per evaluation example.
len(predicts)

5427

In [15]:
# Inspect the raw pipeline output for the first example: a dict with parallel
# 'labels' and 'scores' lists (every label is present because threshold is 0).
predicts[0]

{'labels': ['admiration',
  'amusement',
  'anger',
  'annoyance',
  'approval',
  'caring',
  'confusion',
  'curiosity',
  'desire',
  'disappointment',
  'disapproval',
  'disgust',
  'embarrassment',
  'excitement',
  'fear',
  'gratitude',
  'grief',
  'joy',
  'love',
  'nervousness',
  'optimism',
  'pride',
  'realization',
  'relief',
  'remorse',
  'sadness',
  'surprise',
  'neutral'],
 'scores': [0.3023537,
  0.32703906,
  0.31518978,
  0.3240127,
  0.31887832,
  0.39896464,
  0.317414,
  0.30912572,
  0.27703688,
  0.31688893,
  0.32620695,
  0.29099214,
  0.28276277,
  0.30462122,
  0.31252834,
  0.30991644,
  0.3929609,
  0.3185283,
  0.5760638,
  0.2890546,
  0.28986543,
  0.2929844,
  0.2972568,
  0.29906216,
  0.5854968,
  0.6270702,
  0.321214,
  0.40425304]}

In [16]:
# Decision threshold: a label counts as predicted when its score is strictly above it.
th = 0.5

# Binarize each score vector with a single comparison. The earlier two-pass
# in-place masking (`ans[ans > th] = 1` then `ans[ans != 1] = 0`) used the value 1
# as a sentinel, so a raw score exactly equal to 1.0 stayed positive even when it
# was not above the threshold. The original dtype is kept so downstream output
# is unchanged.
predict_y = []
for predict in predicts:
    scores = np.array(predict['scores'])
    predict_y.append((scores > th).astype(scores.dtype))
predict_y = np.array(predict_y)

In [17]:
# Ad-hoc check on a single sentence: render the returned labels and scores as a table.
pd.DataFrame(goemotions(["Thanks for giving advice to the people who need it! 👌🙏"])[0])

Unnamed: 0,labels,scores
0,admiration,0.344866
1,amusement,0.303256
2,anger,0.314408
3,annoyance,0.33342
4,approval,0.363553
5,caring,0.334161
6,confusion,0.316989
7,curiosity,0.313076
8,desire,0.307223
9,disappointment,0.319463


In [18]:
# Ground-truth indicator vector of the first example (compare with predict_y[0]).
testing_data_y[0]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0])

In [19]:
# Thresholded prediction vector of the first example (1 where score > th).
predict_y[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0.], dtype=float32)

In [20]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# Exact-match accuracy over whole label vectors, followed by precision / recall / F1
# under each averaging mode, and finally the un-averaged per-label arrays.
results = {"accuracy": accuracy_score(testing_data_y, predict_y)}

for averaging in ("macro", "micro", "weighted"):
    avg_precision, avg_recall, avg_f1, _ = precision_recall_fscore_support(
        testing_data_y, predict_y, average=averaging
    )
    results[averaging + "_precision"] = avg_precision
    results[averaging + "_recall"] = avg_recall
    results[averaging + "_f1"] = avg_f1

# average=None returns one value per label column.
per_label_scores = precision_recall_fscore_support(testing_data_y, predict_y, average=None)
results["precision"], results["recall"], results["f1"], _ = per_label_scores

In [21]:
# Build a per-emotion table of precision / recall / F1, sorted by emotion name,
# with the macro average appended as the last row.

# The per-label metric arrays are indexed by label column; they must line up
# one-to-one with label_list for the rows below to carry the right names.
assert len(results['precision']) == len(label_list), (
    "per-label metrics do not match label_list; check the label binarization"
)

result_list = [
    [label, results['precision'][idx], results['recall'][idx], results['f1'][idx]]
    for idx, label in enumerate(label_list)
]
result_columns = ['Emotion', 'Precision', 'Recall', 'F1']
df_per_label = pd.DataFrame(result_list, columns=result_columns).sort_values(by='Emotion')
df_macro = pd.DataFrame(
    [['macro-average', results['macro_precision'], results['macro_recall'], results['macro_f1']]],
    columns=result_columns,
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement and, with ignore_index=True, renumbers the rows the same way.
df_result = pd.concat([df_per_label, df_macro], ignore_index=True)
df_result

Unnamed: 0,Emotion,Precision,Recall,F1
0,admiration,0.609467,0.81746,0.698305
1,amusement,0.734139,0.920455,0.816807
2,anger,0.434164,0.616162,0.509395
3,annoyance,0.295374,0.51875,0.376417
4,approval,0.333981,0.490028,0.397229
5,caring,0.34359,0.496296,0.406061
6,confusion,0.293413,0.640523,0.402464
7,curiosity,0.434783,0.774648,0.556962
8,desire,0.583333,0.506024,0.541935
9,disappointment,0.262911,0.370861,0.307692


In [22]:
# For every example with at least one returned label, print the text, the labels
# whose score exceeds `th`, and the ground-truth labels.
for idx, predict in enumerate(predicts):
    if not predict['labels']:
        continue

    print("=========================Text=========================")
    print(testing_data_x[idx])

    print("========================Predict=======================")
    # Use the label names the pipeline returned alongside the scores rather than
    # a positional lookup in label_list: the two only coincide when the pipeline
    # reports every label (threshold 0), and would mislabel scores otherwise.
    for predicted_label, predict_score in zip(predict['labels'], predict['scores']):
        if predict_score > th:
            print(predicted_label, predict_score)

    print("======================Ground Truth====================")
    # Columns of testing_data_y are indexed by class id, i.e. by position in label_list.
    for label_idx, label in enumerate(testing_data_y[idx]):
        if int(label) == 1:
            print(label_list[label_idx])
    print()

I’m really sorry about your situation :( Although I love the names Sapphira, Cirilla, and Scarlett!
love 0.5760638
remorse 0.5854968
sadness 0.6270702
sadness

It's wonderful because it's awful. At not with.
admiration 0.7306483
disgust 0.5294815
admiration

Kings fan here, good luck to you guys! Will be an interesting game to watch! 
excitement 0.6086292
optimism 0.70325357
excitement

I didn't know that, thank you for teaching me something today!
gratitude 0.82902443
gratitude

They got bored from haunting earth for thousands of years and ultimately moved on to the afterlife.
neutral 0.7826765
neutral

Thank you for asking questions and recognizing that there may be things that you don’t know or understand about police tactics. Seriously. Thank you.
gratitude 0.8341989
gratitude

You’re welcome
gratitude 0.80993676
gratitude

100%! Congrats on your job too!
admiration 0.5741343
gratitude 0.7465843
gratitude

I’m sorry to hear that friend :(. It’s for the best most likely if she didn’

Thanks a bunch <3
gratitude 0.8250928
gratitude

I just want tk be a random scumbag
desire 0.68322873
neutral 0.6270944
desire

Very possible but there's absolutely zero indication were working on a deal to trade [NAME] currently. We're not just going to trade him away for peanuts.
disapproval 0.55487657
neutral 0.70857686
neutral

I agree that you need more time in the later turns. But most games it feels like people wait until it is their turn to start thinking.
approval 0.77086973
approval

Hopefully you saved that letter!
optimism 0.79165244
optimism

LIGHT THE BEACONS!
neutral 0.7777298
neutral

Would I love to be a fly on your wall right now and watch this whole debacle.
desire 0.53945374
love 0.7199314
love

How do you know this?
curiosity 0.5786036
neutral 0.69173384
neutral

What do you think women do? I’m quiet as a churchmouse when I mastrubate, too
curiosity 0.76580685
neutral 0.5351791
approval

That's what they want though. So if you really want to 'win' their battles, si

Softest fans in sports
neutral 0.71761465
neutral

Before the camera was on him, he noticed the atrocious fake jersey on the nimrod beside him. 
neutral 0.79694635
neutral

at least it wasn’t the evil [NAME].
relief 0.756377
relief

Thanks for having that self awareness, as a physician in training I feel stupid daily. It's a very humbling job
gratitude 0.8240724
gratitude

Yes, your trolling is very good. You’re so brave for giving us the opportunity to downvote your sock account to hell.
admiration 0.8249241
neutral

me too, always smoke a cig with the joint, they are best friends
approval 0.5921205
love 0.59301066
neutral 0.57673013
neutral

It's real enough sadly. Kin have been around for awhile in their own corners of the net.
sadness 0.7799079
neutral

Yeah I wish they could just finish 9th every year in the conference
desire 0.74689037
optimism 0.5147523
optimism

Thanks, saved. Will watch it tomorrow morning :) 
gratitude 0.81256866
gratitude

oh cool! Must be tiring for [NAME] 

curiosity 0.6468521
neutral 0.59300905
curiosity

Thanks Harvard!
gratitude 0.8235481
gratitude

What makes it ok as it is?
curiosity 0.6130897
neutral 0.63237685
confusion

i am a man and i disagree. i mean you logic is worse if you believe it.
disapproval 0.7789206
disapproval

Thanks. And wow! Nice burn.
admiration 0.7264485
gratitude 0.7380131
admiration
excitement
gratitude

Wow those girls in the corner "twerking" were the best (worst?) part. 
admiration 0.6018531
disgust 0.5102621
admiration

I love this contract structure especially since [NAME] is only 31.
love 0.8260934
admiration
love

Who wrote the IQ tests?
curiosity 0.6493737
neutral 0.60172236
curiosity

I remember running into teachers at the grocery store or whatever when I was a little kid and having that same feeling!
realization 0.76852274
joy

I'm impressed you found a non troll post of theirs.
admiration 0.77305466
admiration

You can always tell from the vote counts lol. They're way too blatant
amusement 0.799092

Wow. Did u say yes?
curiosity 0.6684508
surprise 0.6673091
curiosity
surprise

His returns or his W2?
confusion 0.53291154
curiosity 0.51768583
neutral 0.6941298
confusion
neutral

Wouldn't the powder be derived from the crystalline MDMA?
confusion 0.59416586
curiosity 0.65748006
neutral 0.560953
neutral

Omg two little blankets for your penis. LOVE THAT.
love 0.8364048
love

I like how people are downvoting when this is how so many military coups have happened. I challenge those who downvote to prove me wrong
approval 0.51170516
neutral 0.58580023
love

Yep. In Sixth grade a boy named [NAME] just stabbed me in the stomach with a pencil.
approval 0.5057155
neutral

Wow, I remember having this kind of meal a lot as a kid. 
surprise 0.7278942
realization

Wow that’s like - sound porn.
excitement 0.5750926
surprise 0.69053614
admiration

if only we could blast this type of stuff out there for everyone to hear, then we might just get get those neo libs to the realization.
realization 0.774

neutral 0.777304
neutral

I said to myself "It's going to be reefer madness" Clicked the link and it was Reefer Madness.
neutral 0.64572585
embarrassment

shaved definitely makes things more swampy for me. I need the hair to soak up the swamp
desire 0.5724858
neutral 0.6836597
neutral

I get hella pissed when people who are shorter than me complain about being too tall.
anger 0.72971994
annoyance 0.64274126
annoyance

"This droid has a bad motivator."
annoyance 0.57056904
disapproval 0.611523
disgust 0.55279
disappointment
disgust

I would play the fuck out of this game
anger 0.7697652
anger

It's fairly modern.. It opened in 2007.
approval 0.5617805
neutral 0.6861768
neutral

Yeah after I showed it to my friend a few minutes later I realized what he meant but I forgot to remove my post.
realization 0.83774143
embarrassment
realization

Those are different, you don't tie that shit to the weakest part, they just stay on there by magic
approval 0.52476716
neutral 0.7247432
anger

I never

Some of them are delusional and believe their victims want it
neutral 0.76084274
annoyance
neutral

Thanks I hate it.
anger 0.54916114
annoyance 0.5766154
gratitude 0.7658596
gratitude

Title made me think I was on r/boottoobig for a second
neutral 0.8133154
neutral

what the fuck do you learn in first grade that’s so important anyway?
anger 0.73723495
annoyance 0.5334933
anger

>Have a good time debating the people in your head, looks like you're having fun.
amusement 0.6025247
joy 0.62622666
joy

You're telling me you dont get your meth from there?
curiosity 0.71832776
neutral 0.5612186
neutral

[NAME] revealed that's what [NAME] called it at a reunion. [NAME] was backstage with steam coming out his ears 😂
neutral 0.8044342
neutral

Big yikes. Biiiig yikes. I do feel your pain. I wish I could upvote you more than once.
desire 0.8079037
desire

Srs that shade range is abysmal smh
neutral 0.752961
neutral

I am [NAME] total lack of suprise
surprise 0.64385146
surprise

It kind of repla

approval 0.5680003
neutral 0.6235406
love
neutral

They are the modern day Nickelback.
neutral 0.80074126
neutral

America is not a country, it's a fucking business. if you have zero money, no one cares about you.
anger 0.7369157
annoyance 0.5618831
anger

Eat the apple butthole first. It's the best way to eat the apple.
approval 0.80619323
admiration

Its onky been 6 years i think you read the second line
neutral 0.7687441
neutral

I am. I do! You're welcome.
approval 0.5222609
gratitude 0.78612846
gratitude

Thank you! I’ll contact you tomorrow! :)
gratitude 0.83675313
gratitude

Wow damn this some good words
admiration 0.81898814
admiration

Do y’all consider Cadillacs as “luxury” cars similar to Audis, BMW, Lexus ?
confusion 0.55136293
curiosity 0.7195318
neutral 0.5186792
curiosity

WHAT THE HELL!
anger 0.7270099
anger

Hey, thank you. :) I appreciate it.
admiration 0.6940894
gratitude 0.809076
admiration
gratitude

I'm more saying that I think the south is a joke.
amusement 0.661

fear

Bzzzzt, try again
neutral 0.79060316
neutral

I own literally no red shirts, would grey, tan, or purplish be okay?
confusion 0.6729262
curiosity 0.6114809
neutral

That's a really cheap cop-out for rather common criticism of those games.
neutral 0.76281124
annoyance

So can we all collectively accept that [NAME] has a high placement for this week? Okay thank you
gratitude 0.7844048
gratitude

So America and the USSR are the exact same thing to you? Sorry you're so small minded, hope you learn to grow in the future.
optimism 0.66430956
remorse 0.7608657
optimism
remorse

It’s not even news.
neutral 0.7997351
disapproval

Refusing to pay someone for work that they've done is theft. You can try to justify it any way you want but that's what it is.
neutral 0.73733306
disapproval
neutral

I've just listened to it on my way from school today! [NAME] anatomy introduced some beautiful music.
admiration 0.8037333
admiration

This is my ex and I rib about this all the time
neutral 0.789125

approval

You can't post a Sons pic and then say "I got this", that just hurts.
disappointment 0.5648685
sadness 0.68333375
disapproval
sadness

Yes - thank you for stating it this way - perfectly said.
gratitude 0.8297918
gratitude

Congrats!!! Happy for you.
admiration 0.587547
gratitude 0.71143484
joy 0.64683723
joy

Pretty sure the legislature needs to change laws
approval 0.6435679
neutral 0.6082465
neutral

Oh dear, I'm so sorry you are going through this. I don't have any sage advice but I hope things get better for you.
caring 0.58883774
optimism 0.6452002
remorse 0.5911166
sadness 0.57240254
optimism
remorse

Yeah this is pretty eye opening. I’ve had pretty weird relationships with my reflection in the past but thankfully it’s not always this way.
gratitude 0.7892593
gratitude

Nope, from Nebraska why? Lmao
amusement 0.7329098
amusement
curiosity

Terrorism is the systematic use of violence to achieve a political goal. [NAME] has not used violence.
neutral 0.78597814
approval


disapproval 0.53075606
anger

America will lose. You are mentally-deranged and the enemy.
anger 0.5822739
annoyance 0.51643974
neutral

As sure as gods got sandals, it sure beats fighting dudes with treasure trails
approval 0.6303042
neutral 0.5641538
neutral

Thanks you!
gratitude 0.8390448
gratitude

What? Even his follow up comment was related to dog hair.
curiosity 0.658735
neutral 0.6349452
curiosity
realization

Man, that sounds like a big stuff around. Good luck getting it sorted
caring 0.5001407
optimism 0.75697076
optimism

Lindt is premium brand, im not surprised by this.
admiration 0.52748805
approval 0.5249567
neutral 0.61874574
neutral

Give an example of one with an entirely different meaning please. The context of what you mean by different is lacking.
disapproval 0.66329795
neutral 0.6279209
neutral

It's got what plants crave.
approval 0.5130734
desire 0.63952184
neutral 0.50572574
neutral

So you're saying [NAME] doesn't have a personal vendetta against me?
confusion 

sadness

Excess of 300 partners and not using protection is unappealing to me, I dont want catch anything 
disapproval 0.7829025
disapproval

Seen this result coming. Best result for us and rangers unfortunately.
admiration 0.7803944
admiration
approval

just apologize. what other advice could you possibly require? you fucked up, now deal with it.
anger 0.68857425
annoyance 0.506495
remorse 0.51049954
anger
remorse

What do you mean by delicate flowers?
confusion 0.58580023
curiosity 0.6117975
neutral 0.59965736
confusion
curiosity

But we did get an awesome transit line from downtown to the airport, plus most of the Olympic facilities existed before the games.
admiration 0.7496416
admiration

Am I not supposed to have what I want? What I need?!?
confusion 0.50741357
disapproval 0.6316388
neutral 0.5064281
annoyance
confusion

What are the Four Agreements?
curiosity 0.6327873
neutral 0.6381074
neutral

Which one?
curiosity 0.62894905
neutral 0.6440207
neutral

*Not wearing makeup the n

annoyance 0.5740054
disappointment 0.5231979
sadness 0.55483776
annoyance

My kitten just got very happy when Pasta scored. Such loud purrs! I was happy too.
joy 0.8219482
joy

The best LGBTQ+ FILM i have seen in years.
admiration 0.8211409
admiration

I'm glad you have your kids, but have you seen Brooklyn 99 like the op stated? It's worth living for too.
curiosity 0.61048895
joy 0.6284633
admiration
confusion
excitement
joy

Right? It’s not like her family seems like they wouldn’t have helped with her son.
neutral 0.66509515
disappointment

not sure I agree. whispers of madness can set up [NAME] signature and being card draw is always useful in the first lane.
confusion 0.6102024
disapproval 0.5196218
approval

I actually felt my ballsack jump up a bit when he face planted
surprise 0.58819306
neutral

Winter isn't even that bad here.
approval 0.5288747
neutral 0.621897
neutral

I think this is the only place on the internet that is happy with this commercial hahaha
amusement 0.706551

In [23]:
# From here on, make the pipeline itself return only the labels whose score is
# higher than the threshold (previous cells ran it with threshold 0).
goemotions.threshold = th

In [24]:
# Ad-hoc comparison of two closely related sentences.
# NOTE(review): the inputs are passed as two separate positional lists rather than
# one list; the recorded output has one result per string, but confirm that the
# installed pipeline version accepts this call form.
goemotions(["I don't believe it is true"], ["I don't believe it is true. It's really good"])

[{'labels': ['disapproval', 'neutral'], 'scores': [0.6078693, 0.50720966]},
 {'labels': ['admiration', 'confusion', 'disapproval'],
  'scores': [0.61421245, 0.5031275, 0.5371183]}]

In [25]:
# Ad-hoc check that the same profanity is scored differently depending on context.
# NOTE(review): inputs are passed as two separate positional lists — confirm that
# the installed pipeline version accepts this call form.
goemotions(["fucking love you"], ["fuck you"])

[{'labels': ['love'], 'scores': [0.80132246]},
 {'labels': ['anger'], 'scores': [0.7613473]}]