In [33]:
from transformers import AutoTokenizer,AutoModelForSeq2SeqLM

class EmotionDetection:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
        self.model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-emotion")

    def detect_emotion(self,text):
        #text = text_dict.get('text')
        input_ids=self.tokenizer.encode(text+'</s>',return_tensors='pt')
        output=self.model.generate(input_ids=input_ids,max_length=2)

        dec = [self.tokenizer.decode(ids) for ids in output]
        label=dec[0]
        label=label.replace("<pad>","")
        #return {'id':text_dict.get('textID'), 'result':{'emotion':label}} 
        return label

In [35]:
model = EmotionDetection()

In [36]:
from sklearn.metrics import confusion_matrix,classification_report
import pandas as pd
import numpy as np
import re

In [37]:
df = pd.read_csv('./tweet_emotions.csv')
df = df[['content','sentiment']]
df.head()

Unnamed: 0,tweet_id,sentiment,content
0,1956967341,empty,@tiffanylue i know i was listenin to bad habi...
1,1956967666,sadness,Layin n bed with a headache ughhhh...waitin o...
2,1956967696,sadness,Funeral ceremony...gloomy friday...
3,1956967789,enthusiasm,wants to hang out with friends SOON!
4,1956968416,neutral,@dannycastillo We want to trade with someone w...


In [48]:
def preprocessing(x):
    x = x.replace("\n"," ")
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|(#[A-Za-z0-9]+)|([^0-9A-Za-z,.'! \t])|(\w+:\/\/\S+)"," ",x).split())

In [50]:
df['text'] = df['content'].map(preprocessing)

In [51]:
X,Y  = df['text'].values,df['sentiment'].values

In [52]:
pred = []
for text in X:
    pred.append(model.detect_emotion(text))

### model testing

In [53]:
final = pd.DataFrame({"actual_labels":Y,"predicted_labels":pred})
final.to_csv('./testing.csv')

In [54]:
final['actual_labels'].unique()

array(['empty', 'sadness', 'enthusiasm', 'neutral', 'worry', 'surprise',
       'love', 'fun', 'hate', 'happiness', 'boredom', 'relief', 'anger'],
      dtype=object)

In [55]:
final['predicted_labels'].unique()

array([' anger', ' fear', ' sadness', ' joy', ' surprise', ' love',
       ' rain', ' patience', ' painting'], dtype=object)

In [7]:
mapdict = {'empty':"sadness" , 'enthusiasm':'joy','worry':'fear',
           'fun':'joy', 'hate':'anger', 'happiness':'joy', 'boredom':'sadness', 'relief':'joy'}

In [1]:
import pandas as pd
import numpy as np

In [4]:
final = pd.read_csv('./testing.csv',index_col='Unnamed: 0')

In [5]:
final.head()

Unnamed: 0,actual_labels,predicted_labels
0,empty,anger
1,sadness,fear
2,sadness,sadness
3,enthusiasm,joy
4,neutral,anger


In [56]:
final['actual_labels'] = final['actual_labels'].map(lambda x : x.strip())
final['predicted_labels'] = final['predicted_labels'].map(lambda x : x.strip())

In [57]:
#' anger', ' fear', ' sadness', ' joy', ' surprise', ' love'
final[(final['actual_labels'] == 'surprise') |
      (final['actual_labels'] == 'anger') |
      (final['actual_labels'] == 'love') | 
     (final['actual_labels'] == 'joy') |
     (final['actual_labels'] == 'sadness') |
     (final['actual_labels'] == 'fear')]

Unnamed: 0,actual_labels,predicted_labels
1,sadness,fear
2,sadness,sadness
6,sadness,anger
8,sadness,sadness
9,sadness,sadness
...,...,...
39984,surprise,sadness
39989,love,anger
39996,love,joy
39997,love,joy


## since the labels are not same , hence model metrics is not a good measure here.

In [58]:
from sklearn.metrics import classification_report,confusion_matrix

In [60]:
print(classification_report(final.actual_labels,final.predicted_labels))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       anger       0.01      0.40      0.01       110
     boredom       0.00      0.00      0.00       179
       empty       0.00      0.00      0.00       827
  enthusiasm       0.00      0.00      0.00       759
        fear       0.00      0.00      0.00         0
         fun       0.00      0.00      0.00      1776
   happiness       0.00      0.00      0.00      5209
        hate       0.00      0.00      0.00      1323
         joy       0.00      0.00      0.00         0
        love       0.38      0.09      0.14      3842
     neutral       0.00      0.00      0.00      8638
    painting       0.00      0.00      0.00         0
    patience       0.00      0.00      0.00         0
        rain       0.00      0.00      0.00         0
      relief       0.00      0.00      0.00      1526
     sadness       0.30      0.38      0.34      5165
    surprise       0.15      0.03      0.05      2187
       worry       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [61]:
confusion_matrix(final.actual_labels,final.predicted_labels)

array([[  44,    0,    0,    0,   18,    0,    0,    0,   27,    0,    0,
           0,    0,    0,    0,   21,    0,    0],
       [  60,    0,    0,    0,   20,    0,    0,    0,   43,    3,    0,
           0,    0,    0,    0,   53,    0,    0],
       [ 276,    0,    0,    0,  111,    0,    0,    0,  314,   12,    0,
           0,    0,    0,    0,  111,    3,    0],
       [ 106,    0,    0,    0,   55,    0,    0,    0,  521,   10,    0,
           0,    0,    0,    0,   60,    7,    0],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0],
       [ 256,    0,    0,    0,   99,    0,    0,    0, 1222,   39,    0,
           0,    0,    0,    0,  115,   45,    0],
       [ 420,    0,    0,    0,  185,    0,    0,    0, 4170,  119,    0,
           0,    0,    0,    0,  234,   81,    0],
       [ 653,    0,    0,    0,  110,    0,    0,    0,  247,   10,    0,
           0,    0,    0,    0,  296,    7,    0],
