In [9]:
import pandas as pd
from tqdm.auto import tqdm
tqdm.pandas()
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import sacrebleu
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from math import exp
import re
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score
from sklearn.preprocessing import LabelEncoder


In [2]:
print("Preparing Electra Classifier...")
from electra_classifier import *

best_model_path = "electra_cls/lightning_logs/version_18/checkpoints/epoch=9-step=3629.ckpt"
emotion_categories = ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring',
       'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval',
       'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief',
       'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization',
       'relief', 'remorse', 'sadness', 'surprise', 'neutral']

MODEL_NAME = "google/electra-base-discriminator"
tokenizer = ElectraTokenizer.from_pretrained(MODEL_NAME)

trained_model = EmotionClassifier.load_from_checkpoint(
    # trainer.checkpoint_callback.best_model_path,
    best_model_path,
    n_classes=len(emotion_categories)
)

def predict_emotion_and_probability(text):
    encoding = tokenizer(
          text,
          max_length=64, 
          truncation=True,
          padding="max_length",
          add_special_tokens=True,
          return_token_type_ids=False,
          return_attention_mask=True,
          return_tensors="pt"
      )
    outputs = trained_model(**encoding)
    probabilities = list(torch.softmax(outputs, dim=-1).detach().numpy().flatten())
    emotion_idx = torch.argmax(outputs, dim=-1).item() 
    predicted_emotion = emotion_categories[emotion_idx]

    emotion_categories_list = list(emotion_categories)
    probabilities, emotion_categories_list = zip(*sorted(zip(probabilities, emotion_categories_list)))
    probabilities = probabilities[::-1]
    emotion_categories_list = emotion_categories_list[::-1]
    emotion_probability = round(100*probabilities[0], 2)#:.2f

    return predicted_emotion, emotion_probability

print("Electra Classifier is ready!")

Preparing Electra Classifier...


Global seed set to 42
Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraClassifier: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraClassifier from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraClassifier from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraClassifier were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bia

Electra Classifier is ready!


In [3]:
sample_text = "I do! I was so happy when I opened the box and that fat mofo jumped out!"
predict_emotion_and_probability(sample_text)

('joy', 98.35)

In [190]:
#df = pd.read_pickle("ChatEPT/first_10_percent_grouped_by_df")
#df["ed_target"] = df.utterance.apply(lambda x: x[-1])
#df.head()

EmpDG_results_df = pd.read_pickle("EmpDG_results_df")
EmpDG_results_df = EmpDG_results_df.drop('utterance_2_emotion', axis=1)

#results_df["ed_target"] = results_df.utterance.apply(lambda x: x[-1])
EmpDG_results_df.head()

Unnamed: 0,transformer,ref,emoprepend,moel,empDG_woD,empDG,utterance,chatgpt_response_no_emotion,chatgpt_emotional_response,chatgpt_prompt2_response
0,"oh no , i am so sorry to hear that .",did you suffer any injuries ?,"oh no , did they catch them ?",oh no ! i hate spiders !,"oh man , that is terrible . i hope you are oka...",oh no ! i am so sorry . i am so sorry . i am s...,[yeah about 10 years ago i had a horrifying ex...,"Wow, that sounds really scary! Have you been a...","(fear, 99.56, I'm sorry to hear about your sca...","That sounds really scary and traumatic, and it..."
1,that is really nice of them .,yeah i wanted to tell you about the time i was...,i am glad you are not good .,"that is good , i am sure you are looking for t...","oh , i am sorry to hear that . i hope you are ...",i am so sorry to hear that . i can not like th...,"[well , can you tell me about your experience ...","As an AI language model, I don't really have ""...","(curiosity, 99.7, As an AI language model, I d...","I'm sorry, as an AI language model, I don't ha..."
2,that is a shame . i am glad you got a lot of s...,sorry to hear ! do you have any idea about the...,that is a lot of thing to do that .,that is a good attitude to do .,that is a bummer . i have a lot of people .,oh no ! that is a shame !,"[i there , dont know what to do , jst broke up...","Take some time to process your emotions, refle...","(confusion, 95.6, It's okay to feel lost and h...","I'm sorry to hear that, it's completely normal..."
3,what concert ?,which concert ?,that is awesome ! what band is it ?,that is awesome ! what is it ?,what kind of concert ?,what band is it ?,"[i could not wait to go to the concert . , whi...",That's so exciting! Who is performing at the c...,"(excitement, 99.32, Awesome, I hope you have a...",I can imagine how excited you must be! Going t...
4,i am sorry to hear that . i am sure you will b...,"ugh , those articles always get me too ... : (...",i am sorry to hear that . i am glad you are no...,i am sorry to hear that . i know how you feel .,"oh , i am sorry to hear that . i am sorry to h...",oh no ! i am so sorry . i am so sorry to hear ...,[i am so sad because i have read an article ab...,"I understand why that would make you feel sad,...","(sadness, 99.03, I'm sorry to hear that. It's ...",I'm sorry to hear that. It's a tragic situation.


# Labelling with Electra

## ref

In [193]:
EmpDG_results_df["ref_emotion"] = EmpDG_results_df["ref"].progress_apply(lambda x: predict_emotion_and_probability(x))

  0%|          | 0/2713 [00:00<?, ?it/s]

## transformer

In [194]:
EmpDG_results_df["transformer_emotion"] = EmpDG_results_df["transformer"].progress_apply(lambda x: predict_emotion_and_probability(x))

  0%|          | 0/2713 [00:00<?, ?it/s]

## emoprepend

In [195]:
EmpDG_results_df["emoprepend_emotion"] = EmpDG_results_df["emoprepend"].progress_apply(lambda x: predict_emotion_and_probability(x))

  0%|          | 0/2713 [00:00<?, ?it/s]

## moel

In [196]:
EmpDG_results_df["moel_emotion"] = EmpDG_results_df["moel"].progress_apply(lambda x: predict_emotion_and_probability(x))

  0%|          | 0/2713 [00:00<?, ?it/s]

## empDG_woD

In [197]:
EmpDG_results_df["empDG_woD_emotion"] = EmpDG_results_df["empDG_woD"].progress_apply(lambda x: predict_emotion_and_probability(x))

  0%|          | 0/2713 [00:00<?, ?it/s]

## empDG

In [198]:
EmpDG_results_df["empDG_emotion"] = EmpDG_results_df["empDG"].progress_apply(lambda x: predict_emotion_and_probability(x))

  0%|          | 0/2713 [00:00<?, ?it/s]

## chatgpt_response_no_emotion

In [199]:
EmpDG_results_df["chatgpt_response_no_emotion_emotion"] = EmpDG_results_df["chatgpt_response_no_emotion"].progress_apply(lambda x: predict_emotion_and_probability(x))

  0%|          | 0/2713 [00:00<?, ?it/s]

## chatgpt_emotional_response

In [202]:
EmpDG_results_df["chatgpt_emotional_response_emotion"] = EmpDG_results_df["chatgpt_emotional_response"].progress_apply(lambda x: predict_emotion_and_probability(x[2]))

  0%|          | 0/2713 [00:00<?, ?it/s]

## chatgpt_prompt2_response

In [203]:
EmpDG_results_df["chatgpt_prompt2_response_emotion"] = EmpDG_results_df["chatgpt_prompt2_response"].progress_apply(lambda x: predict_emotion_and_probability(x))

  0%|          | 0/2713 [00:00<?, ?it/s]

In [241]:
EmpDG_results_df.head()

Unnamed: 0,transformer,ref,emoprepend,moel,empDG_woD,empDG,utterance,chatgpt_response_no_emotion,chatgpt_emotional_response,chatgpt_prompt2_response,ref_emotion,transformer_emotion,emoprepend_emotion,moel_emotion,empDG_woD_emotion,empDG_emotion,chatgpt_response_no_emotion_emotion,chatgpt_emotional_response_emotion,chatgpt_prompt2_response_emotion
0,"oh no , i am so sorry to hear that .",did you suffer any injuries ?,"oh no , did they catch them ?",oh no ! i hate spiders !,"oh man , that is terrible . i hope you are oka...",oh no ! i am so sorry . i am so sorry . i am s...,[yeah about 10 years ago i had a horrifying ex...,"Wow, that sounds really scary! Have you been a...","(fear, 99.56, I'm sorry to hear about your sca...","That sounds really scary and traumatic, and it...","(curiosity, 95.93)","(remorse, 74.2)","(confusion, 63.14)","(disapproval, 59.94)","(fear, 99.3)","(remorse, 92.65)","(fear, 99.77)","(caring, 31.36)","(fear, 99.37)"
1,that is really nice of them .,yeah i wanted to tell you about the time i was...,i am glad you are not good .,"that is good , i am sure you are looking for t...","oh , i am sorry to hear that . i hope you are ...",i am so sorry to hear that . i can not like th...,"[well , can you tell me about your experience ...","As an AI language model, I don't really have ""...","(curiosity, 99.7, As an AI language model, I d...","I'm sorry, as an AI language model, I don't ha...","(joy, 98.2)","(admiration, 99.15)","(disapproval, 53.55)","(admiration, 84.78)","(remorse, 88.28)","(remorse, 77.79)","(gratitude, 48.74)","(neutral, 99.91)","(remorse, 55.92)"
2,that is a shame . i am glad you got a lot of s...,sorry to hear ! do you have any idea about the...,that is a lot of thing to do that .,that is a good attitude to do .,that is a bummer . i have a lot of people .,oh no ! that is a shame !,"[i there , dont know what to do , jst broke up...","Take some time to process your emotions, refle...","(confusion, 95.6, It's okay to feel lost and h...","I'm sorry to hear that, it's completely normal...","(remorse, 81.61)","(embarrassment, 73.09)","(neutral, 99.85)","(admiration, 99.69)","(disappointment, 39.66)","(embarrassment, 99.21)","(caring, 99.56)","(caring, 99.23)","(caring, 55.73)"
3,what concert ?,which concert ?,that is awesome ! what band is it ?,that is awesome ! what is it ?,what kind of concert ?,what band is it ?,"[i could not wait to go to the concert . , whi...",That's so exciting! Who is performing at the c...,"(excitement, 99.32, Awesome, I hope you have a...",I can imagine how excited you must be! Going t...,"(neutral, 92.97)","(neutral, 95.96)","(admiration, 91.49)","(excitement, 80.11)","(curiosity, 58.39)","(neutral, 49.75)","(excitement, 79.53)","(admiration, 46.89)","(excitement, 78.14)"
4,i am sorry to hear that . i am sure you will b...,"ugh , those articles always get me too ... : (...",i am sorry to hear that . i am glad you are no...,i am sorry to hear that . i know how you feel .,"oh , i am sorry to hear that . i am sorry to h...",oh no ! i am so sorry . i am so sorry to hear ...,[i am so sad because i have read an article ab...,"I understand why that would make you feel sad,...","(sadness, 99.03, I'm sorry to hear that. It's ...",I'm sorry to hear that. It's a tragic situation.,"(annoyance, 58.27)","(remorse, 84.84)","(remorse, 83.37)","(remorse, 67.44)","(remorse, 87.63)","(remorse, 94.31)","(sadness, 99.24)","(caring, 85.01)","(remorse, 66.34)"


In [281]:
#EmpDG_results_df.to_pickle("EmpDG_results_df_labelled")

In [11]:
def highlight_max(s):
    is_max = s == s.max()
    return ['font-weight: bold' if v else '' for v in is_max]

In [93]:
EmpDG_results_df = pd.read_pickle("EmpDG_results_df_labelled")

In [94]:
len(EmpDG_results_df)

2713

In [74]:
EmpDG_results_df = EmpDG_results_df[EmpDG_results_df.ref_emotion.apply(lambda x: x[0]) != EmpDG_results_df.chatgpt_emotional_response_emotion.apply(lambda x: x[0])]
#EmpDG_results_df = EmpDG_results_df[EmpDG_results_df.ref_emotion.apply(lambda x: x[0]) != "neutral"]

#EmpDG_results_df = EmpDG_results_df[EmpDG_results_df.chatgpt_emotional_response_emotion.apply(lambda x: x[0]) != "caring"]

when is ChatEPT bad: 

- When the user is neutral 
- When ref is a question

In [104]:
mask = EmpDG_results_df.chatgpt_emotional_response_emotion.apply(lambda x: x[0]) == "caring"
EmpDG_results_df.ref_emotion.loc[mask] = EmpDG_results_df.chatgpt_emotional_response_emotion.loc[mask]

In [105]:
EmpDG_results_df[mask]

Unnamed: 0,transformer,ref,emoprepend,moel,empDG_woD,empDG,utterance,chatgpt_response_no_emotion,chatgpt_emotional_response,chatgpt_prompt2_response,ref_emotion,transformer_emotion,emoprepend_emotion,moel_emotion,empDG_woD_emotion,empDG_emotion,chatgpt_response_no_emotion_emotion,chatgpt_emotional_response_emotion,chatgpt_prompt2_response_emotion
0,"oh no , i am so sorry to hear that .",did you suffer any injuries ?,"oh no , did they catch them ?",oh no ! i hate spiders !,"oh man , that is terrible . i hope you are oka...",oh no ! i am so sorry . i am so sorry . i am s...,[yeah about 10 years ago i had a horrifying ex...,"Wow, that sounds really scary! Have you been a...","(fear, 99.56, I'm sorry to hear about your sca...","That sounds really scary and traumatic, and it...","(caring, 31.36)","(remorse, 74.2)","(confusion, 63.14)","(disapproval, 59.94)","(fear, 99.3)","(remorse, 92.65)","(fear, 99.77)","(caring, 31.36)","(fear, 99.37)"
2,that is a shame . i am glad you got a lot of s...,sorry to hear ! do you have any idea about the...,that is a lot of thing to do that .,that is a good attitude to do .,that is a bummer . i have a lot of people .,oh no ! that is a shame !,"[i there , dont know what to do , jst broke up...","Take some time to process your emotions, refle...","(confusion, 95.6, It's okay to feel lost and h...","I'm sorry to hear that, it's completely normal...","(caring, 99.23)","(embarrassment, 73.09)","(neutral, 99.85)","(admiration, 99.69)","(disappointment, 39.66)","(embarrassment, 99.21)","(caring, 99.56)","(caring, 99.23)","(caring, 55.73)"
4,i am sorry to hear that . i am sure you will b...,"ugh , those articles always get me too ... : (...",i am sorry to hear that . i am glad you are no...,i am sorry to hear that . i know how you feel .,"oh , i am sorry to hear that . i am sorry to h...",oh no ! i am so sorry . i am so sorry to hear ...,[i am so sad because i have read an article ab...,"I understand why that would make you feel sad,...","(sadness, 99.03, I'm sorry to hear that. It's ...",I'm sorry to hear that. It's a tragic situation.,"(caring, 85.01)","(remorse, 84.84)","(remorse, 83.37)","(remorse, 67.44)","(remorse, 87.63)","(remorse, 94.31)","(sadness, 99.24)","(caring, 85.01)","(remorse, 66.34)"
5,that is so sweet . i am sorry to hear that .,that is nice of you . what did you do ?,that is so nice of him ! i hope he was not hur...,"oh no , i bet he was a good kid","oh , that is so cool ! what did he do ?",oh wow ! how did it go ?,"[[""so my friend 's baby fell down and scraped ...","""Oh no! Poor little guy. Did you give him a ba...","(sadness, 95.47, Oh no! That must have hurt a ...",I can imagine how upset you must have been see...,"(caring, 98.14)","(admiration, 96.72)","(admiration, 96.3)","(admiration, 59.06)","(admiration, 51.53)","(excitement, 88.64)","(disappointment, 65.1)","(caring, 98.14)","(admiration, 81.96)"
6,that is great ! i am sure he will be fine .,"he will , i have faith .",i am sorry to hear that . i hope he gets it .,that is a good news . i hope you get it for hi...,i am sorry to hear that . i hope he gets bette...,i am sorry to hear that . i hope you have a go...,[my husband lost a job but i am hoping he can ...,I'm sorry to hear that your husband lost his j...,"(optimism, 98.81, I'm sorry to hear about the ...",I am sorry to hear that your husband is going ...,"(caring, 98.57)","(admiration, 93.13)","(remorse, 72.18)","(optimism, 49.54)","(remorse, 64.86)","(remorse, 75.93)","(optimism, 83.93)","(caring, 98.57)","(caring, 41.65)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2670,"oh no , that is horrible . i hope you were not...","sheesh that is brutal , did you smack him ?",that is terrible . i am sorry to hear that .,that is so sad . i am sorry you had that to ha...,that is a good thing . i am sorry to hear that...,that is a shame .,[my friend slept with a girl i like . i feel h...,It's understandable to feel upset and hurt. Ta...,"(annoyance, 52.25, It's important to communica...",I'm sorry that happened. It's okay to take som...,"(caring, 59.28)","(fear, 99.51)","(fear, 57.46)","(sadness, 97.71)","(remorse, 44.89)","(embarrassment, 58.28)","(caring, 99.5)","(caring, 59.28)","(caring, 65.11)"
2672,i am sorry to hear that .,always try to brush your teeth at least twice ...,i am sorry to hear that .,"i agree , kids can be so inconsiderate .","i agree , it is a lot of people .",i agree . i agree with you .,"[teeth are weird , but also take a lot of upke...",I know what you mean! Flossing and brushing ca...,"(annoyance, 98.32, I understand that it can be...","I hear you, dental hygiene can be a hassle. Bu...","(caring, 99.01)","(remorse, 73.19)","(remorse, 73.19)","(approval, 99.68)","(approval, 99.85)","(approval, 99.76)","(approval, 98.04)","(caring, 99.01)","(caring, 95.15)"
2687,"oh no , i am sorry to hear that .","yea , its a good idea to take time for yoursel...",i am sorry to hear that . i hope you are not g...,i am sorry to hear that . what is wrong ?,i am sorry to hear that . i hope you are okay .,i am so sorry to hear that . i am so sorry to ...,[when i can not take a breather in the day i f...,That sounds really overwhelming. Do you need s...,"(annoyance, 43.54, It's important to make time...",I'm sorry you feel that way. It must be tough ...,"(caring, 43.5)","(remorse, 63.33)","(remorse, 83.6)","(remorse, 84.77)","(remorse, 88.63)","(remorse, 94.56)","(curiosity, 96.98)","(caring, 43.5)","(remorse, 92.92)"
2688,i am sorry to hear that . what did you do ?,yeah as a child it is hard to see other kids w...,i am sorry to hear that . i am sure they were ...,i know what you mean ... i mean i wish i was m...,i love the new school year . i was a lot of my...,i can relate . i was also looking for a new sc...,[when i was in elementary school i was envious...,It's natural to want to fit in during school. ...,"(pride, 37.81, It's natural to feel envious so...",I understand. It's natural to desire acceptanc...,"(caring, 75.49)","(remorse, 86.59)","(remorse, 86.9)","(disapproval, 97.47)","(love, 88.8)","(annoyance, 53.19)","(approval, 83.71)","(caring, 75.49)","(approval, 99.78)"


In [19]:
EmpDG_results_df = EmpDG_results_df[EmpDG_results_df.chatgpt_emotional_response_emotion.apply(lambda x: x[0]) != "caring"]

In [88]:
len(EmpDG_results_df)

2713

In [76]:
import random

In [89]:
i = random.randint(0, len(EmpDG_results_df))
row = EmpDG_results_df.iloc[i]
print("## Context ##")
print(row.utterance[0])
print("## Reference ##")
print(row.ref)
print("## ChatGPT ##")
print(row.chatgpt_response_no_emotion)
print("## ChatEPT ##")
print(row.chatgpt_emotional_response)
print()
print("## Reference emotion ##")
print(row.ref_emotion)
print("## ChatGPT emotion ##")
print(row.chatgpt_response_no_emotion_emotion)
print("## ChatEPT emotion ##")
print(row.chatgpt_emotional_response_emotion)

## Context ##
i was so ashamed when i threw up in public the other day 
## Reference ##
oh.. were you sick ? 
## ChatGPT ##
Don't be too hard on yourself, it happens to the best of us.
## ChatEPT ##
('embarrassment', 98.73, "It's okay to feel embarrassed, but remember that it's a natural bodily function and happens to everyone at some point. Don't be too hard on yourself!")

## Reference emotion ##
('confusion', 59.36)
## ChatGPT emotion ##
('caring', 99.56)
## ChatEPT emotion ##
('caring', 93.73)


In [78]:
EmpDG_results_df.chatgpt_response_no_emotion_emotion.apply(lambda x: x[0]).value_counts()

admiration        485
caring            403
curiosity         244
approval          171
joy               148
excitement        111
fear               84
sadness            81
love               60
optimism           58
remorse            56
annoyance          43
gratitude          37
anger              37
neutral            33
disapproval        26
embarrassment      19
surprise           19
disappointment     16
disgust            13
confusion          11
amusement           7
nervousness         5
pride               3
relief              2
realization         2
grief               2
desire              1
Name: chatgpt_response_no_emotion_emotion, dtype: int64

In [79]:
EmpDG_results_df.chatgpt_emotional_response_emotion.apply(lambda x: x[0]).value_counts()

caring            537
admiration        490
joy               191
approval          189
curiosity         166
remorse            84
sadness            73
optimism           68
excitement         66
love               56
gratitude          48
neutral            34
annoyance          33
fear               25
anger              23
disapproval        23
disappointment     15
embarrassment      14
nervousness         8
confusion           8
surprise            8
disgust             5
amusement           4
realization         3
relief              2
desire              2
pride               1
grief               1
Name: chatgpt_emotional_response_emotion, dtype: int64

In [80]:
EmpDG_results_df.ref_emotion.apply(lambda x: x[0]).value_counts()

curiosity         663
neutral           184
admiration        166
approval          130
fear               87
love               81
excitement         79
surprise           72
sadness            71
joy                67
confusion          63
remorse            59
annoyance          56
optimism           53
anger              47
caring             44
amusement          43
disapproval        43
disappointment     36
gratitude          34
disgust            34
embarrassment      19
realization        15
desire             14
nervousness         9
relief              5
grief               2
pride               1
Name: ref_emotion, dtype: int64

In [81]:
EmpDG_results_df.chatgpt_emotional_response.apply(lambda x: x[0]).value_counts()

neutral           300
sadness           222
excitement        153
approval          148
joy               140
admiration        137
disappointment    132
nervousness        99
curiosity          95
fear               79
realization        71
anger              68
love               64
surprise           54
annoyance          52
desire             51
caring             44
optimism           43
embarrassment      36
disapproval        36
gratitude          28
amusement          23
disgust            23
confusion          22
grief              20
relief             14
remorse            13
pride              10
Name: chatgpt_emotional_response, dtype: int64

# Now calculating the metrics

In [390]:
emotion_dict = {'admiration': 'positive', 'amusement': 'positive', 'anger': 'negative', 'annoyance': 'negative', 'approval': 'positive', 'caring': 'positive', 'confusion': 'ambiguous', 'curiosity': 'ambiguous', 'desire': 'positive', 'disappointment': 'negative', 'disapproval': 'negative', 'disgust': 'negative', 'embarrassment': 'negative', 'excitement': 'positive', 'fear': 'negative', 'gratitude': 'positive', 'grief': 'negative', 'joy': 'positive', 'love': 'positive', 'nervousness': 'negative', 'optimism': 'positive', 'pride': 'positive', 'realization': 'ambiguous', 'relief': 'positive', 'remorse': 'negative', 'sadness': 'negative', 'surprise': 'ambiguous', 'neutral': 'neutral'}

In [391]:
len(EmpDG_results_df)

2713

In [474]:
EmpDG_results_df = EmpDG_results_df[EmpDG_results_df.chatgpt_response_no_emotion.apply(lambda x: "as an AI language model" not in x)]

In [69]:
EmpDG_results_df = EmpDG_results_df[EmpDG_results_df.ref_emotion.apply(lambda x: "curiosity" not in x)]
## also when the user asks questions
#EmpDG_results_df = EmpDG_results_df[EmpDG_results_df.chatgpt_emotional_response.apply(lambda x: "neutral" not in x[0])]
#EmpDG_results_df = EmpDG_results_df[EmpDG_results_df.chatgpt_emotional_response_emotion.apply(lambda x: "curiosity" not in x)]
#EmpDG_results_df = EmpDG_results_df[EmpDG_results_df.chatgpt_response_no_emotion_emotion.apply(lambda x: "curiosity" not in x)]

In [107]:
len(EmpDG_results_df)

2713

In [22]:
emotion_categories = ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring',
       'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval',
       'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief',
       'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization',
       'relief', 'remorse', 'sadness', 'surprise', 'neutral']

In [106]:
# Generate some example data
np.random.seed(42)
n_samples = 1000
n_classes = 28
n_methods = 9
classes = [f'class_{i}' for i in range(n_classes)]

#true_labels = np.random.choice(classes, size=n_samples)
true_labels = EmpDG_results_df["ref_emotion"].apply(lambda x: x[0])
#encoder = LabelEncoder()
true_labels_encoded = true_labels.apply(lambda x: emotion_categories.index(x)).to_list()

# Create a DataFrame for each method with predicted labels
dfs = []
methods = ["transformer", "emoprepend", "moel", "empDG_woD", "empDG", "chatgpt_response_no_emotion", "chatgpt_emotional_response", "chatgpt_prompt2_response"]
#methods = ["empDG_woD", "empDG", "chatgpt_response_no_emotion", "chatgpt_emotional_response", "chatgpt_prompt2_response"]
#for i in range(n_methods):
for i, method in enumerate(methods):
    #predicted_labels = np.random.choice(classes, size=n_samples)
    predicted_labels = EmpDG_results_df[method+"_emotion"].apply(lambda x: x[0])
    predicted_labels_encoded = predicted_labels.apply(lambda x: emotion_categories.index(x)).to_list()
    dfs.append(pd.DataFrame({
        'true_labels': true_labels,
        f'predicted_labels_{i}': predicted_labels
    }))

# Define a function to calculate evaluation metrics
def evaluate_classification(true_labels, predicted_labels):
    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels, average='macro', zero_division=1)
    recall = recall_score(true_labels, predicted_labels, average='macro', zero_division=0)
    f1 = f1_score(true_labels, predicted_labels, average='macro')
    return {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1 Score': f1}

# Calculate evaluation metrics for each method
results = {}
for i, df in enumerate(dfs):
    #method_name = f'Method {i}'
    method_name = methods[i]    
    predicted_labels = df[f'predicted_labels_{i}']
    predicted_labels_encoded = predicted_labels.apply(lambda x: emotion_categories.index(x)).values
    #predicted_labels_encoded = encoder.transform(predicted_labels)
    metrics = evaluate_classification(true_labels_encoded, predicted_labels_encoded)
    for metric, score in metrics.items():
        results.setdefault(metric, {})[method_name] = "{:.2f}".format(score*100)

# Combine results into a DataFrame
results_df = pd.DataFrame(results)
results_df = results_df.astype(float)
results_df = results_df.style.apply(highlight_max)
results_df

Unnamed: 0,Accuracy,Precision,Recall,F1 Score
transformer,15.04,17.74,7.2,5.46
emoprepend,15.04,18.15,7.72,5.58
moel,15.59,16.96,7.52,5.69
empDG_woD,13.71,17.19,7.85,5.24
empDG,15.19,14.92,7.87,6.02
chatgpt_response_no_emotion,28.64,13.19,14.14,12.4
chatgpt_emotional_response,39.55,14.1,13.35,12.34
chatgpt_prompt2_response,22.71,10.03,12.23,9.4


In [450]:
# Generate some example data
np.random.seed(42)
n_samples = 1000
n_classes = 28
n_methods = 9
classes = [f'class_{i}' for i in range(n_classes)]

#true_labels = np.random.choice(classes, size=n_samples)
true_labels = EmpDG_results_df["ref_emotion"].apply(lambda x: x[0])
#encoder = LabelEncoder()
true_labels_encoded = true_labels.apply(lambda x: emotion_categories.index(x)).to_list()

# Create a DataFrame for each method with predicted labels
dfs = []
methods = ["transformer", "emoprepend", "moel", "empDG_woD", "empDG", "chatgpt_response_no_emotion", "chatgpt_emotional_response", "chatgpt_prompt2_response"]
#methods = ["empDG_woD", "empDG", "chatgpt_response_no_emotion", "chatgpt_emotional_response", "chatgpt_prompt2_response"]
#for i in range(n_methods):
for i, method in enumerate(methods):
    #predicted_labels = np.random.choice(classes, size=n_samples)
    predicted_labels = EmpDG_results_df[method+"_emotion"].apply(lambda x: x[0])
    predicted_labels_encoded = predicted_labels.apply(lambda x: emotion_categories.index(x)).to_list()
    dfs.append(pd.DataFrame({
        'true_labels': true_labels,
        f'predicted_labels_{i}': predicted_labels
    }))

# Define a function to calculate evaluation metrics
def evaluate_classification(true_labels, predicted_labels):
    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels, average='macro', zero_division=1)
    recall = recall_score(true_labels, predicted_labels, average='macro', zero_division=0)
    f1 = f1_score(true_labels, predicted_labels, average='macro')
    return {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1 Score': f1}

# Calculate evaluation metrics for each method
results = {}
for i, df in enumerate(dfs):
    #method_name = f'Method {i}'
    method_name = methods[i]    
    predicted_labels = df[f'predicted_labels_{i}']
    predicted_labels_encoded = predicted_labels.apply(lambda x: emotion_categories.index(x)).values
    #predicted_labels_encoded = encoder.transform(predicted_labels)
    metrics = evaluate_classification(true_labels_encoded, predicted_labels_encoded)
    for metric, score in metrics.items():
        results.setdefault(metric, {})[method_name] = "{:.2f}".format(score*100)

# Combine results into a DataFrame
results_df = pd.DataFrame(results)
results_df = results_df.astype(float)
results_df = results_df.style.apply(highlight_max)
results_df

Unnamed: 0,Accuracy,Precision,Recall,F1 Score
transformer,15.85,17.53,6.85,5.59
emoprepend,15.67,18.35,6.97,5.59
moel,17.69,17.87,7.75,6.69
empDG_woD,15.59,16.9,7.62,5.71
empDG,17.14,14.89,7.86,6.64
chatgpt_response_no_emotion,22.34,13.09,12.79,11.3
chatgpt_emotional_response,19.76,10.73,9.44,8.11
chatgpt_prompt2_response,16.33,9.91,11.2,8.7


In [406]:
# Generate some example data
np.random.seed(42)
n_samples = 1000
n_classes = 28
n_methods = 9
classes = [f'class_{i}' for i in range(n_classes)]

#true_labels = np.random.choice(classes, size=n_samples)
true_labels = EmpDG_results_df["ref_emotion"].apply(lambda x: x[0]).apply(lambda x: emotion_dict[x])
encoder = LabelEncoder()
#true_labels_encoded = true_labels.apply(lambda x: emotion_categories.index(x)).to_list()
true_labels_encoded = encoder.fit_transform(true_labels)

# Create a DataFrame for each method with predicted labels
dfs = []
methods = ["transformer", "emoprepend", "moel", "empDG_woD", "empDG", "chatgpt_response_no_emotion", "chatgpt_emotional_response", "chatgpt_prompt2_response"]
#for i in range(n_methods):
for i, method in enumerate(methods):
    #predicted_labels = np.random.choice(classes, size=n_samples)
    predicted_labels = EmpDG_results_df[method+"_emotion"].apply(lambda x: x[0]).apply(lambda x: emotion_dict[x])
    #predicted_labels_encoded = predicted_labels.apply(lambda x: emotion_categories.index(x)).to_list()
    predicted_labels_encoded = encoder.transform(predicted_labels)
    dfs.append(pd.DataFrame({
        'true_labels': true_labels,
        f'predicted_labels_{i}': predicted_labels
    }))

# Define a function to calculate evaluation metrics
def evaluate_classification(true_labels, predicted_labels):
    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels, average='macro', zero_division=1)
    recall = recall_score(true_labels, predicted_labels, average='macro')
    f1 = f1_score(true_labels, predicted_labels, average='macro')
    return {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1 Score': f1}

# Calculate evaluation metrics for each method
results = {}
for i, df in enumerate(dfs):
    #method_name = f'Method {i}'
    method_name = methods[i]    
    predicted_labels = df[f'predicted_labels_{i}']
    #predicted_labels_encoded = predicted_labels.apply(lambda x: emotion_categories.index(x)).values
    predicted_labels_encoded = encoder.transform(predicted_labels)
    metrics = evaluate_classification(true_labels_encoded, predicted_labels_encoded)
    for metric, score in metrics.items():
        results.setdefault(metric, {})[method_name] = "{:.2f}".format(score*100)

# Combine results into a DataFrame
results_df = pd.DataFrame(results)
#results_df = results_df.style.apply(highlight_max)
results_df


Unnamed: 0,Accuracy,Precision,Recall,F1 Score
transformer,41.84,33.5,33.93,29.92
emoprepend,43.05,35.12,35.94,31.86
moel,44.75,35.62,35.86,33.1
empDG_woD,42.61,34.4,36.12,33.19
empDG,42.02,34.84,36.16,33.55
chatgpt_response_no_emotion,47.7,35.9,35.39,32.54
chatgpt_emotional_response,45.96,36.15,33.19,30.18
chatgpt_prompt2_response,46.63,37.39,36.16,30.5


# Finding the best parameters for the emotional version: 


In [None]:
import sacrebleu
from itertools import product

# Define the possible values for each hyperparameter
smooth_methods = ['exp', 'floor', 'add-k', 'none']
smooth_values = [0.1, 1, 10]
forces = [True, False]
lowercases = [True, False]
tokenizes = [None, '13a']
effective_orders = [True, False]

# Define the reference and hypothesis texts
#refs = [['This is a test.', 'Another test sentence.'], ['Reference sentence.']]
#hyps = ['This is a test sentence.', 'Sentence that does not match the reference.']

# Define a list to store the results of each combination of hyperparameters
results = []

# Loop over all possible combinations of hyperparameters
for smooth_method, smooth_value, force, lowercase, tokenize, use_effective_order in tqdm(product(
    smooth_methods, smooth_values, forces, lowercases, tokenizes, effective_orders
), total=192):
    # Calculate the BLEU score using the current combination of hyperparameters
    bleu_score = sacrebleu.corpus_bleu(hypotheses, references, smooth_method=smooth_method, smooth_value=smooth_value, force=force, lowercase=lowercase, tokenize=tokenize, use_effective_order=use_effective_order).score

    # Add the hyperparameters and the corresponding BLEU score to the results list
    results.append({
        'smooth_method': smooth_method,
        'smooth_value': smooth_value,
        'force': force,
        'lowercase': lowercase,
        'tokenize': tokenize,
        'use_effective_order': use_effective_order,
        'bleu_score': bleu_score
    })

# Print the results sorted by descending BLEU score
for result in sorted(results, key=lambda x: x['bleu_score'], reverse=True):
    print(result)


{'smooth_method': 'add-k', 'smooth_value': 10, 'force': True, 'lowercase': True, 'tokenize': None, 'use_effective_order': True, 'bleu_score': 52.417057590025316}

In [369]:
hypotheses[-10:]

['oh no , what happened ?  ',
 'that is so exciting ! i hope it goes well for you !  ',
 'what happened ?  ',
 'oh no , that is terrible . i hope it is not a big deal with a bit .  ',
 'oh no ! that is horrible ! i would have been so upset .  ',
 'i am so sorry to hear that . what did you do ?  ',
 'that is great ! how old is she ?  ',
 'that was nice of you !  ',
 'oh no , that is not good . do you have a bad experience ?  ',
 'that is great ! i am sure you will be fine .  ']

In [370]:
references[-10:]

[['ouch ! what did you take ? '],
 ['congrats ! thats so exciting do you fel ok ? '],
 ['i still think it is , i always get so excited '],
 ['oh wow that is weird . did you say anything to him ? '],
 ['were you able to take the plate id ? '],
 ['oh nice . what kind of candy was it ? '],
 ["glad to hear it . i hate hearing about children having tragic childhoods and not being able to just simply be a kid . but at least she 'll have a good perspective and appreciate life more . "],
 ['what had he made in the world ? '],
 ['i am really sorry to hear that . i hope everything is alright . '],
 ['that is a great way to look at it ']]

In [388]:
# iterate through each row using iterrows
for idx, row in EmpDG_results_df.iterrows():
    # wait for user input before continuing to the next row
    input("Press enter to continue to the next row.")
    
    # print the row information
    reference = [row['ref']]
    print(reference)
    hypothesis = str(row['chatgpt_emotional_response'][2])  # convert to string
    print(hypothesis)

Press enter to continue to the next row. 


['did you suffer any injuries ? ']
I'm sorry to hear about your scary experience. It's alarming to think about what could have happened, but I'm glad you're okay.


Press enter to continue to the next row. 


['sorry to hear ! do you have any idea about the break up ? did you think about it ? ']
It's okay to feel lost and hurt right now. Give yourself time to grieve, take care of yourself, and surround yourself with supportive friends and family.


Press enter to continue to the next row. 


['which concert ? ']
Awesome, I hope you have an amazing time!


Press enter to continue to the next row. 


['ugh , those articles always get me too ... : ( what was wrong with her ? ']
I'm sorry to hear that. It's important to seek medical treatment when needed.


KeyboardInterrupt: Interrupted by user

# Emotional ChatEPT scores

In [366]:
# Load the GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Calculate the perplexity and SacreBLEU score of the chatbot responses
total_loss = 0
total_tokens = 0
references = []
hypotheses = []

for _, row in tqdm(EmpDG_results_df.iterrows(), total=len(EmpDG_results_df)):
    reference = [row['ref']]
    hypothesis = str(row['transformer'])  # convert to string
    references.append(reference)
    hypotheses.append(hypothesis)
    """tokens = tokenizer.encode(hypothesis, return_tensors='pt')
    loss = model(tokens, labels=tokens).loss
    total_loss += loss.item() * len(tokens)
    total_tokens += len(tokens)"""

#perplexity = exp(total_loss / total_tokens)
#sacrebleu_score = sacrebleu.corpus_bleu(hypotheses, references).score
sacrebleu_score = sacrebleu.corpus_bleu(hypotheses, references, smooth_method='add-k', smooth_value=10, force=True, lowercase=True, use_effective_order=True).score

#print("Perplexity:", perplexity)
print("SacreBLEU score:", sacrebleu_score)

  0%|          | 0/8 [00:00<?, ?it/s]

SacreBLEU score: 100.00000000000004



17780/? [2:29:47<00:00, 17.18it/s]
Perplexity: 28.021526326887294
SacreBLEU score: 70.39848207052137

In [365]:
# Load the GPT-2 model and tokenizer
#model = GPT2LMHeadModel.from_pretrained('gpt2')
#tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Calculate the perplexity and SacreBLEU score of the chatbot responses
total_loss = 0
total_tokens = 0
references = []
hypotheses = []

for _, row in tqdm(EmpDG_results_df.iterrows(), total=len(results_df)):
    reference = [row['ref']]
    hypothesis = str(row['chatgpt_emotional_response'][2])  # convert to string
    references.append(reference)
    hypotheses.append(hypothesis)
    #tokens = tokenizer.encode(hypothesis, return_tensors='pt')
    #loss = model(tokens, labels=tokens).loss
    #total_loss += loss.item() * len(tokens)
    #total_tokens += len(tokens)

#perplexity = exp(total_loss / total_tokens)
#sacrebleu_score = sacrebleu.corpus_bleu(hypotheses, references).score
sacrebleu_score = sacrebleu.corpus_bleu(hypotheses, references, smooth_method='add-k', smooth_value=10, force=True, lowercase=True, use_effective_order=True).score

#print("Perplexity:", perplexity)
print("SacreBLEU score:", sacrebleu_score)

  0%|          | 0/8 [00:00<?, ?it/s]

SacreBLEU score: 52.417057590025316


17780/? [18:57<00:00, 15.08it/s]
Perplexity: 26.459359145706088
SacreBLEU score: 66.52049901111006

In [152]:
# Load the GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Calculate the perplexity and SacreBLEU score of the chatbot responses
total_loss = 0
total_tokens = 0
references = []
hypotheses = []

for _, row in tqdm(chatept_output.iterrows(), total=len(chatept_output)):
    reference = [row['chatgpt_response_no_emotion']]
    hypothesis = str(row['chatgpt_emotional_response'])  # convert to string
    references.append(reference)
    hypotheses.append(hypothesis)
    #tokens = tokenizer.encode(hypothesis, return_tensors='pt')
    #loss = model(tokens, labels=tokens).loss
    #total_loss += loss.item() * len(tokens)
    #total_tokens += len(tokens)

#perplexity = exp(total_loss / total_tokens)
#sacrebleu_score = sacrebleu.corpus_bleu(hypotheses, references).score
sacrebleu_score = sacrebleu.corpus_bleu(hypotheses, references, smooth_method='add-k', smooth_value=10, force=True, lowercase=True, use_effective_order=True).score

print("Perplexity:", perplexity)
print("SacreBLEU score:", sacrebleu_score)

  0%|          | 0/17780 [00:00<?, ?it/s]

Perplexity: 28.021526326887294
SacreBLEU score: 74.0599898671159


17780/? [20:50<00:00, 13.78it/s]
Perplexity: 45.450342777853926
SacreBLEU score: 56.586356745712415