# Emotion train (baseline) experiment

In [1]:
import numpy as np
import pandas as pd
import torch
from datasets import Dataset, load_dataset, DatasetDict, ClassLabel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB

# Fix every RNG seed up front so that reruns of the notebook print the same numbers.
RANDOM_SEED = 64

np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

if torch.cuda.is_available():
    # Seed the current GPU and all other GPUs.
    torch.cuda.manual_seed(RANDOM_SEED)
    torch.cuda.manual_seed_all(RANDOM_SEED)
    # Turn off cuDNN autotuning and request deterministic algorithms.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Communicative function -> emotion labels that count as that function.
# A [''] entry is a placeholder: the function has no counterpart emotion label.
function_mapping = {
    'OTHER': ['joy', 'love', 'surprise'],
    'NOT_INTERESTED': [''],
    'DISLIKE': [''],
    'NOT_CORRECT': [''],
    'PESSIMISTIC': ['sadness'],
    'WORRIED': ['fear'],
    'ANGRY': ['anger'],
    'DISAPPOINTED': [''],
    'BORED': [''],
    'NOT_APPROVE': [''],
    'NOT_IMPORTANT': [''],
    'DISAGREE': [''],
    'WARN': [''],
    'COMPLAIN': [''],
    'THREATEN': [''],
    'UNWILLING': [''],
    'DISTRUST': [''],
    'REFUSE': [''],
}

# Load the 'emotion' dataset and swap its integer 'label' column for a readable
# 'emotion' string column.
emotion = load_dataset('emotion')
mapping = dict(enumerate(["sadness", "joy", "love", "anger", "fear", "surprise"]))
emotion = emotion.map(
    lambda row: {'emotion': mapping[row['label']]},
    remove_columns=['label'],
)
def map_labels(batch, function_map=None):
    """Add a 'function' column holding the communicative function of each row's emotion.

    Args:
        batch: Mapping of column name -> list of values, as handed over by a batched
            ``Dataset.map``. It must contain an 'emotion' column.
        function_map: Optional dict of function name -> list of emotion names.
            Defaults to the notebook-level ``function_mapping``.

    Returns:
        The same ``batch`` object, with ``batch['function']`` set to one function
        name per row, or ``None`` where no function lists the row's emotion.
    """
    if function_map is None:
        function_map = function_mapping
    # Invert the mapping once so each row is a single dictionary lookup. If an emotion
    # is listed under several functions the last one wins, as in the previous nested scan.
    emotion_to_function = {
        emotion_name: function_name
        for function_name, emotion_names in function_map.items()
        for emotion_name in emotion_names
    }
    # Unmapped emotions become None instead of an empty list, so the column never
    # mixes list and string values.
    batch['function'] = [emotion_to_function.get(label) for label in batch['emotion']]
    return batch
# Run map_labels over the dataset in batches to add the 'function' column.
emotion = emotion.map(map_labels, batched=True)

No config specified, defaulting to: emotion/split
Found cached dataset emotion (/home/njfernandez/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd)
100%|██████████| 3/3 [00:00<00:00, 666.33it/s]
Loading cached processed dataset at /home/njfernandez/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd/cache-da33b75eb48814c5.arrow
Loading cached processed dataset at /home/njfernandez/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd/cache-c9fa37d993431ea5.arrow
Loading cached processed dataset at /home/njfernandez/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd/cache-9e625d0ab4c5791f.arrow
Loading cached processed dataset at /home/njfernandez/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c670972

In [3]:
# Turn each split into a pandas DataFrame for the scikit-learn steps below.
df_train, df_valid, df_test = (
    emotion[split_name].to_pandas() for split_name in ('train', 'validation', 'test')
)

df_train

Unnamed: 0,text,emotion,function
0,i didnt feel humiliated,sadness,PESSIMISTIC
1,i can go from feeling so hopeless to so damned...,sadness,PESSIMISTIC
2,im grabbing a minute to post i feel greedy wrong,anger,ANGRY
3,i am ever feeling nostalgic about the fireplac...,love,OTHER
4,i am feeling grouchy,anger,ANGRY
...,...,...,...
15995,i just had a very brief time in the beanbag an...,sadness,PESSIMISTIC
15996,i am now turning and i feel pathetic that i am...,sadness,PESSIMISTIC
15997,i feel strong and good overall,joy,OTHER
15998,i feel like this was such a rude comment and i...,anger,ANGRY


In [4]:
# Bag-of-words vectorizer with default settings; CountVectorizer is imported in the
# notebook's top import cell.
cv = CountVectorizer()

In [5]:
# Targets: the communicative-function label of every split.
y_train, y_valid, y_test = df_train["function"], df_valid["function"], df_test["function"]

# Features: fit the vocabulary on the training texts only, then reuse it unchanged
# for the validation and test texts.
X_train = cv.fit_transform(df_train['text'])
X_valid, X_test = cv.transform(df_valid['text']), cv.transform(df_test['text'])

In [6]:
# Fit a multinomial Naive Bayes baseline on the word counts and evaluate it on the test
# split. MultinomialNB and classification_report come from the top import cell.
classifier = MultinomialNB()
classifier.fit(X_train, y_train)
y_pred_test = classifier.predict(X_test)
# zero_division=0 reports undefined precision/recall as 0 instead of warning.
clf_report = classification_report(y_test, y_pred_test, zero_division=0)
print(clf_report)

              precision    recall  f1-score   support

       ANGRY       0.93      0.53      0.68       275
       OTHER       0.83      0.98      0.90       920
 PESSIMISTIC       0.82      0.89      0.85       581
     WORRIED       0.88      0.50      0.64       224

    accuracy                           0.84      2000
   macro avg       0.86      0.72      0.77      2000
weighted avg       0.85      0.84      0.82      2000



In [7]:
# Recompute the report as a dict so it can be tabulated (one row per class/average) and saved.
clf_report = classification_report(y_test, y_pred_test, zero_division=0, output_dict=True)
df_results = pd.DataFrame(clf_report).transpose()
# Give pandas the path rather than a text-mode handle: a handle opened without
# newline='' lets platform newline translation add an extra carriage return per row.
df_results.to_csv('classification_report_baseline_emotion_naive_bayes.csv')

In [8]:
# Attach the baseline predictions to the test frame and save it as a tab-separated file.
df_test['predicted_label'] = y_pred_test
df_test.to_csv(
    'preds_baseline_emotion_naive_bayes.tsv',
    sep='\t',
    header=True,
    index=False,
)

# Function basic experiment

In [9]:
comfunctions_basic = 'comfunct_basic.txt'
# Functions whose entry in function_mapping is the [''] placeholder have no emotion
# counterpart, so they can never appear among the emotion test labels. Derive that
# list from function_mapping instead of repeating it by hand, then drop those rows.
unmapped_functions = [name for name, emotions in function_mapping.items() if emotions == ['']]
df = pd.read_csv(comfunctions_basic, delimiter=";")
df = df[~df['function'].isin(unmapped_functions)]

df

Unnamed: 0,text,function
209,I'm fairly pessimistic about it,PESSIMISTIC
210,I'm fairly pessimistic,PESSIMISTIC
211,I'm pessimistic about it,PESSIMISTIC
212,I'm pessimistic,PESSIMISTIC
213,I'm rather sceptical about it,PESSIMISTIC
...,...,...
897,you could do it,OTHER
898,you look nice,OTHER
899,you look smart,OTHER
900,you're right,OTHER


In [10]:
# Fresh vectorizer: the vocabulary now comes from the basic function phrases only,
# and the emotion test texts are encoded with it.
cv = CountVectorizer()
y_train, y_test = df["function"], df_test["function"]
X_train = cv.fit_transform(df['text'])
X_test = cv.transform(df_test['text'])

In [11]:
# Train Naive Bayes on the basic function phrases and evaluate on the emotion test split.
# MultinomialNB and classification_report come from the top import cell.
classifier = MultinomialNB()
classifier.fit(X_train, y_train)
y_pred_test = classifier.predict(X_test)
# zero_division=0 reports undefined precision/recall as 0 instead of warning.
clf_report = classification_report(y_test, y_pred_test, zero_division=0)
print(clf_report)

              precision    recall  f1-score   support

       ANGRY       0.18      0.15      0.16       275
       OTHER       0.49      0.67      0.57       920
 PESSIMISTIC       0.29      0.07      0.12       581
     WORRIED       0.16      0.27      0.20       224

    accuracy                           0.38      2000
   macro avg       0.28      0.29      0.26      2000
weighted avg       0.35      0.38      0.34      2000



In [12]:
# Recompute the report as a dict so it can be tabulated (one row per class/average) and saved.
clf_report = classification_report(y_test, y_pred_test, zero_division=0, output_dict=True)
df_result = pd.DataFrame(clf_report).transpose()
# Give pandas the path rather than a text-mode handle: a handle opened without
# newline='' lets platform newline translation add an extra carriage return per row.
df_result.to_csv('classification_report_functions_basic_emotion_naive_bayes.csv')

In [13]:
# Attach the basic-phrase model's predictions to the test frame and save it as TSV.
df_test['predicted_label'] = y_pred_test
df_test.to_csv(
    'preds_functions_basic_emotion_naive_bayes.tsv',
    sep='\t',
    header=True,
    index=False,
)

# Function extended experiment

In [14]:
comfunctions_extended = 'comfunct_extended.txt'
# Functions whose entry in function_mapping is the [''] placeholder have no emotion
# counterpart, so they can never appear among the emotion test labels. Derive that
# list from function_mapping instead of repeating it by hand, then drop those rows.
unmapped_functions = [name for name, emotions in function_mapping.items() if emotions == ['']]
df = pd.read_csv(comfunctions_extended, delimiter=";")
df = df[~df['function'].isin(unmapped_functions)]

df

Unnamed: 0,text,function
0,"Hey #friends, just wanted to clarify that the ...",OTHER
1,Good morning everyone! ☀️ Just wanted to say a...,OTHER
2,"Hey #team, don't forget our meeting at 2pm tod...",OTHER
3,"Sorry for the late reply, I was swamped at wor...",OTHER
4,"Hey, can someone introduce me to @jane_doe? I'...",OTHER
...,...,...
1805,just waiting for the other shoe to drop,PESSIMISTIC
1806,I don't think this is going to go well #notloo...,PESSIMISTIC
1807,I have a feeling things are going to turn out ...,PESSIMISTIC
1808,I don't have high hopes for this #dreadingit,PESSIMISTIC


In [15]:
# Fresh vectorizer: the vocabulary now comes from the extended function phrases only,
# and the emotion test texts are encoded with it.
cv = CountVectorizer()
y_train, y_test = df["function"], df_test["function"]
X_train = cv.fit_transform(df['text'])
X_test = cv.transform(df_test['text'])

In [16]:
# Train Naive Bayes on the extended function phrases and evaluate on the emotion test split.
# MultinomialNB and classification_report come from the top import cell.
classifier = MultinomialNB()
classifier.fit(X_train, y_train)
y_pred_test = classifier.predict(X_test)
# zero_division=0 reports undefined precision/recall as 0 instead of warning.
clf_report = classification_report(y_test, y_pred_test, zero_division=0)
print(clf_report)

              precision    recall  f1-score   support

       ANGRY       0.18      0.18      0.18       275
       OTHER       0.48      0.63      0.55       920
 PESSIMISTIC       0.37      0.20      0.26       581
     WORRIED       0.18      0.16      0.17       224

    accuracy                           0.39      2000
   macro avg       0.30      0.29      0.29      2000
weighted avg       0.37      0.39      0.37      2000



In [17]:
# Recompute the report as a dict so it can be tabulated (one row per class/average) and saved.
clf_report = classification_report(y_test, y_pred_test, zero_division=0, output_dict=True)
df_result = pd.DataFrame(clf_report).transpose()
# Give pandas the path rather than a text-mode handle: a handle opened without
# newline='' lets platform newline translation add an extra carriage return per row.
df_result.to_csv('classification_report_functions_extended_emotion_naive_bayes.csv')

In [18]:
# Attach the extended-phrase model's predictions to the test frame and save it as TSV.
df_test['predicted_label'] = y_pred_test
df_test.to_csv(
    'preds_functions_extended_emotion_naive_bayes.tsv',
    sep='\t',
    header=True,
    index=False,
)