# This is the reimplementation of the paper "EmoBERTa: Speaker-Aware Emotion Recognition in Conversation with RoBERTa" by Taewoon Kim and Piek Vossen, Vrije Universiteit Amsterdam.

# Link of the paper: https://arxiv.org/pdf/2108.12009.pdf.

# A few parts of the code are copied directly from https://github.com/tae898/erc; these parts are marked separately with comments in the code.

In [1]:
import os

# Order devices by PCI bus ID and expose only device index 4 to this process.
# NOTE(review): the device index is machine-specific — adjust for other hosts.
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",
    CUDA_VISIBLE_DEVICES="4",
)

In [2]:
# !pip install datasets
# !pip install transformers
# !pip install accelerate
# !pip install transformers[torch]

In [3]:
import numpy as np
import json
import pandas as pd
import json
import os
import random
import torch
from sklearn.metrics import f1_score
from tqdm import tqdm
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments

2024-04-25 22:22:35.867328: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Opening JSON file
# Load the raw conversations from the JSON file.
# The encoding is given explicitly so the result does not depend on the
# platform's default locale encoding (JSON text is expected to be UTF-8).
with open('Subtask_1_train.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [5]:
# Display the first loaded record to inspect its structure.
data[0]

{'conversation_ID': 1,
 'conversation': [{'utterance_ID': 1,
   'text': 'Alright , so I am back in high school , I am standing in the middle of the cafeteria , and I realize I am totally naked .',
   'speaker': 'Chandler',
   'emotion': 'neutral'},
  {'utterance_ID': 2,
   'text': 'Oh , yeah . Had that dream .',
   'speaker': 'All',
   'emotion': 'neutral'},
  {'utterance_ID': 3,
   'text': 'Then I look down , and I realize there is a phone ... there .',
   'speaker': 'Chandler',
   'emotion': 'surprise'},
  {'utterance_ID': 4,
   'text': 'Instead of ... ?',
   'speaker': 'Joey',
   'emotion': 'surprise'},
  {'utterance_ID': 5,
   'text': 'That is right .',
   'speaker': 'Chandler',
   'emotion': 'anger'},
  {'utterance_ID': 6,
   'text': 'Never had that dream .',
   'speaker': 'Joey',
   'emotion': 'neutral'},
  {'utterance_ID': 7,
   'text': 'No .',
   'speaker': 'Phoebe',
   'emotion': 'neutral'},
  {'utterance_ID': 8,
   'text': 'All of a sudden , the phone starts to ring .',
   's

In [6]:
# Drop the fields that are not needed for emotion classification.
# `pop(key, None)` keeps this cell idempotent: re-running it (or meeting a
# record that lacks one of the keys) no longer raises KeyError.
for conversation_record in data:
    conversation_record.pop('conversation_ID', None)
    conversation_record.pop('emotion-cause_pairs', None)

In [7]:
# Keep only the list of utterances of every record.
new_data = [record['conversation'] for record in data]

In [8]:
# Display the utterance list of the first conversation.
new_data[0]

[{'utterance_ID': 1,
  'text': 'Alright , so I am back in high school , I am standing in the middle of the cafeteria , and I realize I am totally naked .',
  'speaker': 'Chandler',
  'emotion': 'neutral'},
 {'utterance_ID': 2,
  'text': 'Oh , yeah . Had that dream .',
  'speaker': 'All',
  'emotion': 'neutral'},
 {'utterance_ID': 3,
  'text': 'Then I look down , and I realize there is a phone ... there .',
  'speaker': 'Chandler',
  'emotion': 'surprise'},
 {'utterance_ID': 4,
  'text': 'Instead of ... ?',
  'speaker': 'Joey',
  'emotion': 'surprise'},
 {'utterance_ID': 5,
  'text': 'That is right .',
  'speaker': 'Chandler',
  'emotion': 'anger'},
 {'utterance_ID': 6,
  'text': 'Never had that dream .',
  'speaker': 'Joey',
  'emotion': 'neutral'},
 {'utterance_ID': 7,
  'text': 'No .',
  'speaker': 'Phoebe',
  'emotion': 'neutral'},
 {'utterance_ID': 8,
  'text': 'All of a sudden , the phone starts to ring .',
  'speaker': 'Chandler',
  'emotion': 'neutral'}]

In [9]:
# Map each emotion name to its integer class id (position in the tuple).
emo_dict = {
    emotion_name: class_id
    for class_id, emotion_name in enumerate(
        ("neutral", "joy", "surprise", "anger", "sadness", "disgust", "fear")
    )
}

In [10]:
# Rewrite every utterance dict in place so that it only holds:
#   'emotion'   -> integer class id looked up in emo_dict
#   'utterance' -> "<speaker>: <text>"
# NOTE: this mutates the dicts in place, so the cell cannot be run twice
# on the same objects (the popped keys would be missing).
for conversation in new_data:
    for turn in conversation:
        del turn['utterance_ID']
        turn["utterance"] = ": ".join((turn["speaker"], turn["text"]))
        turn['emotion'] = emo_dict[turn['emotion']]
        del turn['text']
        del turn['speaker']

In [11]:
# Display the first conversation after the in-place reshaping above.
new_data[0]

[{'emotion': 0,
  'utterance': 'Chandler: Alright , so I am back in high school , I am standing in the middle of the cafeteria , and I realize I am totally naked .'},
 {'emotion': 0, 'utterance': 'All: Oh , yeah . Had that dream .'},
 {'emotion': 2,
  'utterance': 'Chandler: Then I look down , and I realize there is a phone ... there .'},
 {'emotion': 2, 'utterance': 'Joey: Instead of ... ?'},
 {'emotion': 3, 'utterance': 'Chandler: That is right .'},
 {'emotion': 0, 'utterance': 'Joey: Never had that dream .'},
 {'emotion': 0, 'utterance': 'Phoebe: No .'},
 {'emotion': 0,
  'utterance': 'Chandler: All of a sudden , the phone starts to ring .'}]

In [12]:
# Positional split of the conversations: [0, 1099) for training,
# [1099, 1236) for validation, the remainder for testing.
# Each split is a new list object that shares the underlying utterance lists.
training_data = list(new_data[:1099])
validation_data = list(new_data[1099:1236])
testing_data = list(new_data[1236:])

In [13]:
# Display the first conversation of the training split.
training_data[0]

[{'emotion': 0,
  'utterance': 'Chandler: Alright , so I am back in high school , I am standing in the middle of the cafeteria , and I realize I am totally naked .'},
 {'emotion': 0, 'utterance': 'All: Oh , yeah . Had that dream .'},
 {'emotion': 2,
  'utterance': 'Chandler: Then I look down , and I realize there is a phone ... there .'},
 {'emotion': 2, 'utterance': 'Joey: Instead of ... ?'},
 {'emotion': 3, 'utterance': 'Chandler: That is right .'},
 {'emotion': 0, 'utterance': 'Joey: Never had that dream .'},
 {'emotion': 0, 'utterance': 'Phoebe: No .'},
 {'emotion': 0,
  'utterance': 'Chandler: All of a sudden , the phone starts to ring .'}]

In [14]:
# new_train_data = []
# train_episodes = []
# for i in range(len(train_data)):
#   di = {}
#   di.update({i:None})
#   new_train_data.append(di)
#   train_episodes.append(i)
# for i in range(len(train_data)):
#   new_train_data[i][train_episodes[i]] = []
#   for j in range(len(train_data[i]['emotions'])):
#     new_train_data[i][train_episodes[i]].append({'Utterance':train_data[i]['speakers'][j]+" : " +train_data[i]['utterances'][j],
#                                          'Emotion':emo_dict[train_data[i]['emotions'][j].lower()]})

In [15]:
# new_val_data = []
# val_episodes = []
# for i in range(len(val_data)):
#   di = {}
#   di.update({i:None})
#   new_val_data.append(di)
#   val_episodes.append(i)
# for i in range(len(val_data)):
#   new_val_data[i][val_episodes[i]] = []
#   for j in range(len(val_data[i]['emotions'])):
#     new_val_data[i][val_episodes[i]].append({'Utterance':val_data[i]['speakers'][j]+" : " +val_data[i]['utterances'][j],
#                                          'Emotion':emo_dict[val_data[i]['emotions'][j].lower()]})

In [16]:
# new_train_data[0][0]

In [17]:
class ErcDataset(torch.utils.data.Dataset):
    """Emotion-recognition-in-conversation dataset that yields RoBERTa inputs.

    Every item corresponds to one target utterance, optionally surrounded by
    neighbouring utterances of the same dialogue, tokenized and padded to the
    tokenizer's maximum length.
    """

    def __init__(self, data, num_past_utterances=0, num_future_utterances=0):
        """Initializer for emotion recognition in conversation text dataset.

        Args:
            data: iterable of dialogues; each dialogue is a sequence of dicts
                holding an "utterance" string and an "emotion" label.
            num_past_utterances: maximum number of preceding utterances to
                prepend as context.
            num_future_utterances: maximum number of following utterances to
                append as context.
        """
        self.data = data
        self.num_past_utterances = num_past_utterances
        self.num_future_utterances = num_future_utterances
        self.processed_data = self._create_input()

    def __len__(self):
        """Returns the length of the processed data."""
        return len(self.processed_data)

    def _select_context_indexes(self, token_counts, idx, max_tokens):
        """Pick the utterance indexes (target plus context) that fit the budget.

        Context is added alternately, nearest first: one past utterance, then
        one future utterance, and so on. Selection stops at the first
        utterance that would push the summed token count above ``max_tokens``.

        Args:
            token_counts: token count of every utterance in the dialogue.
            idx: position of the target utterance inside the dialogue.
            max_tokens: token budget for the concatenated input.

        Returns:
            Sorted list of dialogue indexes; it always contains ``idx``.
        """
        indexes = [idx]
        # Sum the *individual* counts of the selected utterances; using the
        # target utterance's count for every entry mis-sizes the context.
        total_tokens = token_counts[idx]
        max_distance = max(self.num_past_utterances, self.num_future_utterances)
        for distance in range(1, max_distance + 1):
            past_idx = idx - distance
            if distance <= self.num_past_utterances and past_idx >= 0:
                if total_tokens + token_counts[past_idx] > max_tokens:
                    break
                indexes.insert(0, past_idx)
                total_tokens += token_counts[past_idx]
            future_idx = idx + distance
            if distance <= self.num_future_utterances and future_idx < len(token_counts):
                if total_tokens + token_counts[future_idx] > max_tokens:
                    break
                indexes.append(future_idx)
                total_tokens += token_counts[future_idx]
        return indexes

    def _create_input(self):
        """Creates inputs for RoBERTa.

        Returns:
            A list with one dict per utterance, holding "input_ids",
            "attention_mask" (both 1-D tensors padded to the maximum length)
            and the integer "label".
        """
        tokenizer = AutoTokenizer.from_pretrained('FacebookAI/roberta-base', use_fast=True)
        # model_max_length is the tokenizer's configured maximum sequence
        # length; unlike the static `max_model_input_sizes` lookup it does
        # not depend on a checkpoint-name table shipped with the library.
        max_model_input_size = tokenizer.model_max_length
        inputs = []
        for dialog in self.data:
            # Tokenize each utterance once to learn its own length.
            token_counts = [len(tokenizer(turn["utterance"])["input_ids"]) for turn in dialog]
            for idx, utterance in enumerate(dialog):
                indexes = self._select_context_indexes(token_counts, idx, max_model_input_size)
                final_utterance = "</s></s>".join([dialog[idx_]["utterance"] for idx_ in indexes])
                # truncation=True still guards against a single utterance
                # that is longer than the model limit on its own.
                input_ids_attention_mask = tokenizer(
                    final_utterance,
                    return_tensors="pt",
                    padding="max_length",
                    truncation=True,
                    max_length=max_model_input_size,
                )
                input_ = {
                    # Drop only the leading batch dimension added by return_tensors="pt".
                    "input_ids": input_ids_attention_mask["input_ids"].squeeze(0),
                    "attention_mask": input_ids_attention_mask["attention_mask"].squeeze(0),
                    "label": utterance["emotion"],
                }
                inputs.append(input_)
        return inputs

    def __getitem__(self, index):
        """Returns the processed data at the given index."""
        return self.processed_data[index]


In [18]:
# Load the pretrained roberta-base checkpoint with a 7-way sequence
# classification head (emo_dict defines seven emotion classes).
model = AutoModelForSequenceClassification.from_pretrained('FacebookAI/roberta-base', num_labels=7)

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
# Build the datasets without any past or future context utterances.
training_dataset = ErcDataset(
    training_data,
    num_past_utterances=0,
    num_future_utterances=0,
)
validation_dataset = ErcDataset(
    validation_data,
    num_past_utterances=0,
    num_future_utterances=0,
)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [20]:
# from google.colab import drive
# drive.mount('/content/drive')

In [21]:
# Module-level tokenizer instance; it is handed to the Trainer and saved to disk later in the notebook.
tokenizer = AutoTokenizer.from_pretrained('FacebookAI/roberta-base', use_fast=True)

In [22]:
# ! pip install evaluate

In [23]:
import evaluate

# Load the metric objects that compute_metrics relies on.
f1, accuracy = (evaluate.load(metric_name) for metric_name in ("f1", 'accuracy'))

Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [24]:
# taken reference from https://github.com/tae898/erc/blob/main/utils/utils.py
def compute_metrics(eval_predictions):
    """Compute macro F1 and accuracy for a Trainer evaluation pass.

    Args:
        eval_predictions: object exposing ``predictions`` (per-class scores,
            one row per example) and ``label_ids`` (reference class ids).

    Returns:
        Dict with scalar values under the keys "f1" and "accuracy".
        The metric objects return one-entry dicts, which are unwrapped here
        so that the metrics are logged as plain numbers, and so that the "f1"
        key matches ``metric_for_best_model="f1"`` in the training arguments.
    """
    preds = np.argmax(eval_predictions.predictions, axis=1)
    labels = eval_predictions.label_ids
    macro_f1 = f1.compute(predictions=preds, references=labels, average='macro')["f1"]
    acc = accuracy.compute(predictions=preds, references=labels)["accuracy"]
    return {"f1": macro_f1, "accuracy": acc}

In [25]:
# Fine-tuning configuration: evaluation and logging both run once per epoch.
# NOTE(review): output_dir is an absolute, user-specific path.
training_args = TrainingArguments(
    output_dir="/home/research/shaina mehta/RoBBERTa/NLP Assignment 4/",
    # optimisation
    num_train_epochs=10,
    learning_rate=1e-4,
    weight_decay=0.01,
    per_device_train_batch_size=40,
    per_device_eval_batch_size=40,
    # evaluation / logging / checkpointing
    evaluation_strategy="epoch",
    logging_strategy='epoch',
    save_total_limit=1,
    metric_for_best_model="f1",
)

# Wire the model, datasets, tokenizer and metric function together.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=training_dataset,
    eval_dataset=validation_dataset,
    compute_metrics=compute_metrics,
)

# Run fine-tuning; the returned TrainOutput is shown as the cell result.
trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 4.15.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss,F1 score,Accuracy
1,1.3202,1.246268,{'f1': 0.32027811258645766},{'accuracy': 0.5718954248366013}
2,1.1356,1.184893,{'f1': 0.3373621135604655},{'accuracy': 0.5784313725490197}
3,0.9833,1.217117,{'f1': 0.38932893953559083},{'accuracy': 0.5841503267973857}
4,0.776,1.356479,{'f1': 0.41771702622976054},{'accuracy': 0.579248366013072}
5,0.5977,1.493901,{'f1': 0.4040803217515604},{'accuracy': 0.571078431372549}
6,0.4313,1.599961,{'f1': 0.42993292519126847},{'accuracy': 0.5751633986928104}
7,0.3154,1.806997,{'f1': 0.42590432737488504},{'accuracy': 0.5841503267973857}
8,0.2388,1.977332,{'f1': 0.42571253133392856},{'accuracy': 0.579248366013072}
9,0.1854,2.162232,{'f1': 0.4199397734945783},{'accuracy': 0.5661764705882353}
10,0.1381,2.295276,{'f1': 0.41723163736220387},{'accuracy': 0.5702614379084967}


TrainOutput(global_step=2730, training_loss=0.6121941772572723, metrics={'train_runtime': 2489.7773, 'train_samples_per_second': 43.859, 'train_steps_per_second': 1.096, 'total_flos': 2.873301709824e+16, 'train_loss': 0.6121941772572723, 'epoch': 10.0})

In [30]:
# Save the current state of `model` to disk.
# NOTE(review): absolute, user-specific path — adjust before running elsewhere.
model.save_pretrained("/home/research/shaina mehta/RoBBERTa/Project/Model Weights")

In [31]:
# Save the tokenizer files to disk; the written file paths are shown as the cell output.
# NOTE(review): absolute, user-specific path — adjust before running elsewhere.
tokenizer.save_pretrained("/home/research/shaina mehta/RoBBERTa/Project/Tokenizer")

('/home/research/shaina mehta/RoBBERTa/Project/Tokenizer/tokenizer_config.json',
 '/home/research/shaina mehta/RoBBERTa/Project/Tokenizer/special_tokens_map.json',
 '/home/research/shaina mehta/RoBBERTa/Project/Tokenizer/vocab.json',
 '/home/research/shaina mehta/RoBBERTa/Project/Tokenizer/merges.txt',
 '/home/research/shaina mehta/RoBBERTa/Project/Tokenizer/added_tokens.json',
 '/home/research/shaina mehta/RoBBERTa/Project/Tokenizer/tokenizer.json')

In [28]:
# import matplotlib.pyplot as plt
# import numpy as np

In [29]:
# x = np.arange(10)
# y1 = np.array([0.813800, 0.345900, 0.231800, 0.176200, 0.135200, 0.106600, 0.080800, 0.062400, 0.049300, 0.041200])
# y2 = np.array([0.493111, 0.289156, 0.235381, 0.161628, 0.136387, 0.108866, 0.089369, 0.072593, 0.070675, 0.066201])
# plt.plot(x,y1,label="training loss")
# plt.plot(x,y2,label="validation loss")
# plt.legend()
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.title('Loss V/S Epoch')
# plt.show()