<h1>STEP 1. Emotion Classification</h1>
<p>This notebook consists simple code used for emotion classification in the form of a simple sequence classification task using RoBERTa-base from HuggingFace</p>
<p>The notebook has both the code for training followed by inference on the evaluation set. Please look out for the comments prior to each cell</p>

In [1]:
!pip install transformers[torch]
!pip install datasets
!pip install accelerate -U
!pip install ipywidgets
!pip install scikit-learn
!pip install boto3

<h1>1.1 Some static variables</h1>

In [2]:
subtask_1_train_file = '/workspace/SpanBERT/code/redundant/MultiModalEmotionCauseAnalysis/v2/data/text/Subtask_1_train.json'
subtask_1_test_file = '/workspace/SpanBERT/code/redundant/MultiModalEmotionCauseAnalysis/v2/data/text/Subtask_1_test.json'
train_model = "False"
test_model = "True"
save_dir = "./results_20_epochs_submisssion_ui_uall_custom_roberta_base_weighted_final_1"
#save_path stores the trained wieghts which could be used to infer and create the enriched test set. Bascially the directory that contains the .bin files
saved_path = "/workspace/SpanBERT/code/redundant/MultiModalEmotionCauseAnalysis/v2/results_30_epochs_data_leak_corrected_shuffled_ui_uall_custom_roberta_base_weighted/checkpoint-834"
batch_size =34
print(batch_size)

34


In [3]:
with open(subtask_1_train_file, "r") as f:
  import json
  data_ = json.load(f)
print(len(data_))
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from transformers import Trainer, TrainingArguments
import torch
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, random_split

1374


<h1>1.2 Re-implementing the RoBERTa for Sequence classification class</h1>
<p>We Re-implement the Huggingface RobertaForSequenceClassification class to account for the class imbalance amongst the emotional utterances. We do so by passing the scaling factor to the CrossEntropyLoss function in the forward method, the idea being to balance the prediction across less representative classes as well</p>

In [4]:
_CHECKPOINT_FOR_DOC = "roberta-base"
_CONFIG_FOR_DOC = "RobertaConfig"
import random
seed = 12654 
random.seed(seed)
random.shuffle(data_)
sep = '</s>'
from transformers.models.roberta.modeling_roberta import RobertaClassificationHead
from typing import List, Optional, Tuple, Union
from transformers import RobertaPreTrainedModel, RobertaModel
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from transformers.utils import (
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
    replace_return_docstrings,
)
from transformers.models.roberta.modeling_roberta import (
    ROBERTA_INPUTS_DOCSTRING,
    ROBERTA_START_DOCSTRING,
    RobertaEmbeddings
)
from transformers.modeling_outputs import SequenceClassifierOutput
@add_start_docstrings(
    """
    RoBERTa Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    """,
    ROBERTA_START_DOCSTRING,
)
class RobertaForSequenceClassificationCustom(RobertaPreTrainedModel):
    def __init__(self, config, pos_weight):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.config = config

        self.roberta = RobertaModel(config, add_pooling_layer=False)
        self.classifier = RobertaClassificationHead(config)
        self.pos_weight = pos_weight
        print(pos_weight)
        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        checkpoint="cardiffnlp/twitter-roberta-base-emotion",
        output_type=SequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
        expected_output="'optimism'",
        expected_loss=0.08,
    )
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        token_type_ids: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            # move labels to correct device to enable model parallelism
            labels = labels.to(logits.device)
            weight_flag = True if self.pos_weight is not None else False
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    print("single_label_classification")
                    self.config.problem_type = "single_label_classification"
                else:
                    print("multi_label_classification")
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                
                if weight_flag:
                    weights = self.pos_weight.to(logits.device)
                    loss_fct = CrossEntropyLoss(weight=weights)
                else:
                    loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss(pos_weight=self.pos_weight.to(logits.device))
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

<h1>1.3 Data set class</h1>

In [5]:
class EmotionDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self.labels is not None:
            item['labels'] = torch.tensor(self.labels[idx])
        else:
            item['labels'] = -1
        return item

    def __len__(self):
        return len(self.labels) if self.labels is not None else self.encodings['input_ids'].shape[0]

<h1>1.4 Methods that supplement the training and testing </h1>
<h2>get_data() method</h2>
<p>get_data(data_trial, train = False) is used to create the input prompt for the classifier</p>
<h2>f1_calc() method</h2>
<p>f1_calc(y_true, y_pred, average = 'weighted') is used to calculate runtime metrics during training</p>
<h2>compute_metrics() method</h2>
<p>compute_metrics(p) is the primary hook to calculate runtime metrics during training</p>


In [6]:
def get_data(data_trial, train = False):
    ds = []
    if not train:
        print(f"get data - train? {train}")
        ds = get_data_test(data_trial)
        return ds, -1
    
    counter = {'anger': 0, 'disgust': 0, 'fear': 0, 'joy': 0, 'sadness': 0, 'surprise': 0, 'neutral': 0}
    for x in data_trial:
        utt_all = ' '.join(conv['text'] for conv in x['conversation'])
#         utt_all =  x['conversation']
        for utt_i in x['conversation']:
            conversation_ID = x['conversation_ID']
            utt_id = utt_i['utterance_ID']
            counter[utt_i['emotion']]+=1
            u_i = utt_i['text']
            utt_all_causal = " "
            
#             ll = []
#             for idx,cnv in enumerate(utt_all):
# #                 if (idx+1) <=utt_id:
# #                     ll.append(cnv['speaker']+" : "+cnv['text'])
# #                 ll.append(cnv['speaker']+" : "+cnv['text'])
#                 ll.append(cnv['text'])
#             utt_all_causal = ".".join(ll)   
#             text = f'{u_i} {sep} {utt_all_causal}'
            text = f'{u_i} {sep} {utt_all}'
            ds.append({'id':f'{conversation_ID}_{utt_id}', 'label':utt_i['emotion'], 'text':text})
    return ds, counter

def get_data_test(data_trial):
    ds = []
    for x in data_trial:
        utt_all =  x['conversation']
        for utt_i in x['conversation']:
            conversation_ID = x['conversation_ID']
            utt_id = utt_i['utterance_ID']
            u_i = utt_i['text']
            utt_all_causal = " "
            ll = []
            for idx,cnv in enumerate(utt_all):
                ll.append(cnv['text'])
            utt_all_causal = ".".join(ll)   
            text = f'{u_i} {sep} {utt_all_causal}'
            ds.append({'id':f'{conversation_ID}_{utt_id}', 'label':None, 'text':text})
    return ds

def get_datan2(data_trial):
    ds = []
    counter = {'anger': 0, 'disgust': 0, 'fear': 0, 'joy': 0, 'sadness': 0, 'surprise': 0, 'neutral': 0}
    for x in data_trial:
        utt_all = ' '.join(conv['text'] for conv in x['conversation'])
#         utt_all =  x['conversation']
        conversation_ID = x['conversation_ID']
        
        
        
        for utt_i in x['conversation']:
            
            utt_i_id = utt_i['utterance_ID']
            u_i = utt_i['text']
#             counter[utt_i['emotion']]+=1
            for utt_j in x['conversation']:
                utt_j_id = utt_j['utterance_ID']
                u_j = utt_j['text']
                text = f'{u_i} {sep} {u_j} {sep} {utt_all}'
#             text = f'{u_i} {sep} {utt_all}'
                ds.append({'id':f'{conversation_ID}_{utt_i_id}_{utt_j_id}', 'label': utt_i.get('emotion', -1), 'text':text})
    return ds, counter

from sklearn.metrics import f1_score
def f1_calc(y_true, y_pred, average = 'weighted'):
        
        emotion_idx = dict(zip(["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"], range(7)))
        inv_map = {v: k for k, v in emotion_idx.items()}

#         datapy = np.array([ (emotion_idx[x['predicted_emotion']] ,emotion_idx[x['gold_emotion']]) for x in output[key]])
#         y_pred,y_true  = datapy[:,0], datapy[:,1]
        f1_scores = f1_score(y_true, y_pred, average=average)
#         print(f'{file}, f1: {f1_scores}')
        f1_scores_n = f1_score(y_true, y_pred, average=None, labels=[0, 1, 2, 3, 4, 5, 6])
        
        score_map = {}
        for label, score in zip(range(7), f1_scores_n):
            l = inv_map[label]
            score_map[f'f1_{l}'] = score
        score_map['weighted_f1']=f1_scores
        print(score_map)
        return score_map


def compute_metrics(p):
    """
    Customize the `compute_metrics` of `transformers`
    Args:
        - p (tuple):      2 numpy arrays: predictions and true_labels
    Returns:
        - metrics (dict): f1 score on
    """
    metrics = {}
    predictions, true_labels = p
    f1_scores_n = f1_calc(true_labels, predictions.argmax(-1))
    metrics= f1_scores_n
    return metrics

def _add_whitespace_after_punctuations(txt):
    n = len(txt)
    punctuations = [
        ',',
        '!',
        '?',
        '.',
        ';',
        '$',
        '&',
        '"',
        '...'
    ]

    if n < 3:
        return txt

    if txt[-1] in punctuations:
        txt = f'{txt[:-1]} {txt[-1]}'
    if txt[0] in punctuations:
        txt = f'{txt[0]} {txt[1:]}'

    inner = txt[1: -1]

    for char in punctuations:
        inner = inner.replace(char, f' {char} ')

    txt = f'{txt[0]}{inner}{txt[-1]}'
    # removing extra whitespaces.
    txt = txt.replace('  ', ' ').upper()

    return txt

In [17]:
# sep = tokenizer.special_tokens_map['sep_token']
labels = []
total_size = len(data_)
train_size = int(total_size * 0.7) # 80% for training
eval_size = int(total_size * 0.2) # remaining 10% for eval
trial_size = total_size - train_size - eval_size # remaining 10% for testing
print(f'{train_size} {eval_size} {trial_size}')
data_train = data_[:train_size]
data_eval = data_[train_size:train_size+eval_size]
data_trial = data_[train_size+eval_size: train_size+eval_size+trial_size+1]

# Initialize tokenizer and model


tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

train_ds,counter =  get_data(data_train, train=True)
train_label_encoder = LabelEncoder()
train_label_encoder.fit(["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"])
train_texts, train_labels  =  zip(*[(x['text'], x['label']) for x in train_ds])
train_encoded_labels = train_label_encoder.transform(train_labels)
train_encodings = tokenizer(train_texts, padding=True, truncation=True, return_tensors="pt")

eval_ds,_ = get_data(data_eval, train=True)
eval_label_encoder = LabelEncoder()
eval_label_encoder.fit(["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"])
eval_texts, eval_labels  =  zip(*[(x['text'], x['label']) for x in eval_ds])
eval_encoded_labels = eval_label_encoder.transform(eval_labels)
eval_encodings = tokenizer(eval_texts, padding=True, truncation=True, return_tensors="pt")

trial_ds,cc = get_data(data_trial, train=True)
trial_label_encoder = LabelEncoder()
trial_label_encoder.fit(["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"])
trial_texts, trial_labels  =  zip(*[(x['text'], x['label']) for x in trial_ds])
trial_encoded_labels = trial_label_encoder.transform(trial_labels)
trial_encodings = tokenizer(trial_texts, padding=True, truncation=True, return_tensors="pt")

print(f'{len(train_ds)} {len(eval_ds)} {len(trial_ds)}')

train_dataset = EmotionDataset(train_encodings, train_encoded_labels)
trial_dataset = EmotionDataset(trial_encodings, trial_encoded_labels)
eval_dataset = EmotionDataset(eval_encodings, eval_encoded_labels)


total_samples = sum(counter.values())
pos_weight = torch.tensor([total_samples / counter[class_name] for class_name in counter]).to('cuda')

model = RobertaForSequenceClassificationCustom.from_pretrained('roberta-base', num_labels=7, pos_weight = pos_weight)

1099 275 0


Some weights of RobertaForSequenceClassificationCustom were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


10851 2768
tensor([ 8.5576, 33.4907, 34.7788,  5.8559, 12.1376,  7.4629,  2.2863],
       device='cuda:0')


In [18]:
len(train_ds)

10851

In [21]:
#class distribution across classes in the train set
# counter

In [22]:
# label weights
# pos_weight

In [23]:
# Splitting the dataset and Creating DataLoaders for training and testing
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
eval_loader = DataLoader(eval_dataset, batch_size=batch_size, shuffle=False)
trial_loader = DataLoader(trial_dataset, batch_size=batch_size, shuffle=False)

<h1>1.5 Training</h1>
<p> This code will run only if train_model is set to True </p>

In [24]:
# HF Training arguments
if train_model:
    training_args = TrainingArguments(
        output_dir=save_dir,
        num_train_epochs=20,
        per_device_train_batch_size=batch_size,
        warmup_steps=500,
        weight_decay=0.01,
        learning_rate=5e-5,
        logging_dir='./logs',
        logging_steps=10,
        save_strategy='epoch',
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model='weighted_f1',
        log_level='critical',
        evaluation_strategy="epoch",
        seed=12345
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_loader.dataset,
        eval_dataset=eval_loader.dataset,
        compute_metrics=compute_metrics
    )

    # Train the model
    trainer.train()

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


single_label_classification




Epoch,Training Loss,Validation Loss,F1 Anger,F1 Disgust,F1 Fear,F1 Joy,F1 Sadness,F1 Surprise,F1 Neutral,Weighted F1
1,1.3841,1.311341,0.421535,0.0,0.0,0.356808,0.694496,0.0,0.0,0.40741
2,1.1528,1.13585,0.396975,0.141732,0.203636,0.560229,0.737226,0.436893,0.09828,0.518251
3,1.0111,1.121387,0.454695,0.186335,0.171429,0.585106,0.758204,0.449878,0.539782,0.601974
4,0.8906,1.176469,0.391389,0.189944,0.190083,0.544489,0.73451,0.483721,0.406015,0.562306
5,0.6899,1.376874,0.431008,0.169565,0.243902,0.591607,0.711538,0.4197,0.411215,0.560477
6,0.6578,1.39842,0.503365,0.219081,0.220994,0.594895,0.726407,0.483559,0.509151,0.597034
7,0.3211,1.527583,0.541772,0.175182,0.2,0.579525,0.723496,0.48,0.589905,0.607163
8,0.3326,1.647589,0.505348,0.119403,0.211538,0.571713,0.732474,0.445833,0.486301,0.58604
9,0.2229,1.813684,0.48415,0.139241,0.233577,0.540024,0.740214,0.485106,0.59353,0.601235
10,0.1702,2.006667,0.493066,0.188976,0.198895,0.54433,0.720618,0.419643,0.592593,0.589413


{'f1_anger': 0.4215349369988545, 'f1_disgust': 0.0, 'f1_fear': 0.0, 'f1_joy': 0.35680751173708913, 'f1_sadness': 0.6944959802102658, 'f1_surprise': 0.0, 'f1_neutral': 0.0, 'weighted_f1': 0.4074100913965184}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.3969754253308128, 'f1_disgust': 0.14173228346456693, 'f1_fear': 0.20363636363636364, 'f1_joy': 0.5602294455066922, 'f1_sadness': 0.7372262773722628, 'f1_surprise': 0.4368932038834951, 'f1_neutral': 0.09828009828009827, 'weighted_f1': 0.5182513629047862}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.4546952224052718, 'f1_disgust': 0.18633540372670804, 'f1_fear': 0.17142857142857146, 'f1_joy': 0.5851063829787234, 'f1_sadness': 0.7582044511505093, 'f1_surprise': 0.44987775061124696, 'f1_neutral': 0.5397815912636504, 'weighted_f1': 0.6019742997778581}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.3913894324853229, 'f1_disgust': 0.18994413407821228, 'f1_fear': 0.19008264462809918, 'f1_joy': 0.5444887118193891, 'f1_sadness': 0.7345102111457251, 'f1_surprise': 0.4837209302325582, 'f1_neutral': 0.40601503759398494, 'weighted_f1': 0.5623055885124114}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.43100775193798446, 'f1_disgust': 0.16956521739130434, 'f1_fear': 0.24390243902439027, 'f1_joy': 0.5916069600818833, 'f1_sadness': 0.7115384615384615, 'f1_surprise': 0.4197002141327623, 'f1_neutral': 0.411214953271028, 'weighted_f1': 0.5604774756224982}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.5033647375504711, 'f1_disgust': 0.21908127208480563, 'f1_fear': 0.2209944751381215, 'f1_joy': 0.5948945615982242, 'f1_sadness': 0.7264069264069264, 'f1_surprise': 0.4835589941972921, 'f1_neutral': 0.5091514143094842, 'weighted_f1': 0.5970341656039905}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.5417721518987342, 'f1_disgust': 0.17518248175182477, 'f1_fear': 0.2, 'f1_joy': 0.5795246800731262, 'f1_sadness': 0.7234963219385547, 'f1_surprise': 0.48, 'f1_neutral': 0.5899053627760252, 'weighted_f1': 0.6071633076676812}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.45496183206106866, 'f1_disgust': 0.17194570135746606, 'f1_fear': 0.2391304347826087, 'f1_joy': 0.5494505494505494, 'f1_sadness': 0.7293715262932877, 'f1_surprise': 0.47755102040816333, 'f1_neutral': 0.5595054095826894, 'weighted_f1': 0.5902180937102929}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.4654088050314465, 'f1_disgust': 0.12345679012345678, 'f1_fear': 0.28187919463087246, 'f1_joy': 0.5479166666666666, 'f1_sadness': 0.7231083844580777, 'f1_surprise': 0.4243614931237721, 'f1_neutral': 0.5570370370370371, 'weighted_f1': 0.5827623688102201}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.49760765550239233, 'f1_disgust': 0.16455696202531644, 'f1_fear': 0.29059829059829057, 'f1_joy': 0.5849056603773585, 'f1_sadness': 0.7146619841966636, 'f1_surprise': 0.4338235294117647, 'f1_neutral': 0.5546995377503852, 'weighted_f1': 0.5912430333940171}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.46635367762128327, 'f1_disgust': 0.14634146341463417, 'f1_fear': 0.2678571428571429, 'f1_joy': 0.5601851851851851, 'f1_sadness': 0.7352823388065679, 'f1_surprise': 0.45643153526970953, 'f1_neutral': 0.5934065934065934, 'weighted_f1': 0.5985075277476526}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.516042780748663, 'f1_disgust': 0.1639344262295082, 'f1_fear': 0.25531914893617025, 'f1_joy': 0.5816649104320336, 'f1_sadness': 0.7328244274809159, 'f1_surprise': 0.43933054393305443, 'f1_neutral': 0.6054054054054053, 'weighted_f1': 0.6075685343390795}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.45367412140575086, 'f1_disgust': 0.1724137931034483, 'f1_fear': 0.27272727272727276, 'f1_joy': 0.5644820295983087, 'f1_sadness': 0.7302659349936683, 'f1_surprise': 0.46, 'f1_neutral': 0.5799769850402762, 'weighted_f1': 0.5948779594387891}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.491899852724595, 'f1_disgust': 0.1864406779661017, 'f1_fear': 0.26890756302521013, 'f1_joy': 0.5711361310133061, 'f1_sadness': 0.7332761578044596, 'f1_surprise': 0.4699029126213593, 'f1_neutral': 0.6055276381909547, 'weighted_f1': 0.6068735758195343}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.4935437589670014, 'f1_disgust': 0.18045112781954886, 'f1_fear': 0.26016260162601623, 'f1_joy': 0.5676855895196506, 'f1_sadness': 0.7309168443496801, 'f1_surprise': 0.45228215767634855, 'f1_neutral': 0.5952380952380952, 'weighted_f1': 0.6020799372070057}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


{'f1_anger': 0.48888888888888893, 'f1_disgust': 0.17054263565891473, 'f1_fear': 0.2758620689655173, 'f1_joy': 0.5649838882921588, 'f1_sadness': 0.7350714886459209, 'f1_surprise': 0.4631147540983606, 'f1_neutral': 0.5982905982905984, 'weighted_f1': 0.6042743586021355}


<h1>1.6 Testing / Inference</h1>

In [15]:
with open(subtask_1_test_file, "r") as f:
# with open('dataset_1701871028.json', "r") as f:
  import json
  data_test_ = json.load(f)
print(len(data_test_))

batch_size =34
print(batch_size)

665
34


In [16]:

# print(f'{train_ds}')
# Convert to Dataset object
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
test_final_ds,test_final_counter =  get_data(data_test_)
# test_final_ds,test_final_counter =  get_data(data_test_)

# test_final_label_encoder = LabelEncoder()
# test_final_label_encoder.fit(["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"])
# test_final_texts, test_final_labels  =  zip(*[(x['text'], x['label']) for x in test_final_ds])
test_final_texts  =  [x['text'] for x in test_final_ds]
# test_final_encoded_labels = test_final_label_encoder.transform(test_final_labels)
test_final_encodings = tokenizer(test_final_texts, padding=True, truncation=True, return_tensors="pt")

test_final_dataset = EmotionDataset(test_final_encodings, None)
test_final_loader = DataLoader(test_final_dataset, batch_size=batch_size, shuffle=False)
print(f'{len(test_final_ds)}')

get data - train? False
6301


In [17]:
test_enc = LabelEncoder()
test_enc.fit(["anger", "disgust", "fear", "joy", "sadness", "surprise", "neutral"])

<h1>1.7 Creating enriched dataset </h1>
<p> At this step we enrich the original dataset with emotions at utterance level from the trained model to pass to the span extractor </p>

In [18]:
from copy import deepcopy
enriched_data = deepcopy(data_test_)

In [19]:
conv_id_mapping = {data['conversation_ID']: idx for idx, data in enumerate(enriched_data)}

In [20]:
for data in enriched_data:
    data['emotion-cause_pairs'] = []

In [21]:
import os
# saved_path = "/workspace/SpanBERT/code/redundant/MultiModalEmotionCauseAnalysis/v2/results_20_epochs_submisssion_ui_uall_custom_roberta_base_weighted_final_1/checkpoint-2400/"
base_path = os.path.join(*os.path.split(saved_path)[:-1])

if  os.path.isdir(saved_path) and test_model:
    model = RobertaForSequenceClassificationCustom.from_pretrained(saved_path, num_labels=7, pos_weight=None).cuda()

    # if torch.cuda.device_count() > 1:
    #     print(f"Using {torch.cuda.device_count()} GPUs!")
    #     model = torch.nn.DataParallel(model)

    # Move your model to GPU
    # model.cuda()

    dev = model.device


    print(len(test_final_loader.dataset))

    n = test_final_loader.batch_size

    with torch.no_grad():
        fp = []
        cpunter = 0

        for idx, x in enumerate(test_final_loader):
            input_ids = x['input_ids'].to(dev)
            attention_mask = x['attention_mask'].to(dev)
            logits = model(input_ids=input_ids, attention_mask=attention_mask, labels=None).logits.cpu()
            labels = test_enc.inverse_transform(logits.argmax(-1).tolist())

            for i, label in enumerate(labels):
                idx_offset = idx * n + i
                conv_utt_id = test_final_ds[idx_offset]['id']
                conv_id, utt_id = conv_utt_id.split('_')[:2]
                data_idx = conv_id_mapping[int(conv_id)]

                enriched_data[data_idx]['emotion-cause_pairs'].append([f'{utt_id}_{label}'])
        print("done inferring")

    with open(os.path.join(base_path, 'enriched_data.json'), 'w') as f:
        json.dump(enriched_data, f, indent=4)

None
6301


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encod

done inferring
