In [None]:
import torch
import jsonlines
import numpy as np
import transformers
import pandas as pd
import pickle as pkl
from torch import nn
from tqdm import tqdm
from os.path import join
import multiprocessing as mp
from importlib import reload
from collections import Counter
from datasets import load_dataset
from torch.utils.data import Dataset
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report
from transformers import (WEIGHTS_NAME,
                          BertConfig, BertForSequenceClassification, BertTokenizer,
                          XLMConfig, XLMForSequenceClassification, XLMTokenizer,
                          DistilBertConfig, DistilBertForSequenceClassification, DistilBertTokenizer,
                          RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer)
from transformers import BertPreTrainedModel, BertModel, AdamW, get_linear_schedule_with_warmup, AutoTokenizer, AutoModel

In [None]:
class emotionDataset(Dataset):
    """Map-style dataset that tokenizes text examples into fixed-length tensors.

    Each element of ``list_data`` is indexed with ``'text'`` and ``'labels'``.
    Only the first entry of ``'labels'`` is used as the target, so examples
    carrying several labels are reduced to a single-label target.

    Args:
        list_data: indexable collection of examples (supports ``len`` and ``[i]``).
        tokenizer: object exposing ``encode_plus``; its ``pad_token_id`` is used
            for padding when available.
        max_length: every sample is truncated/padded to exactly this many ids.
    """

    def __init__(self, list_data,
                 tokenizer, max_length):

        self.tokenizer = tokenizer
        self.max_length = max_length
        self.data = list_data
        # Pad with the tokenizer's own pad id instead of assuming it is 1.
        # Falls back to 1 (the previously hard-coded value) when the tokenizer
        # does not define a pad id.
        pad_id = getattr(tokenizer, 'pad_token_id', None)
        self.pad_token = 1 if pad_id is None else pad_id

    def __len__(self):
        """Return length of dataset."""
        return len(self.data)

    def __getitem__(self, i):
        """Return sample from dataset at index i.

        Returns:
            dict with ``input_ids`` and ``attention_mask`` (LongTensor of
            length ``max_length``) and ``labels`` (LongTensor with one element).
        """
        example = self.data[i]
        inputs = self.tokenizer.encode_plus(example['text'],
                                            add_special_tokens=True,
                                            truncation=True,
                                            max_length=self.max_length)

        # Defensive slice in case the tokenizer returned more ids than requested.
        input_ids = inputs["input_ids"][:self.max_length]
        attention_mask = [1] * len(input_ids)

        # Right-pad ids with the pad id and the mask with zeros up to max_length.
        padding_length = self.max_length - len(input_ids)
        input_ids = input_ids + ([self.pad_token] * padding_length)
        attention_mask = attention_mask + ([0] * padding_length)

        assert len(input_ids) == self.max_length, "Error with input length {} vs {}".format(len(input_ids), self.max_length)

        # Only the first annotated label is kept as the target.
        label = example['labels'][0]

        return {'input_ids': torch.LongTensor(input_ids),
                'attention_mask': torch.LongTensor(attention_mask),
                'labels': torch.LongTensor([label])}

In [None]:
# Class index -> emotion name; the position in the list is the class id.
id2label = dict(enumerate([
    "admiration",
    "amusement",
    "anger",
    "annoyance",
    "approval",
    "caring",
    "confusion",
    "curiosity",
    "desire",
    "disappointment",
    "disapproval",
    "disgust",
    "embarrassment",
    "excitement",
    "fear",
    "gratitude",
    "grief",
    "joy",
    "love",
    "nervousness",
    "optimism",
    "pride",
    "realization",
    "relief",
    "remorse",
    "sadness",
    "surprise",
    "neutral",
]))

In [None]:
# Load the "simplified" configuration of the go_emotions dataset.
dataset = load_dataset("go_emotions", "simplified")

Downloading readme:   0%|          | 0.00/9.40k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.77M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/350k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/347k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/43410 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5426 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5427 [00:00<?, ? examples/s]

In [None]:
# How many training examples carry 1, 2, 3, ... labels.
Counter(len(example['labels']) for example in dataset['train'])

Counter({1: 36308, 2: 6541, 3: 532, 4: 28, 5: 1})

In [None]:
# List the available splits.
dataset.keys()

dict_keys(['train', 'validation', 'test'])

In [None]:
# Peek at one raw training example to see its fields.
dataset['train'][0]

{'text': "My favourite food is anything I didn't have to cook myself.",
 'labels': [27],
 'id': 'eebbqej'}

In [None]:
# Configuration based on roberta-base, with one output class per entry of id2label.
roberta_config = RobertaConfig.from_pretrained('roberta-base',
                                      num_labels=len(id2label),
                                      finetuning_task='GoEmotions',
                                      cache_dir=None,
                                      output_attentions=False,
                                      output_hidden_states=False)



config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [None]:
# Tokenizer matching the roberta-base checkpoint.
# NOTE(review): do_lower_case looks like a BERT-style option; whether
# RobertaTokenizer honours it is not visible here — confirm or drop.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', do_lower_case=False)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
# Wrap the training split; every sample is padded/truncated to 200 token ids.
train_dataset = emotionDataset(list_data=dataset['train'],
                               tokenizer=tokenizer,
                               max_length=200)

In [None]:
# Shuffled batches of 32, with one loader worker per CPU reported by multiprocessing.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,
                          num_workers=mp.cpu_count())

In [None]:
# Pull a single batch to sanity-check what the loader yields.
example_batch = next(iter(train_loader))

In [None]:
# Shapes of the three tensors in the sample batch.
example_batch['input_ids'].shape, example_batch['attention_mask'].shape, example_batch['labels'].shape

(torch.Size([32, 200]), torch.Size([32, 200]), torch.Size([32, 1]))

In [None]:
# Optimisation hyper-parameters for the training run.
args = dict(
    weight_decay=0.0,
    learning_rate=2e-5,
    epochs=5,
    gradient_accumulation_steps=1,
    adam_epsilon=1e-8,
)
# Total number of optimizer steps across all epochs.
args['t_total'] = (len(train_loader) // args['gradient_accumulation_steps']) * args['epochs']
# The first 10% of those steps are used for learning-rate warmup.
args['warmup_steps'] = int(args['t_total'] * 0.10)

In [None]:
# Start from the pretrained roberta-base weights. Constructing the class
# directly from a config (RobertaForSequenceClassification(config=...)) yields a
# randomly initialised network, i.e. training from scratch rather than fine-tuning.
model = RobertaForSequenceClassification.from_pretrained('roberta-base',
                                                         config=roberta_config).cuda()

In [None]:
# Parameters whose name contains one of these fragments get no weight decay.
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {
        # Parameters whose name matches none of the no_decay fragments.
        'params': [param for name, param in model.named_parameters()
                   if all(fragment not in name for fragment in no_decay)],
        'weight_decay': args['weight_decay'],
    },
    {
        # Parameters whose name matches at least one no_decay fragment.
        'params': [param for name, param in model.named_parameters()
                   if any(fragment in name for fragment in no_decay)],
        'weight_decay': 0.0,
    },
]
# NOTE(review): this is the AdamW imported from transformers; it is presumably
# deprecated in favour of torch.optim.AdamW — confirm against the installed version.
optimizer = AdamW(optimizer_grouped_parameters,
                  lr=args['learning_rate'],
                  eps=args['adam_epsilon'])
# Linear warmup for warmup_steps, then linear decay until t_total steps.
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=args['warmup_steps'],
                                            num_training_steps=args['t_total'])



In [None]:
# Wrap the model in nn.DataParallel; the name `model` now refers to the wrapper.
model = nn.DataParallel(model.cuda())

In [None]:
# Training: args['epochs'] full passes over train_loader, one optimizer step per batch.
# args['gradient_accumulation_steps'] is not consulted inside this loop.
model.train()
for each_epoch in range(args['epochs']):
    for batch in tqdm(train_loader):
        # Clear gradients left over from the previous step.
        model.zero_grad()
        # NOTE(review): the batch is passed without an explicit .to(device);
        # presumably nn.DataParallel moves it to the GPUs — confirm.
        outputs = model(**batch)
        ### Loss calculation
        # The first output is treated as the loss; .mean() collapses it to a scalar.
        loss = outputs[0].mean()

        loss.backward()
        optimizer.step()
        scheduler.step()  # Update learning rate schedule

100%|██████████| 1357/1357 [22:19<00:00,  1.01it/s]
100%|██████████| 1357/1357 [22:20<00:00,  1.01it/s]
100%|██████████| 1357/1357 [22:19<00:00,  1.01it/s]
100%|██████████| 1357/1357 [22:21<00:00,  1.01it/s]
100%|██████████| 1357/1357 [22:21<00:00,  1.01it/s]


In [None]:
# Wrap the test split with the same tokenizer and sequence length as training.
test_dataset = emotionDataset(list_data=dataset['test'],
                               tokenizer=tokenizer,
                               max_length=200)

In [None]:
# Unshuffled batches of 32 so predictions keep the order of the test split.
test_loader = DataLoader(test_dataset, batch_size=32,
                         shuffle=False, num_workers=mp.cpu_count())

In [None]:
def evaluate(model_, eval_loader, id2label):
    """Run ``model_`` over ``eval_loader`` and collect labels and predictions.

    Args:
        model_: model called as ``model_(**batch)``; the second element of its
            output is used as the logits.
        eval_loader: iterable of dict batches whose values are tensors and
            which contain a ``'labels'`` entry.
        id2label: mapping from integer class id to emotion name.

    Returns:
        dict with three lists accumulated over all batches:
        ``'actual'`` (labels exactly as batched), ``'preds'`` (argmax class
        ids) and ``'emotions'`` (names for ``'preds'``; ``"unknown"`` when an
        id is missing from ``id2label``).
    """
    # Switch the model that was passed in (not a global) to eval mode.
    model_.eval()
    dict_result = {'actual': [], 'preds': [], 'emotions': []}
    device = next(model_.parameters()).device  # Get model's device

    with torch.no_grad():
        for batch in tqdm(eval_loader):
            batch = {k: v.to(device) for k, v in batch.items()}  # Move batch to device

            outputs = model_(**batch)
            logits = outputs[1]

            predicted_labels = np.argmax(logits.detach().cpu().numpy(), axis=1)
            dict_result['actual'] += batch['labels'].cpu().numpy().tolist()  # Move labels to CPU
            dict_result['preds'] += predicted_labels.tolist()

            # id2label is keyed by plain ints, so convert the numpy integer
            # instead of looking up its string form (which never matched).
            dict_result['emotions'] += [id2label.get(int(label), "unknown")
                                        for label in predicted_labels]

    return dict_result

In [None]:
# Collect labels and predictions for the whole test split.
dict_test_results = evaluate(model_=model, eval_loader=test_loader, id2label=id2label)

100%|██████████| 170/170 [00:57<00:00,  2.95it/s]


In [None]:
def get_performance(actual_og, preds_og, dict_mapping, avoid_labels=[]):
    """Print and return a classification report, optionally ignoring labels.

    Args:
        actual_og: sequence of one-element sequences; element 0 is the gold label.
        preds_og: sequence of predicted labels, aligned with ``actual_og``.
        dict_mapping: label mapping, only printed for reference.
        avoid_labels: a pair is dropped when either its gold label or its
            prediction is in this collection.

    Returns:
        Whatever ``classification_report`` returns for the kept pairs.
    """
    gold = [row[0] for row in actual_og]

    # Positions where neither the gold label nor the prediction is excluded.
    kept_positions = [
        pos for pos in tqdm(range(len(gold)))
        if not (gold[pos] in avoid_labels or preds_og[pos] in avoid_labels)
    ]
    actual_ = [gold[pos] for pos in kept_positions]
    preds_ = [preds_og[pos] for pos in kept_positions]

    df_report = classification_report(actual_, preds_)
    print(df_report)

    separator = '--'*20
    print(separator)
    print('STATS')
    print(separator)
    print('Actual counter:', Counter(actual_))
    print('Prediction counter:', Counter(preds_))
    print('Mapping:', dict_mapping)
    return df_report

In [None]:
# Report over every label (no labels excluded).
df_test = get_performance(actual_og=dict_test_results['actual'],
                          preds_og=dict_test_results['preds'],
                          dict_mapping=id2label)

100%|██████████| 5427/5427 [00:00<00:00, 890760.26it/s]

              precision    recall  f1-score   support

           0       0.62      0.61      0.62       504
           1       0.70      0.87      0.77       252
           2       0.42      0.45      0.44       197
           3       0.26      0.16      0.20       286
           4       0.33      0.23      0.27       318
           5       0.30      0.25      0.27       114
           6       0.31      0.28      0.30       139
           7       0.44      0.56      0.49       233
           8       0.48      0.27      0.34        74
           9       0.30      0.17      0.21       127
          10       0.27      0.20      0.23       220
          11       0.45      0.39      0.42        84
          12       0.44      0.13      0.21        30
          13       0.34      0.31      0.32        84
          14       0.55      0.62      0.58        74
          15       0.80      0.85      0.82       288
          16       0.00      0.00      0.00         6
          17       0.50    


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Same report, skipping pairs whose gold or predicted id is in avoid_labels.
df_test = get_performance(actual_og=dict_test_results['actual'],
                          preds_og=dict_test_results['preds'],
                          dict_mapping=id2label,
                          avoid_labels=[12, 16, 19, 21, 23, 24])

100%|██████████| 5427/5427 [00:00<00:00, 1301159.70it/s]

              precision    recall  f1-score   support

           0       0.63      0.61      0.62       504
           1       0.71      0.87      0.78       252
           2       0.42      0.45      0.44       197
           3       0.27      0.16      0.20       285
           4       0.34      0.23      0.27       316
           5       0.31      0.26      0.28       113
           6       0.32      0.28      0.30       137
           7       0.44      0.56      0.49       231
           8       0.49      0.27      0.35        73
           9       0.31      0.17      0.22       126
          10       0.27      0.20      0.23       216
          11       0.49      0.39      0.43        84
          13       0.35      0.31      0.33        84
          14       0.61      0.62      0.62        74
          15       0.80      0.85      0.82       288
          17       0.51      0.46      0.48       115
          18       0.63      0.77      0.69       169
          20       0.52    




In [None]:
# Report with a wider set of label ids excluded.
df_test = get_performance(actual_og=dict_test_results['actual'],
                          preds_og=dict_test_results['preds'],
                          dict_mapping=id2label,
                          avoid_labels=[8, 11, 12, 13, 14, 16, 19, 21, 23, 24, 26])

# Extract F1 and support per class from the text report. Rows are tokenised on
# arbitrary whitespace and recognised by shape, so the parsing no longer
# depends on exact column widths or on how many summary lines follow.
metrics = {}
for report_line in df_test.split('\n'):
    fields = report_line.split()
    # Per-class rows read: <label id> <precision> <recall> <f1> <support>.
    if len(fields) != 5 or not fields[0].isdigit():
        continue
    label = id2label[int(fields[0])]
    metrics[f"F1_Score_{label}"] = float(fields[3])
    metrics[f"Support_{label}"] = int(fields[4])

# Save the metrics to goemotions_results.txt
# NOTE(review): a later cell reads this same file back as the "baseline", so the
# baseline and RoBERTa numbers compared there are identical — confirm intent.
with open("goemotions_results.txt", "w") as f:
    for metric_name, metric_value in metrics.items():
        f.write(f"{metric_name}: {metric_value}\n")

100%|██████████| 5427/5427 [00:00<00:00, 1195383.25it/s]

              precision    recall  f1-score   support

           0       0.64      0.64      0.64       485
           1       0.73      0.87      0.79       250
           2       0.46      0.47      0.46       191
           3       0.31      0.17      0.22       268
           4       0.36      0.24      0.29       308
           5       0.32      0.27      0.29       109
           6       0.33      0.29      0.31       134
           7       0.45      0.60      0.52       217
           9       0.34      0.18      0.23       119
          10       0.28      0.21      0.24       205
          15       0.82      0.86      0.84       284
          17       0.56      0.50      0.53       107
          18       0.65      0.78      0.71       166
          20       0.57      0.54      0.55       112
          22       0.27      0.04      0.07        92
          25       0.46      0.49      0.48        93
          27       0.59      0.70      0.64      1550

    accuracy              




In [None]:
# emotion name -> [f1-score, support], parsed from the text report in df_test.
# Rows are tokenised on arbitrary whitespace and recognised by shape rather than
# by a fixed six-space separator and a hard-coded line slice.
dict_roberta = {}
for report_line in df_test.split('\n'):
    fields = report_line.split()
    # Per-class rows read: <label id> <precision> <recall> <f1> <support>.
    if len(fields) != 5 or not fields[0].isdigit():
        continue
    dict_roberta[id2label[int(fields[0])]] = [float(fields[3]), int(fields[4])]

In [None]:
# Read the saved metrics file; the context manager closes the handle promptly.
with open('goemotions_results.txt') as baseline_file:
    baseline_results = baseline_file.read()

In [None]:
# Map the first space-separated token of each line to the last one.
# Keys keep their trailing ':' and an empty line produces the entry '' -> ''.
dict_baseline = {}
for line in baseline_results.split('\n'):
    fields = line.split(' ')
    dict_baseline[fields[0]] = fields[-1]

In [None]:
# Compare baseline and RoBERTa F1 per emotion and collect low-support emotions.
labels_to_avoid = []
list_macro_f1_score = []  # never filled in this cell; kept so the name stays defined
for each_key in dict_baseline:
    # Visit every emotion exactly once, through its F1 entry. Walking the
    # matching "Support_" key as well printed each emotion twice and appended
    # it to labels_to_avoid twice.
    if 'F1_Score_' not in each_key:
        continue
    emotion_label = each_key.replace("F1_Score_", "").replace(":", "")

    if emotion_label not in dict_roberta:  # Skip labels absent from dict_roberta
        continue

    f1_roberta, support = dict_roberta[emotion_label]
    print('=='*30)
    print('Emotion:', emotion_label)
    print('=='*30)
    print('Baseline:', dict_baseline[each_key])
    print('RoBERTa:', f1_roberta)
    print('Support:', support)
    # Emotions with fewer than 100 supporting examples are marked for exclusion.
    if support < 100 and emotion_label not in labels_to_avoid:
        labels_to_avoid.append(emotion_label)
        print('Added.')

# Translate the collected emotion names back to their class ids.
result = [key for key,val in id2label.items() if val in labels_to_avoid]
print(result)

Emotion: admiration
Baseline: 0.64
RoBERTa: 0.64
Support: 485
Emotion: admiration
Support: 485
Emotion: amusement
Baseline: 0.79
RoBERTa: 0.79
Support: 250
Emotion: amusement
Support: 250
Emotion: anger
Baseline: 0.46
RoBERTa: 0.46
Support: 191
Emotion: anger
Support: 191
Emotion: annoyance
Baseline: 0.22
RoBERTa: 0.22
Support: 268
Emotion: annoyance
Support: 268
Emotion: approval
Baseline: 0.29
RoBERTa: 0.29
Support: 308
Emotion: approval
Support: 308
Emotion: caring
Baseline: 0.29
RoBERTa: 0.29
Support: 109
Emotion: caring
Support: 109
Emotion: confusion
Baseline: 0.31
RoBERTa: 0.31
Support: 134
Emotion: confusion
Support: 134
Emotion: curiosity
Baseline: 0.52
RoBERTa: 0.52
Support: 217
Emotion: curiosity
Support: 217
Emotion: disappointment
Baseline: 0.23
RoBERTa: 0.23
Support: 119
Emotion: disappointment
Support: 119
Emotion: disapproval
Baseline: 0.24
RoBERTa: 0.24
Support: 205
Emotion: disapproval
Support: 205
Emotion: gratitude
Baseline: 0.84
RoBERTa: 0.84
Support: 284
Emotion: 

In [None]:
# Class ids whose emotion name was collected in labels_to_avoid.
[key for key,val in id2label.items() if val in labels_to_avoid]

[22, 25]

In [None]:
# Show the collected emotion names.
print(labels_to_avoid)

['realization', 'realization', 'sadness', 'sadness']


In [None]:
# Same lookup as the earlier cell: class ids for the names in labels_to_avoid.
[key for key,val in id2label.items() if val in labels_to_avoid]

[22, 25]

In [None]:
# Display the module structure of the model.
model

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [None]:
# NOTE(review): transformers is already imported in the first cell, so this
# install runs too late to matter; consider moving it to the top as a pinned `%pip install`.
!pip install transformers



In [None]:
# NOTE(review): consider moving this to the top of the notebook as a pinned `%pip install`.
!pip install huggingface_hub



In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer # Remove extra spaces before this line
from huggingface_hub import notebook_login

In [None]:
# Check whether `model` is currently the bare model or a DataParallel wrapper.
print(type(model))

<class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'>


In [None]:
import os

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from huggingface_hub import notebook_login

# Assume model and tokenizer are already loaded and/or trained

# Directory where the model and tokenizer will be saved
model_dir = "GemotionsFT"

# Unwrap DataParallel so save_pretrained is called on the underlying model
if isinstance(model, torch.nn.DataParallel):
    model = model.module

# Store the emotion names in the config so the saved checkpoint reports them
# instead of generic label names.
model.config.id2label = dict(id2label)
model.config.label2id = {name: idx for idx, name in id2label.items()}

# Save the model
model.save_pretrained(model_dir)

# Save the tokenizer
tokenizer = AutoTokenizer.from_pretrained('roberta-base')
tokenizer.save_pretrained(model_dir)

print(f"Model and tokenizer saved in directory {model_dir}")

# Verify the files in the directory
print(os.listdir(model_dir))


Model and tokenizer saved in directory GemotionsFT
['special_tokens_map.json', 'merges.txt', 'config.json', 'vocab.json', 'pytorch_model.bin', 'model.safetensors', 'tokenizer_config.json', 'tokenizer.json']


In [None]:
# Interactive Hugging Face login; needs user input, so this cell cannot run unattended.
notebook_login()  # Follow the prompts to log in

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Upload the saved GemotionsFT folder to a model repository on the hub.
# NOTE(review): nothing here creates the repository; presumably it must already exist — confirm.
from huggingface_hub import HfApi
api = HfApi()
model_id = "Mukundhan32/GemotionsFT1"  # Replace with your desired model name
api.upload_folder(
    folder_path="GemotionsFT",
    repo_id=model_id,
    repo_type="model",
)

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Mukundhan32/GemotionsFT1/commit/fb7459a22794e8b71aa967fb7b4a4c9c4478b460', commit_message='Upload folder using huggingface_hub', commit_description='', oid='fb7459a22794e8b71aa967fb7b4a4c9c4478b460', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Archive the saved folder and trigger a browser download.
# The google.colab import means this cell only works inside Colab.
!zip -r GemotionsFT.zip GemotionsFT  # Zip the folder

from google.colab import files
files.download('GemotionsFT.zip')  # Download the zip file

  adding: GemotionsFT/ (stored 0%)
  adding: GemotionsFT/special_tokens_map.json (deflated 52%)
  adding: GemotionsFT/merges.txt (deflated 53%)
  adding: GemotionsFT/config.json (deflated 65%)
  adding: GemotionsFT/vocab.json (deflated 59%)
  adding: GemotionsFT/pytorch_model.bin (deflated 7%)
  adding: GemotionsFT/model.safetensors (deflated 7%)
  adding: GemotionsFT/tokenizer_config.json (deflated 76%)
  adding: GemotionsFT/tokenizer.json (deflated 72%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>