In [1]:
from transformers import pipeline
import numpy as np
import pandas as pd
from datasets import Dataset, load_dataset, DatasetDict, concatenate_datasets
import torch
from sklearn.metrics import classification_report

# Pin every random number generator used by the notebook so that repeated
# runs print the same results.
RANDOM_SEED = 64

np.random.seed(RANDOM_SEED)     # NumPy global generator
torch.manual_seed(RANDOM_SEED)  # PyTorch default generator

if torch.cuda.is_available():
    # Seed the current CUDA device, then every visible CUDA device.
    torch.cuda.manual_seed(RANDOM_SEED)
    torch.cuda.manual_seed_all(RANDOM_SEED)
    # Set the cuDNN deterministic flag and switch its benchmark mode off.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the Spanish (es) and English (en) test files; both are tab-separated
# and are exposed under a single 'test' split.
emoevent_es_dataset = load_dataset("csv", sep='\t', data_files={'test': r'es_test.tsv'})
emoevent_en_dataset = load_dataset("csv", sep='\t', data_files={'test': r'en_test.tsv'})

# Stack the two test splits (Spanish first, then English) into one dataset.
test_dataset = concatenate_datasets(
    [loaded['test'] for loaded in (emoevent_es_dataset, emoevent_en_dataset)]
)

# NOTE: the shuffle uses its own fixed seed (42), not RANDOM_SEED.
emoevent = DatasetDict({'test': test_dataset.shuffle(seed=42)})

Using custom data configuration default-c8e873fed443e156
Found cached dataset csv (/home/njfernandez/.cache/huggingface/datasets/csv/default-c8e873fed443e156/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)
100%|██████████| 1/1 [00:00<00:00, 689.63it/s]
Using custom data configuration default-a74fc7f693d7463f
Found cached dataset csv (/home/njfernandez/.cache/huggingface/datasets/csv/default-a74fc7f693d7463f/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)
100%|██████████| 1/1 [00:00<00:00, 415.65it/s]
Loading cached shuffled indices for dataset at /home/njfernandez/.cache/huggingface/datasets/csv/default-c8e873fed443e156/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-8f2ef9d8448eac04.arrow


In [3]:
# Function label -> emotion tags that are relabelled to it.
# Entries whose list holds only '' have no emotion assigned here; presumably
# they are placeholders for function labels without an emotion counterpart
# in this dataset (TODO confirm).
function_mapping = {
    'OTHER': ['others', 'surprise', 'joy'],
    'NOT_INTERESTED': [''],
    'DISLIKE': ['disgust'],
    'NOT_CORRECT': [''],
    'PESSIMISTIC': ['sadness'],
    'WORRIED': ['fear'],
    'ANGRY': ['anger'],
    'DISAPPOINTED': [''],
    'BORED': [''],
    'NOT_APPROVE': [''],
    'NOT_IMPORTANT': [''],
    'DISAGREE': [''],
    'WARN': [''],
    'COMPLAIN': [''],
    'THREATEN': [''],
    'UNWILLING': [''],
    'DISTRUST': [''],
    'REFUSE': [''],
}

# Drop the columns that are not used further down, in a single call.
emoevent = emoevent.remove_columns(["id", "event", "offensive"])
def map_labels(batch):
    """Add a ``function`` column by translating each row's ``emotion`` tag.

    The translation table is the module-level ``function_mapping``
    (function label -> list of emotion tags), read at call time.

    Args:
        batch: Mapping of column name -> list of values; must contain an
            ``emotion`` column. It is modified in place.

    Returns:
        The same ``batch`` object with ``batch['function']`` set to one
        entry per row: the function label whose emotion list contains the
        row's emotion, or ``None`` when no label lists that emotion.
    """
    # Build the reverse lookup once instead of rescanning the whole mapping
    # for every row. Empty-string placeholders are skipped so that a blank
    # emotion is never labelled; setdefault keeps the first function label
    # that lists a given emotion.
    emotion_to_function = {}
    for function_label, emotions in function_mapping.items():
        for emotion in emotions:
            if emotion:
                emotion_to_function.setdefault(emotion, function_label)

    # Unknown emotions become None so the column never mixes lists and
    # strings, which is what the old empty-list initialiser produced.
    batch['function'] = [emotion_to_function.get(emotion) for emotion in batch['emotion']]
    return batch
# Attach the `function` column, processing the rows in batches.
emoevent = emoevent.map(function=map_labels, batched=True)

100%|██████████| 4/4 [00:00<00:00, 90.46ba/s]


In [4]:
# Checkpoint handed to the zero-shot classification pipeline.
zero_shot_model = 'MoritzLaurer/mDeBERTa-v3-base-mnli-xnli'

# Candidate phrase shown to the model -> function label it stands for.
# Candidates currently disabled (kept for reference):
#   "lack of interest": "NOT_INTERESTED", "boredom": "BORED",
#   "incorrectness": "NOT_CORRECT", "disappointment": "DISAPPOINTED",
#   "complaint": "COMPLAIN", "disapproval": "NOT_APPROVE",
#   "unimportance": "NOT_IMPORTANT", "disagreement": "DISAGREE",
#   "refusal": "REFUSE", "warning": "WARN", "threat": "THREATEN",
#   "unwillingness": "UNWILLING", "distrust": "DISTRUST"
candidate_labels = {
    "disgust": "DISLIKE",
    "sadness": "PESSIMISTIC",
    "fear": "WORRIED",
    "anger": "ANGRY",
    "joy, surprise or other emotion": "OTHER",
}

# Hypothesis templates to compare; `{}` is the slot for the candidate phrase.
h_ts = [
    'This person feels {}.',
    'This person conveys {}.',
    'This person shows {}.',
    'This person expresses {}.',
    'This text is {}.',
    'This text is about {}.',
    'This text shows {}.',
    'This text expresses {}.',
    'This text conveys {}.',
    'The communicative function of this text is {}.',
    'The communicative intention of this text is {}.',
    'The emotion of this text is {}.',
    'The emotion expressed in this text is {}.',
]

def zero_shot_pipeline(example):
    """Classify one example's tweet and return its best-scoring function label.

    Reads three module-level names at call time: ``classifier`` (the
    callable doing the classification), ``candidate_labels`` (candidate
    phrase -> function label) and ``ht`` (the hypothesis template in use).

    Args:
        example: Mapping with a ``tweet`` entry holding the text to classify.

    Returns:
        ``{"predicted_label": <function label>, "score": <its score>}`` for
        the highest-scoring candidate.
    """
    prediction = classifier(
        example['tweet'],
        candidate_labels=list(candidate_labels.keys()),
        hypothesis_template=ht,
        multi_label=False,
    )

    # Re-key the returned scores from candidate phrase to function label.
    function_scores = {}
    for phrase, phrase_score in zip(prediction['labels'], prediction['scores']):
        function_scores[candidate_labels[phrase]] = phrase_score

    # Rank by score, highest first, and keep the winner.
    ranked = sorted(function_scores.items(), key=lambda pair: pair[1], reverse=True)
    best_label, best_score = ranked[0]
    return {"predicted_label": best_label, "score": best_score}

In [5]:
# Use the first CUDA GPU when one is available; otherwise fall back to the
# CPU (device=-1) instead of failing on machines without a GPU.
classifier = pipeline(
    'zero-shot-classification',
    model=zero_shot_model,
    device=0 if torch.cuda.is_available() else -1,
)

# Score the test split once per hypothesis template and save the results.
# The loop variable has to stay named `ht`: zero_shot_pipeline reads it as a
# module-level name.
for ht in h_ts:
    print(ht)

    # After this call, predicted_label/score hold the current template's output.
    emoevent['test'] = emoevent['test'].map(zero_shot_pipeline)
    y_true, y_pred = emoevent['test']["function"], emoevent['test']["predicted_label"]

    # Text report for the notebook output.
    print(classification_report(y_true, y_pred, zero_division=0))

    # Same metrics as a table, written to one CSV per template.
    clf_report = classification_report(y_true, y_pred, zero_division=0, output_dict=True)
    df = pd.DataFrame(clf_report).T
    with open(f"classification_report_emoevent_ht={ht}_zero_shot.csv", 'w') as csv_file:
        df.to_csv(csv_file)

    # Row-level predictions, tab-separated, one file per template.
    cols = ["tweet", "emotion", "function", "predicted_label", "score"]
    ds_zero_shot = emoevent['test'].to_pandas()
    df_test = ds_zero_shot.loc[:, cols]
    df_test.to_csv(f"preds_emoevent_ht={ht}_zero_shot.csv", sep='\t', header=True, index=False)



This person feels {}.


100%|██████████| 3073/3073 [03:04<00:00, 16.63ex/s]


              precision    recall  f1-score   support

       ANGRY       0.10      0.49      0.17       244
     DISLIKE       0.04      0.03      0.03       184
       OTHER       0.90      0.25      0.40      2316
 PESSIMISTIC       0.17      0.26      0.21       278
     WORRIED       0.04      0.55      0.07        51

    accuracy                           0.27      3073
   macro avg       0.25      0.32      0.18      3073
weighted avg       0.70      0.27      0.33      3073

This person conveys {}.


100%|██████████| 3073/3073 [03:14<00:00, 15.78ex/s]


              precision    recall  f1-score   support

       ANGRY       0.08      0.50      0.14       244
     DISLIKE       0.03      0.05      0.03       184
       OTHER       0.87      0.19      0.31      2316
 PESSIMISTIC       0.17      0.28      0.21       278
     WORRIED       0.08      0.33      0.13        51

    accuracy                           0.22      3073
   macro avg       0.25      0.27      0.16      3073
weighted avg       0.68      0.22      0.27      3073

This person shows {}.


100%|██████████| 3073/3073 [03:10<00:00, 16.11ex/s]


              precision    recall  f1-score   support

       ANGRY       0.08      0.57      0.15       244
     DISLIKE       0.03      0.03      0.03       184
       OTHER       0.88      0.18      0.30      2316
 PESSIMISTIC       0.17      0.30      0.22       278
     WORRIED       0.07      0.37      0.11        51

    accuracy                           0.22      3073
   macro avg       0.25      0.29      0.16      3073
weighted avg       0.69      0.22      0.26      3073

This person expresses {}.


100%|██████████| 3073/3073 [03:09<00:00, 16.25ex/s]


              precision    recall  f1-score   support

       ANGRY       0.09      0.39      0.15       244
     DISLIKE       0.02      0.04      0.03       184
       OTHER       0.89      0.16      0.28      2316
 PESSIMISTIC       0.13      0.35      0.19       278
     WORRIED       0.04      0.55      0.08        51

    accuracy                           0.20      3073
   macro avg       0.24      0.30      0.15      3073
weighted avg       0.69      0.20      0.24      3073

This text is {}.


100%|██████████| 3073/3073 [03:15<00:00, 15.73ex/s]


              precision    recall  f1-score   support

       ANGRY       0.11      0.61      0.18       244
     DISLIKE       0.10      0.04      0.05       184
       OTHER       0.90      0.30      0.45      2316
 PESSIMISTIC       0.17      0.28      0.21       278
     WORRIED       0.05      0.37      0.09        51

    accuracy                           0.31      3073
   macro avg       0.26      0.32      0.20      3073
weighted avg       0.70      0.31      0.38      3073

This text is about {}.


100%|██████████| 3073/3073 [03:14<00:00, 15.81ex/s]


              precision    recall  f1-score   support

       ANGRY       0.10      0.50      0.17       244
     DISLIKE       0.17      0.08      0.11       184
       OTHER       0.87      0.32      0.47      2316
 PESSIMISTIC       0.14      0.37      0.21       278
     WORRIED       0.06      0.29      0.11        51

    accuracy                           0.33      3073
   macro avg       0.27      0.31      0.21      3073
weighted avg       0.69      0.33      0.40      3073

This text shows {}.


100%|██████████| 3073/3073 [03:14<00:00, 15.77ex/s]


              precision    recall  f1-score   support

       ANGRY       0.09      0.60      0.16       244
     DISLIKE       0.05      0.03      0.04       184
       OTHER       0.89      0.24      0.37      2316
 PESSIMISTIC       0.17      0.27      0.21       278
     WORRIED       0.06      0.37      0.10        51

    accuracy                           0.26      3073
   macro avg       0.25      0.30      0.18      3073
weighted avg       0.70      0.26      0.32      3073

This text expresses {}.


100%|██████████| 3073/3073 [03:14<00:00, 15.76ex/s]


              precision    recall  f1-score   support

       ANGRY       0.09      0.40      0.15       244
     DISLIKE       0.05      0.08      0.06       184
       OTHER       0.89      0.20      0.33      2316
 PESSIMISTIC       0.13      0.27      0.18       278
     WORRIED       0.04      0.47      0.08        51

    accuracy                           0.22      3073
   macro avg       0.24      0.28      0.16      3073
weighted avg       0.69      0.22      0.28      3073

This text conveys {}.


100%|██████████| 3073/3073 [03:15<00:00, 15.74ex/s]


              precision    recall  f1-score   support

       ANGRY       0.10      0.53      0.16       244
     DISLIKE       0.04      0.06      0.05       184
       OTHER       0.89      0.25      0.39      2316
 PESSIMISTIC       0.14      0.26      0.18       278
     WORRIED       0.06      0.33      0.10        51

    accuracy                           0.26      3073
   macro avg       0.24      0.29      0.18      3073
weighted avg       0.69      0.26      0.33      3073

The communicative function of this text is {}.


100%|██████████| 3073/3073 [03:15<00:00, 15.72ex/s]


              precision    recall  f1-score   support

       ANGRY       0.11      0.36      0.16       244
     DISLIKE       0.09      0.07      0.07       184
       OTHER       0.88      0.29      0.44      2316
 PESSIMISTIC       0.15      0.43      0.22       278
     WORRIED       0.04      0.45      0.08        51

    accuracy                           0.30      3073
   macro avg       0.25      0.32      0.19      3073
weighted avg       0.69      0.30      0.37      3073

The communicative intention of this text is {}.


100%|██████████| 3073/3073 [14:20<00:00,  3.57ex/s]    


              precision    recall  f1-score   support

       ANGRY       0.11      0.32      0.16       244
     DISLIKE       0.11      0.07      0.09       184
       OTHER       0.88      0.29      0.44      2316
 PESSIMISTIC       0.13      0.50      0.21       278
     WORRIED       0.05      0.39      0.09        51

    accuracy                           0.30      3073
   macro avg       0.26      0.31      0.20      3073
weighted avg       0.69      0.30      0.37      3073

The emotion of this text is {}.


100%|██████████| 3073/3073 [03:14<00:00, 15.83ex/s]


              precision    recall  f1-score   support

       ANGRY       0.12      0.30      0.17       244
     DISLIKE       0.16      0.11      0.13       184
       OTHER       0.84      0.52      0.65      2316
 PESSIMISTIC       0.15      0.36      0.22       278
     WORRIED       0.07      0.31      0.12        51

    accuracy                           0.46      3073
   macro avg       0.27      0.32      0.26      3073
weighted avg       0.67      0.46      0.53      3073

The emotion expressed in this text is {}.


100%|██████████| 3073/3073 [03:41<00:00, 13.89ex/s]


              precision    recall  f1-score   support

       ANGRY       0.12      0.28      0.17       244
     DISLIKE       0.18      0.13      0.15       184
       OTHER       0.85      0.50      0.63      2316
 PESSIMISTIC       0.15      0.38      0.21       278
     WORRIED       0.08      0.39      0.13        51

    accuracy                           0.45      3073
   macro avg       0.27      0.34      0.26      3073
weighted avg       0.67      0.45      0.52      3073

