In [1]:
!pip install transformers
!pip install datasets

Collecting datasets
  Downloading datasets-2.16.1-py3-none-any.whl (507 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyarrow-hotfix, dill, multiprocess, datasets
Successfully installed datasets-2.16.1 dill-0.3.7 multiprocess-0.70.15 pyarrow-hotfix-0.6


## Import Libraries

In [2]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset
from transformers.pipelines.pt_utils import KeyDataset
import os
import torch
import pandas as pd

In [3]:
# Expose only GPU 0 to this process, then use it when CUDA is usable; otherwise run on CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

## Load Emotion Model

In [4]:
# Multi-label emotion classifier. `return_all_scores=True` makes every call return a
# score for each label instead of only the best one.
# NOTE(review): newer transformers releases reportedly deprecate `return_all_scores` in
# favour of `top_k=None`; confirm result ordering/nesting before switching.
pipe = pipeline(
    task="text-classification",
    model="cardiffnlp/twitter-roberta-base-emotion-multilabel-latest",
    return_all_scores=True,
    device=device,
)

config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/409 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]



In [None]:
# Sanity-check the classifier on a single tweet; the cell displays its per-label scores.
sample_tweet = "Stop w ’we have to worry about the children’ No we do not-many R >20yrs old Go home and make your country better or enter ours legally we can’t afford them#NODACA"
pipe(sample_tweet)

[[{'label': 'anger', 'score': 0.2868918180465698},
  {'label': 'anticipation', 'score': 0.2280246615409851},
  {'label': 'disgust', 'score': 0.4210518002510071},
  {'label': 'fear', 'score': 0.26769891381263733},
  {'label': 'joy', 'score': 0.013531035743653774},
  {'label': 'love', 'score': 0.0023378191981464624},
  {'label': 'optimism', 'score': 0.2784464359283447},
  {'label': 'pessimism', 'score': 0.27133604884147644},
  {'label': 'sadness', 'score': 0.35492101311683655},
  {'label': 'surprise', 'score': 0.010908824391663074},
  {'label': 'trust', 'score': 0.023149728775024414}]]

In [None]:
# NOTE(review): bare literal with no classifier call — presumably scores pasted from an
# earlier run; the input text is not recorded here. Confirm whether this cell is still needed.
[[{'label': 'anger', 'score': 0.133321613073349},
  {'label': 'anticipation', 'score': 0.2689744830131531},
  {'label': 'disgust', 'score': 0.24493607878684998},
  {'label': 'fear', 'score': 0.2876245975494385},
  {'label': 'joy', 'score': 0.018130697309970856},
  {'label': 'love', 'score': 0.002839383902028203},
  {'label': 'optimism', 'score': 0.3733889162540436},
  {'label': 'pessimism', 'score': 0.2678101062774658},
  {'label': 'sadness', 'score': 0.3561314642429352},
  {'label': 'surprise', 'score': 0.00995288323611021},
  {'label': 'trust', 'score': 0.02865920588374138}]]

## Load Dataset

In [5]:
# Fetch the relabeled HatEval corpus (train / validation / test splits) from the Hugging Face Hub
dataset = load_dataset(path="krishan-CSE/HatEval-Relabeled")

Downloading readme:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.13M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/145k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/340k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

## Get Train, Dev, Test Splits

In [6]:
# Training split of the loaded dataset
train_dataset = dataset["train"]

In [7]:
# Development split (stored under the "validation" key)
dev_dataset = dataset["validation"]

In [8]:
# Held-out test split of the loaded dataset
test_dataset = dataset["test"]

In [9]:
# Emotion labels read from each prediction, in the order their score columns are appended.
EMOTION_LABELS = (
    "anger", "anticipation", "disgust", "fear", "joy", "love",
    "optimism", "pessimism", "sadness", "surprise", "trust",
)


# Function to get predictions and store them in the dataset as vectors
def get_predictions_and_store(dataset, classifier=None, **pipeline_kwargs):
    """Score every text in ``dataset`` and append one score column per emotion label.

    Args:
        dataset: Object exposing ``dataset['text']`` (a sized sequence of strings) and
            ``add_column(name, values)`` that returns a new dataset.
        classifier: Callable taking the list of texts and returning, for each text, a
            list of ``{'label': ..., 'score': ...}`` dicts. Defaults to the notebook-level
            ``pipe`` created in the "Load Emotion Model" cell.
        **pipeline_kwargs: Extra keyword arguments forwarded unchanged to the classifier
            call (for example ``batch_size`` or ``truncation``). None are passed by default.

    Returns:
        The dataset returned by chaining ``add_column`` once per label in
        ``EMOTION_LABELS``; the input ``dataset`` object itself is not reassigned.

    Raises:
        KeyError: If any prediction lacks one of the labels in ``EMOTION_LABELS``.
    """
    if classifier is None:
        classifier = pipe

    texts = dataset['text']
    # An empty split skips inference entirely and simply receives empty score columns.
    predictions = classifier(texts, **pipeline_kwargs) if len(texts) else []

    # Gather scores straight into per-label lists; building one DataFrame per example
    # and concatenating them is far slower and fails outright on an empty split.
    columns = {label: [] for label in EMOTION_LABELS}
    for per_text in predictions:
        # Look scores up by label so the order in which the classifier lists them is irrelevant.
        scores = {entry['label']: entry['score'] for entry in per_text}
        for label in EMOTION_LABELS:
            columns[label].append(scores[label])

    dataset_with_predictions = dataset
    for label in EMOTION_LABELS:
        dataset_with_predictions = dataset_with_predictions.add_column(label, columns[label])

    return dataset_with_predictions

## Get predictions and update the dataset

In [None]:
# Append the emotion-score columns to the training split
train_dataset_with_predictions = get_predictions_and_store(dataset=train_dataset)

In [None]:
# Append the emotion-score columns to the development split
dev_dataset_with_predictions = get_predictions_and_store(dataset=dev_dataset)

In [None]:
# Append the emotion-score columns to the test split
test_dataset_with_predictions = get_predictions_and_store(dataset=test_dataset)

## Save Datasets as CSV Files

In [None]:
# Write the scored training split to train.csv (relative to the working directory)
train_dataset_with_predictions.to_csv(path_or_buf='train.csv')

Creating CSV from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

3073774

In [None]:
# Write the scored development split to dev.csv (relative to the working directory)
dev_dataset_with_predictions.to_csv(path_or_buf='dev.csv')

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

359341

In [None]:
# Write the scored test split to test.csv (relative to the working directory)
test_dataset_with_predictions.to_csv(path_or_buf='test.csv')

Creating CSV from Arrow format:   0%|          | 0/3 [00:00<?, ?ba/s]

1046916