In [None]:
from ekphrasis.classes.preprocessor import TextPreProcessor
from ekphrasis.classes.tokenizer import SocialTokenizer
from ekphrasis.dicts.emoticons import emoticons

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import itertools
import requests, re, string, datetime, copy
from functools import partial

import torch
import torchvision.transforms as T, torch.nn.functional as F, torch.nn as nn
from torch.utils.data import DataLoader

from datasets import Dataset
import datasets
import logging
datasets.logging.get_verbosity = lambda: logging.NOTSET

from transformers import TrainingArguments, EarlyStoppingCallback, TrainerCallback
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import Trainer, get_linear_schedule_with_warmup
from transformers.modeling_outputs import SequenceClassifierOutput

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, f1_score
from sklearn.dummy import DummyClassifier

import plotly.graph_objects as go

from multiprocessing.pool import ThreadPool
from threading import Lock

# Root folder of every dataset and model checkpoint read by this notebook.
PATH = "./data/"
# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# on machines without CUDA instead of failing on the first `.to(device)`.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

##### text preprocessing

In [None]:
"""Tokenization classes for Italian AlBERTo models."""
import collections
import os

# Shared ekphrasis pipeline; AlBERTo_Preprocessing.preprocess runs every text
# through `text_processor.pre_process_doc` before its own regex clean-up.
text_processor = TextPreProcessor(
    # terms that will be normalized
    normalize=['url', 'email', 'user', 'percent', 'money', 'phone', 'time', 'date', 'number'],
    # terms that will be annotated
    annotate={"hashtag"},
    fix_html=True,  # fix HTML tokens

    unpack_hashtags=True,  # perform word segmentation on hashtags

    # select a tokenizer. You can use SocialTokenizer, or pass your own
    # the tokenizer, should take as input a string and return a list of tokens
    tokenizer=SocialTokenizer(lowercase=True).tokenize,
    dicts=[emoticons]
)

class AlBERTo_Preprocessing(object):
    """Text normalizer applied to raw texts before they are tokenized.

    Relies on the module-level `text_processor` pipeline and on a fixed
    sequence of regex substitutions.
    """

    def __init__(self, do_lower_case=True, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.do_lower_case = do_lower_case

    def preprocess(self, text):
        """Return the cleaned version of `text` as a single string."""
        source = text.lower() if self.do_lower_case else text
        # Tokens produced by the ekphrasis pipeline, joined back with spaces.
        cleaned = str(" ".join(text_processor.pre_process_doc(source)))
        # Replace every character outside the allowed set with a space.
        cleaned = re.sub(r'[^a-zA-ZÀ-ú</>!?♥♡\s\U00010000-\U0010ffff]', ' ', cleaned)
        # Collapse whitespace runs to one space.
        cleaned = re.sub(r'\s+', ' ', cleaned)
        # Shorten any word character repeated 3+ times to exactly two.
        cleaned = re.sub(r'(\w)\1{2,}', r'\1\1', cleaned)
        # Drop one leading and one trailing whitespace character, if present.
        cleaned = re.sub(r'^\s', '', cleaned)
        return re.sub(r'\s$', '', cleaned)

# Quick visual check of the preprocessing on a sample tweet.
# `a` is also the preprocessor later cells use to clean the datasets.
a = AlBERTo_Preprocessing(do_lower_case=True)
s = "#IlGOverno presenta, le linee guida... sulla scuola! #labuonascuola - http://t.co/SYS1T9QmQN"
b = a.preprocess(s)
print(b)

##### pretrained tokenizer and pretrained AlBERTo

In [None]:
from transformers import AutoTokenizer, AutoModel

def tokenize_function(examples):
    """Tokenize the "text" field of `examples` with the module-level tokenizer `tok`.

    The tokenizer is called with `padding="max_length"` and `truncation=True`;
    its output is returned unchanged.
    """
    return tok(examples["text"], padding="max_length", truncation=True)

# Re-create the preprocessor and clean a sample tweet to inspect the tokenizer output.
a = AlBERTo_Preprocessing(do_lower_case=True)
s: str = "#IlGOverno presenta le linee guida sulla scuola #labuonascuola - http://t.co/SYS1T9QmQN"
b = a.preprocess(s)

# `tok` is the global tokenizer read by tokenize_function; `pretrained_model`
# is the encoder that MyNetMC / MyNetMCTuned deep-copy in their constructors.
tok = AutoTokenizer.from_pretrained("m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0")
pretrained_model = AutoModel.from_pretrained("m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0")
# Cap the sequence length used by padding="max_length" / truncation=True.
tok.model_max_length = 128 #model.config.max_position_embeddings
tokens = tok.tokenize(b)
print(tokens)

##### models

In [None]:
class MyNetMC(nn.Module):
    """
        Attach a FC layer on top of the BERT head in order to produce a classification output.

        The pooled_output output of BERT is basically a projection of the [CLS] embeddings via another FC layer (768 -> 768 hidden units).
        We stack another FC layer with Dropout on top of that, as reported in https://github.com/google-research/bert/blob/eedf5716ce1268e56f0a50264a88cafad334ac61/run_classifier.py#L574

        Args:
            num_labels: number of output classes of the classification head.
    """
    def __init__(self, num_labels):
        super(MyNetMC, self).__init__()

        self.num_labels = num_labels
        # Deep copy of the module-level encoder, so this instance owns its weights.
        self.model = copy.deepcopy(pretrained_model)#AutoModel.from_pretrained("m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0")
        self.dropout1 = nn.Dropout(0.1)
        # Size the head from `num_labels` (it used to be hard-coded to 3, which
        # silently ignored the constructor argument).
        self.linear1 = nn.Linear(768, num_labels)

        self.loss_fct = nn.CrossEntropyLoss()

    def forward(self, labels, input_ids, attention_mask, **args):
        """Return `(logits, loss)` for a batch.

        `labels` is required: the cross-entropy loss is always computed.
        Extra keyword arguments are forwarded to the wrapped encoder.
        """
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, **args)
        # outputs[1] is used as the pooled sentence representation (see class docstring).
        x = self.dropout1(outputs[1])
        logits = self.linear1(x)
        loss = self.loss_fct(logits, labels)
        return logits , loss
    
class MyNetMCTuned(nn.Module):
    """
        Attach a FC layer on top of the BERT head in order to produce a classification output.

        The pooled_output output of BERT is basically a projection of the [CLS] embeddings via another FC layer (768 -> 768 hidden units).
        We stack another FC layer without Dropout on top of that, as reported in https://github.com/google-research/bert/blob/eedf5716ce1268e56f0a50264a88cafad334ac61/run_classifier.py#L574

        Args:
            num_labels: number of output classes of the classification head.
    """
    def __init__(self, num_labels):
        super(MyNetMCTuned, self).__init__()

        self.num_labels = num_labels
        # Deep copy of the module-level encoder, so this instance owns its weights.
        self.model = copy.deepcopy(pretrained_model)
        # Size the head from `num_labels` (it used to be hard-coded to 3, which
        # silently ignored the constructor argument).
        self.linear = nn.Linear(768, num_labels)

        self.loss_fct = nn.CrossEntropyLoss()

    def forward(self, labels, input_ids, attention_mask, **args):
        """Return `(logits, loss)` for a batch.

        `labels` is required: the cross-entropy loss is always computed.
        Extra keyword arguments are forwarded to the wrapped encoder.
        """
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, **args)
        logits = self.linear(outputs[1])
        loss = self.loss_fct(logits, labels)
        return logits , loss

#### EvSent

In [None]:
def evSent(string, timeout=30):
    """Query the remote SentiLex REST service and map its answer to a class id.

    Args:
        string: text to classify (note: the parameter name shadows the
            imported `string` module inside this function only).
        timeout: seconds to wait for the HTTP response before giving up.

    Returns:
        2 for "positive", 0 for "negative", 1 for any other sentiment value.

    Raises:
        RuntimeError: if the HTTP request fails or the response contains no
            `<sentiment>` element.
    """
    # NOTE(review): credentials are hard-coded in the notebook; consider
    # reading them from environment variables instead.
    params = {
        "action": "SentiLex",
        "username": "asr",
        "password": "asrpwd",
        "lang": "it",
        "text": string,
    }
    try:
        # `params=` lets requests URL-encode the text, so characters such as
        # '&' or '#' can no longer truncate or corrupt the query string.
        result = requests.get("http://ai-rest.blupixelit.eu/rest.php", params=params, timeout=timeout).text
    except requests.exceptions.RequestException as e:
        # The previous `raise print(...)` raised a TypeError (print returns None).
        raise RuntimeError("ERROR EVSENT") from e
    sentiments = re.findall("<sentiment>(.*?)</sentiment>", result)
    if not sentiments:
        raise RuntimeError("ERROR EVSENT: no <sentiment> element in the response")
    result = sentiments[0]
    if result == "positive":
        return 2
    elif result == "negative":
        return 0
    else:
        return 1

def worker(label, text, pbar, preds, true, lock):
    """Classify `text` with evSent and record the result in the shared arrays.

    Results are written at the position given by the global counter `index`,
    i.e. in completion order: `preds` and `true` stay aligned with each other
    but not necessarily with the order in which the tasks were submitted.

    Args:
        label: gold label of `text`, stored in `true`.
        text: text sent to evSent.
        pbar: progress bar advanced by one step per processed text.
        preds: shared array receiving the predicted label.
        true: shared array receiving the gold label.
        lock: lock protecting `index`, the two arrays and the progress bar.
    """
    global index
    # The network call runs outside the lock so threads can overlap their requests.
    res = evSent(text)
    # `with` releases the lock even if one of the writes raises, which the
    # manual acquire()/release() pair did not guarantee.
    with lock:
        preds[index] = res
        true[index] = label
        index += 1
        pbar.update(1)

def evsent_remove_neutrals(true, preds):
    """Print a binary (negative/positive) classification report.

    Samples whose gold label is 1 (neutral) are dropped. Gold and predicted
    labels 2 become 1, every other label becomes 0, and a neutral prediction
    (1) is replaced by a random draw between 0 and 1.
    """
    binary_true , binary_preds = [] , []
    for i, guess in enumerate(preds):
        gold = true[i]
        if gold == 1:
            continue
        binary_true.append(1 if gold == 2 else 0)
        if guess == 2:
            binary_preds.append(1)
        elif guess == 1:
            # Neutral prediction on a non-neutral sample: flip a coin.
            draw_is_high = np.random.rand(1) > 0.50
            binary_preds.append(0 if draw_is_high else 1)
        else:
            binary_preds.append(0)
    print(classification_report(binary_true, binary_preds, target_names=["negative", "positive"]))

#####  utility functions

In [None]:
def compute_metrics(pred):
    """Compute accuracy and macro-averaged precision/recall/F1.

    `pred` must expose `label_ids` (gold labels) and `predictions` (scores
    whose argmax over the last axis is the predicted class).
    """
    gold = pred.label_ids
    predicted = pred.predictions.argmax(-1)
    precision, recall, f1, _support = precision_recall_fscore_support(gold, predicted, average='macro')
    return {
        'accuracy': accuracy_score(gold, predicted),
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

def validation_epoch(model, epoch, val_loader, kind, logging):
    """Run `model` over `val_loader` without gradients and collect its outputs.

    Args:
        model: module returning `(logits, loss)` when called with a batch.
        epoch: epoch number, only used in the progress message.
        val_loader: iterable of dict batches; every value is moved to `device`.
        kind: name of the split; the loss is not accumulated when it is "test".
        logging: when truthy, print the running loss every 25 batches and log
            the final loss to wandb under the key `kind`.

    Returns:
        `(outputs, targets, logitss)`: predicted classes, gold labels and raw
        logits, each as a flat Python list over all samples.
    """
    model.eval()
    targets , outputs , logitss = [] , [] , []
    # Kept as a plain float: the previous tensor/float mix crashed on
    # `.item()` whenever kind == "test" left the accumulator a float.
    cumulative_loss = 0.
    with torch.no_grad():
        for i , data in enumerate(val_loader, 0):
            batch = {k: v.to(device) for k, v in data.items()}
            logits , loss = model(**batch)
            if kind != "test": cumulative_loss += loss.item()
            if (i+1) % 25 == 0 and logging:
                # i+1 batches have been processed so far (was divided by i).
                print(f'Epoch: {epoch}, Loss:  {cumulative_loss/(i+1)}')

            targets.extend(batch["labels"].cpu().detach().numpy())
            outputs.extend(logits.argmax(-1).cpu().detach().numpy().tolist())
            logitss.extend(logits.cpu().detach().numpy().tolist())
    if logging:
        # NOTE(review): `wandb` is not imported in the visible part of the notebook.
        # NOTE(review): the sum of per-batch losses is divided by the number of
        # samples, not of batches — confirm this is the intended normalization.
        wandb.log({kind: {'loss': cumulative_loss / max(len(outputs), 1)}})
    return outputs, targets, logitss


def fine_tune_feel_it(model, training, logging, new_model_num_labels):
    """Fine-tune `model` on `training` with the Hugging Face Trainer and return it.

    Whenever the model currently has two labels, its `classifier.out_proj`
    layer is replaced by a fresh `nn.Linear(768, new_model_num_labels)` (also
    when `new_model_num_labels` is 2). The `logging` argument is unused.
    """
    if model.num_labels == 2:
        model.num_labels = new_model_num_labels
        model.classifier.out_proj = nn.Linear(768, new_model_num_labels)

    hyperparameters = dict(
        num_train_epochs=5,
        per_device_train_batch_size=32,
        save_total_limit=2,
        learning_rate=2e-5,
        warmup_ratio=0.1,
        weight_decay=0.01,
        adam_beta1=0.9,
        adam_beta2=0.999,
        adam_epsilon=1e-6,
        logging_strategy="epoch",
        overwrite_output_dir=True,
        save_strategy="no",
        report_to="none",
    )
    trainer = Trainer(
        model=model,
        args=TrainingArguments("test_trainer", **hyperparameters),
        train_dataset=training,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    return trainer.model


def eval_feelit_model(dataset, keep_neutrals, dataset_name, plot_confidence=False, fine_tune=False, dataset_train=False, logging=False, model_i=None, model_num_labels=3):
    """Evaluate (and optionally fine-tune) the Feel-it sentiment model on `dataset`.

    Args:
        dataset: DataFrame with "text" and "labels" columns (0 negative,
            1 neutral, 2 positive, 3 mixed); mixed samples are always dropped.
        keep_neutrals: keep neutral samples (3-class report) or drop them
            (binary report).
        dataset_name: name used in the printed header and plot titles.
        plot_confidence: also call plot_confidence_distribution.
        fine_tune: fine-tune the model on `dataset_train` before evaluating.
        dataset_train: training DataFrame, required when `fine_tune` is true.
        logging: forwarded to fine_tune_feel_it.
        model_i: already loaded model to evaluate instead of the published one.
        model_num_labels: number of classes of the fine-tuned model.

    Returns:
        The fine-tuned model when `fine_tune` is true, otherwise None.

    Raises:
        ValueError: for the unsupported combination "no neutrals + fine-tuned
            or provided model + 3 labels", or when `fine_tune` is requested
            without `dataset_train`.
    """
    # Fail fast: this used to be detected only inside the post-inference loop,
    # i.e. after the whole fine-tuning and evaluation had already been paid for.
    uses_custom_model = fine_tune or model_i is not None
    if not keep_neutrals and uses_custom_model and model_num_labels == 3:
        raise ValueError("Removing neutrals for fine-tuned model not implemented")
    if fine_tune and (dataset_train is None or dataset_train is False):
        raise ValueError("dataset_train is required when fine_tune=True")

    def MC2binary(e):
        #to convert the class label of positives from 2 into 1, when keep_neutrals=False and model_num_labels=2
        if not keep_neutrals:
            e["labels"] = e["labels"] if e["labels"] == 0 else 1
        return e

    tmp = "no neutral" if not keep_neutrals else ""
    tmp2 = "pretrained" if not fine_tune and model_i is None else "MC fine tuned"
    print("Feel-it ", tmp, " ", tmp2, " --- ", dataset_name)

    tok_feelit = AutoTokenizer.from_pretrained("MilaNLProc/feel-it-italian-sentiment")
    tok_feelit.model_max_length = 128

    if model_i is None:
        model = AutoModelForSequenceClassification.from_pretrained("MilaNLProc/feel-it-italian-sentiment").to(device)
    else:
        model = model_i

    if keep_neutrals:
        testing = Dataset.from_pandas(dataset)\
                        .filter(lambda example: example['labels'] != 3)\
                        .map(lambda examples: tok_feelit(examples["text"], padding="max_length", truncation=True), batched=True)\
                        .with_format("torch", columns=["input_ids", "attention_mask", "labels"])
    else:
        testing = Dataset.from_pandas(dataset)\
                .map(lambda examples: tok_feelit(examples["text"], padding="max_length", truncation=True), batched=True)\
                .filter(lambda example: example['labels'] != 1)\
                .filter(lambda example: example['labels'] != 3)\
                .with_format("torch", columns=["input_ids", "attention_mask", "labels"])
    test_loader = DataLoader(testing, batch_size=32)

    if fine_tune: #fine-tune while keeping neutral samples
        training = Dataset.from_pandas(dataset_train)\
                            .filter(lambda example: example['labels'] != 3)\
                            .map(MC2binary, batched=False)\
                            .map(lambda examples: tok_feelit(examples["text"], padding="max_length", truncation=True), batched=True)\
                            .shuffle()\
                            .with_format("torch", columns=["input_ids", "attention_mask", "labels"])
        model = fine_tune_feel_it(model, training, logging, model_num_labels)

    model.eval()
    trues , preds , logitss = [] , [] , []
    with torch.no_grad():
        for _, data in enumerate(test_loader, 0):
            # Labels stay on the CPU: only the model inputs are moved to the device.
            batch = {k: data[k].to(device) for k in ['attention_mask', 'input_ids']}
            logits = model(**batch)[0]
            trues.extend(data["labels"].detach().numpy())
            preds.extend(logits.argmax(-1).cpu().detach().numpy().tolist())
            logitss.extend(logits.cpu().detach().numpy().tolist())

    trues = np.array(trues)
    preds = np.array(preds)
    proba = torch.nn.functional.softmax(torch.tensor(logitss), dim=1)
    assert len(preds) == len(proba) #and np.all(np.array((proba[:,1] + proba[:,0]) >= 0.99))
    num_assigned_to_neutral , num_correclty_assigned_to_neutral = 0 , 0
    for i in range(len(proba)):
        if keep_neutrals and not fine_tune and model_i is None:
            # Published binary model: its class 1 (positive) becomes label 2.
            if preds[i] == 1:
                preds[i] = 2
            if max(proba[i,0] , proba[i, 1]) <= 0.65: #set to neutral  if the prediction's confidence is too low
                preds[i] = 1
                num_assigned_to_neutral += 1
                if trues[i] == 1:
                    num_correclty_assigned_to_neutral += 1
        elif not keep_neutrals:
            # Binary report: gold positives (2) are relabelled as 1. The
            # unsupported 3-label case has already been rejected above.
            if uses_custom_model and model_num_labels == 2 and trues[i] == 2:
                trues[i] = 1
            elif not fine_tune and model_i is None and trues[i] == 2:
                trues[i] = 1

    targets = ["negative", "neutral", "positive"] if keep_neutrals else ["negative", "positive"]
    print(classification_report(trues, preds, target_names=targets))
    cm = confusion_matrix(trues, preds, normalize='all')
    ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=targets).plot()
    plt.title(f"{dataset_name} {tmp} - Feel-it {tmp2}")
    if keep_neutrals:
        print(f"\n\n{num_assigned_to_neutral} samples were assigned to the class neutral, of which {num_correclty_assigned_to_neutral} were correct, out of {sum((trues) == 1)} neutrals\n\n")

    # Histogram of the probability assigned to class 0 for every test sample.
    fig = go.Figure()
    fig.add_trace(go.Histogram(x=proba[:, 0],
                               xbins=dict(
                                start=0.,
                                end=1.0,
                                size=0.1
                                ),
                               histnorm='probability'))
    fig.update_layout(
        autosize=False,
        width=670,
        height=300,
        xaxis_title="confidence",
        yaxis_title="normalized count",
    )
    fig.update_xaxes(range=[0., 1.])
    fig.show()

    if plot_confidence:
        plot_confidence_distribution(trues, preds, proba, name=f"Feel-it model {tmp2}", dataset=dataset_name + f" {tmp}", plot_neutral=keep_neutrals)
    if fine_tune:
        return model
        
        
def train_epoch(model, train_loader, optimizer, scheduler, epoch, logging):
    """Train `model` for one pass over `train_loader`.

    Args:
        model: module returning `(logits, loss)` when called with a batch.
        train_loader: iterable of dict batches containing a "labels" entry.
        optimizer: optimizer stepped after every batch.
        scheduler: learning-rate scheduler, also stepped after every batch.
        epoch: epoch number, only used in the progress message.
        logging: when truthy, print the running mean loss every 25 batches.

    Returns:
        `(outputs, targets)`: predicted classes and gold labels of all samples.
    """
    model.train()
    targets , outputs = [] , []
    cumulative_loss = 0.
    for i , data in enumerate(train_loader, 0):
        targets.extend(data["labels"].numpy())

        batch = {k: v.to(device) for k, v in data.items()}
        logits , loss = model(**batch)

        cumulative_loss += loss.detach()
        if (i+1) % 25 == 0 and logging:
            # i+1 batches have been accumulated so far (was divided by i).
            print(f'Epoch: {epoch}, Loss:  {cumulative_loss.item()/(i+1)}')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        outputs.extend(logits.argmax(-1).cpu().detach().numpy().tolist())
    return outputs, targets

def fine_tune_alberto(train_loader, model, logging, model_type, dataset_val=None):
    """Fine-tune an AlBERTo classifier for 5 epochs with AdamW and linear warm-up.

    Args:
        train_loader: loader of training batches, passed to train_epoch.
        model: module to fine-tune (modified in place and returned).
        logging: when truthy, print the macro F1 on the training data after
            every epoch (and the running loss inside train_epoch).
        model_type: hyper-parameter preset, either "opt" or "MC".
        dataset_val: unused.

    Returns:
        The fine-tuned model.

    Raises:
        ValueError: if `model_type` is not a known preset (this used to fail
            later with an UnboundLocalError on `hyper`).
    """
    # NOTE: despite its name, 'warmup_steps' is the fraction of the training
    # steps used for warm-up (see num_warmup_steps below).
    presets = {
        "opt": {'learning_rate': 3.000003529363845e-06, 'warmup_steps': 0.6, 'weight_decay': 0.000260393798851559},
        "MC": {'learning_rate': 2e-5, 'warmup_steps': 0.1, 'weight_decay': 0.1},
    }
    if model_type not in presets:
        raise ValueError(f"Unknown model_type {model_type!r}: expected one of {sorted(presets)}")
    hyper = presets[model_type]

    model.train()
    optimizer = torch.optim.AdamW(model.parameters(),
                              lr=hyper["learning_rate"],
                              weight_decay=hyper["weight_decay"],
                              eps=1e-6)
    num_epochs = 5
    num_train_steps = int(len(train_loader) * num_epochs) + 1
    num_warmup_steps =  int(num_train_steps * hyper["warmup_steps"])
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_train_steps)

    for epoch in range(num_epochs):
        outputs, targets = train_epoch(model, train_loader, optimizer, scheduler, epoch, logging)
        if logging:
            f1_train = f1_score(targets, outputs, average='macro')
            print(f"Train F1 = {f1_train}")
    return model
        

def eval_alberto(model, model_name, model_params, dataset, dataset_name, keep_neutrals, plot_confidence, return_preds=False, fine_tuning=False, dataset_train=None, logging=False, model_type=None, model_init=None):
    """Evaluate (and optionally fine-tune) an AlBERTo classifier on `dataset`.

    Args:
        model: model class, instantiated with `num_labels=3` when `model_init`
            is None.
        model_name: name used in the printed header and plot titles.
        model_params: checkpoint path relative to `PATH`, loaded when
            `model_init` is None.
        dataset: DataFrame with "text" and "labels" columns (0 negative,
            1 neutral, 2 positive, 3 mixed); mixed samples are always dropped.
        dataset_name: name used in the printed header and plot titles.
        keep_neutrals: keep neutral samples (3-class report) or drop them and
            report on negative/positive only.
        plot_confidence: also call plot_confidence_distribution.
        return_preds: return `(preds, trues, proba)`.
        fine_tuning: fine-tune on `dataset_train` before evaluating.
        dataset_train: training DataFrame used when `fine_tuning` is true.
        logging: forwarded to fine_tune_alberto.
        model_type: hyper-parameter preset forwarded to fine_tune_alberto.
        model_init: already built model to use instead of loading a checkpoint.

    Returns:
        `(preds, trues, proba)` when `return_preds` is true; otherwise the
        model when `fine_tuning` is true; otherwise None.
    """
    tmp = " no neutrals" if not keep_neutrals else " "
    tmp2 = " fine tuned" if fine_tuning else " "
    print(model_name, tmp, tmp2, " --- ", dataset_name)

    if keep_neutrals:
        testing = Dataset.from_pandas(dataset)\
                        .filter(lambda example: example['labels'] != 3)\
                        .map(tokenize_function, batched=True)\
                        .with_format("torch", columns=["input_ids", "attention_mask", "labels", "token_type_ids"])
    else:
        testing = Dataset.from_pandas(dataset)\
                        .filter(lambda example: example['labels'] != 1)\
                        .filter(lambda example: example['labels'] != 3)\
                        .map(tokenize_function, batched=True)\
                        .with_format("torch", columns=["input_ids", "attention_mask", "labels", "token_type_ids"])
    test_loader = DataLoader(testing, batch_size=32)

    if model_init is None:
        model = model(num_labels=3).to(device)
        # map_location makes the checkpoint loadable even when it was saved
        # from a device that is not available on this machine.
        model.load_state_dict(torch.load(PATH + model_params, map_location=device))
    else:
        model = model_init

    if fine_tuning:
        if keep_neutrals:
            training = Dataset.from_pandas(dataset_train)\
                            .filter(lambda example: example['labels'] != 3)\
                            .map(tokenize_function, batched=True)\
                            .with_format("torch", columns=["input_ids", "attention_mask", "labels", "token_type_ids"])
        else:
            training = Dataset.from_pandas(dataset_train)\
                            .filter(lambda example: example['labels'] != 1)\
                            .filter(lambda example: example['labels'] != 3)\
                            .map(tokenize_function, batched=True)\
                            .with_format("torch", columns=["input_ids", "attention_mask", "labels", "token_type_ids"])
        train_loader = DataLoader(training, batch_size=64, shuffle=True)
        model = fine_tune_alberto(train_loader, model, logging, model_type)

    model.eval()
    preds , trues , logitss = validation_epoch(model, None, test_loader, "test", logging=False)
    proba = torch.nn.functional.softmax(torch.tensor(logitss), dim=1)
    for i in range(len(preds)):
        if not keep_neutrals:
            # Binary report: gold positives (2) are relabelled as 1.
            if trues[i] == 2:
                 trues[i] = 1
            if preds[i] == 1: #reassign prediction to second most-probable prediction
                # idx is 0 (negative) or 1 (positive), i.e. already a binary label.
                idx = np.argmax([proba[i][0], proba[i][2]])
                assert proba[i][0 if idx == 0 else 2] > proba[i][0 if idx == 1 else 2] and proba[i][0 if idx == 0 else 2] != 0 #to check that proba[i][0] != proba[i][2] != 0
                preds[i] = idx
            elif preds[i] == 2:
                preds[i] = 1

    targets = ["negative", "neutral", "positive"] if keep_neutrals else ["negative", "positive"]
    print(classification_report(trues, preds, target_names=targets))
    ConfusionMatrixDisplay(confusion_matrix=confusion_matrix(trues, preds, normalize='all'), display_labels=targets).plot()
    plt.title(f"{dataset_name} - {model_name} {tmp}")

    trues = np.array(trues)
    preds = np.array(preds)

    if plot_confidence:
        plot_confidence_distribution(trues, preds, proba, name=model_name, dataset=dataset_name, plot_neutral=keep_neutrals)
    # return_preds takes precedence over fine_tuning when both are set.
    if return_preds:
        return preds, trues, proba
    if fine_tuning:
        return model
    
    
def eval_mfc_baseline(dataset_train, dataset_test, keep_neutrals):
    """Report the scores of a `DummyClassifier(strategy="stratified")` baseline.

    Mixed samples (label 3) are always removed from both splits; neutral
    samples (label 1) are removed as well when `keep_neutrals` is false.
    Prints the classification report and plots the normalized confusion matrix.
    """
    dropped_labels = [3] if keep_neutrals else [1, 3]
    train_split = dataset_train[~dataset_train.labels.isin(dropped_labels)]
    test_split = dataset_test[~dataset_test.labels.isin(dropped_labels)]

    baseline = DummyClassifier(strategy="stratified")
    baseline.fit(train_split.text, train_split.labels)
    preds = baseline.predict(test_split.text)

    targets = ["negative", "positive"] if not keep_neutrals else ["negative", "neutral", "positive"]
    print(classification_report(test_split.labels, preds, target_names=targets))
    matrix = confusion_matrix(test_split.labels, preds, normalize='all')
    ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=targets).plot()

In [None]:
def _plot_confidence_histogram(proba, mask, title):
    """Show a normalized histogram of the top-class probability of the samples selected by `mask`."""
    fig = go.Figure()
    fig.add_trace(go.Histogram(x=proba[mask, np.argmax(proba[mask,:], axis=1)],
                               xbins=dict(
                                start=0.,
                                end=1.0,
                                size=0.1
                                ),
                               histnorm='probability'))
    fig.update_layout(
        autosize=False,
        width=670,
        height=300,
        title=title,
        xaxis_title="confidence",
        yaxis_title="normalized count",
    )
    fig.update_xaxes(range=[0., 1.])
    fig.show()


def plot_confidence_distribution(trues, preds, proba, name, dataset, plot_neutral):
    """Plot how confident the model was on its wrong predictions, one figure per predicted class.

    Args:
        trues: array of gold labels.
        preds: array of predicted labels.
        proba: per-sample class probabilities, indexed as `proba[sample, class]`.
        name: model name used in the titles.
        dataset: dataset name used in the titles.
        plot_neutral: true for the 3-class setting (0 negative, 1 neutral,
            2 positive); false for the binary setting (0 negative, 1 positive).
    """
    false_negatives = np.logical_and(trues != 0, preds == 0)
    _plot_confidence_histogram(proba, false_negatives, f"{name}'s confidence distribution for {dataset} - false negatives")

    if plot_neutral:
        false_neutrals = np.logical_and(trues != 1, preds == 1)
        _plot_confidence_histogram(proba, false_neutrals, f"{name}'s confidence distribution for {dataset} - false neutrals")

    # The positive class is label 2 with neutrals and label 1 without them.
    false_positives = np.logical_and(trues != 2, preds == 2) if plot_neutral else np.logical_and(trues != 1, preds == 1)
    _plot_confidence_histogram(proba, false_positives, f"{name}'s confidence distribution for {dataset} - false positives")
    
    
def get_lexicon(dataset):
    """
    Build the set of distinct tokens of `dataset`, an iterable of already
    pre-processed sentences; tokens are obtained by splitting on single spaces.
    """
    vocabulary = set()
    for sentence in dataset:
        vocabulary.update(sentence.split(" "))
    return vocabulary

# SENTIPOLC16

In [None]:
# SENTIPOLC16 training split; its column names are reused for the test split below.
train_sentipolc = pd.read_csv(PATH + "Sentipolc16/training_set_sentipolc16.csv")

In [None]:
# The gold test file is parsed by hand: each row holds 8 quoted integer fields
# followed by the tweet text, separated by the two-character delimiter `",`.
# `with` closes the file handle, which the previous bare open() never did.
with open(PATH + "Sentipolc16/test_set_sentipolc16_gold2000.csv", 'r') as file1:
    Lines = file1.readlines()

test_sentipolc = []
for line in Lines:
    if not line.strip():
        continue  # ignore blank lines
    # maxsplit=8 keeps everything after the 8 numeric fields in one piece, so
    # tweets containing the delimiter character (any number of times) are no
    # longer cut or glued together without it.
    arr = line.split("\",", 8)
    for i in range(8):
        arr[i] = int(arr[i].strip("\""))
    test_sentipolc.append(arr)

test_sentipolc = pd.DataFrame(test_sentipolc, columns=train_sentipolc.columns)
test_sentipolc.head()

In [None]:
def separate2united_labels(row):
    """
        Collapse the "opos" / "oneg" flags of a tweet into one integer polarity label.

        Negative (oneg=1, opos=0) -> 0
        Neutral  (oneg=0, opos=0) -> 1
        Positive (oneg=0, opos=1) -> 2
        Mixed    (anything else)  -> 3
    """
    positive, negative = row["opos"], row["oneg"]
    if negative == 0:
        if positive == 0:
            return 1
        if positive == 1:
            return 2
        return 3
    if negative == 1 and positive == 0:
        return 0
    return 3

# Build the model-ready frames: preprocessed text, original row index and the
# single polarity label derived from the opos/oneg flags.
dataset_sentipolc_train = pd.DataFrame({"text": train_sentipolc.text.apply(a.preprocess), "idx": train_sentipolc.index, "labels": train_sentipolc[["opos", "oneg"]].apply(separate2united_labels, axis=1)})
dataset_sentipolc_test  = pd.DataFrame({"text": test_sentipolc.text.apply(a.preprocess), "idx": test_sentipolc.index, "labels": test_sentipolc[["opos", "oneg"]].apply(separate2united_labels, axis=1)})
# Size and class distribution of each split, mixed tweets (label 3) excluded.
print("train: ", len(dataset_sentipolc_train[dataset_sentipolc_train.labels != 3]), np.unique(dataset_sentipolc_train[dataset_sentipolc_train.labels != 3].labels, return_counts=True)[1]/len(dataset_sentipolc_train[dataset_sentipolc_train.labels != 3]))
print("Test: ", len(dataset_sentipolc_test[dataset_sentipolc_test.labels != 3]), np.unique(dataset_sentipolc_test[dataset_sentipolc_test.labels != 3].labels, return_counts=True)[1]/len(dataset_sentipolc_test[dataset_sentipolc_test.labels != 3]))
# Lexicon of the test split only (mixed tweets included); used later for the overlap statistics.
lexicon_sentipolc = get_lexicon(dataset_sentipolc_test.text)

# Statistics over train + test together, mixed tweets excluded.
tmp = pd.concat([dataset_sentipolc_train[dataset_sentipolc_train.labels != 3], dataset_sentipolc_test[dataset_sentipolc_test.labels != 3]])
print("All: ", len(tmp), np.unique(tmp.labels, return_counts=True)[1]/len(tmp))
print("Size lexicon all: ", len(get_lexicon(tmp.text)))
print("Avg num tokens: ", round(np.mean(tmp["text"].apply(lambda x: len(x.split(" ")))), 2))

### Baseline

In [None]:
#%%script echo skipping
examples = dataset_sentipolc_test[dataset_sentipolc_test.labels != 3] # exclude mixed tweets

# Result arrays are pre-filled with -1 so that missing entries can be detected below.
preds , true , pbar = np.full(len(examples), -1) , np.full(len(examples), -1) , tqdm(total=len(examples))
# `index` is the global write position shared by all `worker` threads.
index , lock , p = 0 , Lock() , ThreadPool(processes=6)
for text , label in zip(examples.text, examples.labels):
    p.apply_async(worker, (label, text, pbar, preds, true, lock))
p.close(); p.join()
pbar.close()
# The async results are never retrieved, so (presumably) a failed worker only
# shows up here as a slot still holding -1.
assert not np.any(preds == -1) and not np.any(true == -1)
print(classification_report(true, preds, target_names=["negative", "neutral", "positive"]))

##
# No Neutrals
# reassign labels so that no neutrals are present
##
print("\n\n No neutrals\n")
evsent_remove_neutrals(true, preds)


##
# Stochastic Most Frequent Classifier
##
print("\n\n SMFC\n")
eval_mfc_baseline(dataset_sentipolc_train, dataset_sentipolc_test, keep_neutrals=True)

print("\n\n SMFC no neutrals\n")
eval_mfc_baseline(dataset_sentipolc_train, dataset_sentipolc_test, keep_neutrals=False)

### AlBERTo pretrained - SENTIPOLC16

In [None]:
# 3-class evaluation; predictions are kept for the error analysis in the next cells.
preds , trues , proba = eval_alberto(MyNetMC, "AlBERTo MC", "models/alberto_multiclass.pt", dataset_sentipolc_test, "Sentipolc16", return_preds=True, keep_neutrals=True, plot_confidence=True)
# Same checkpoint evaluated on negative/positive tweets only.
eval_alberto(MyNetMC, "AlBERTo MC", "models/alberto_multiclass.pt", dataset_sentipolc_test, "Sentipolc16", keep_neutrals=False, plot_confidence=True)

In [None]:
# Sanity check: the gold labels returned by eval_alberto match the non-mixed test rows, in order.
assert np.all(trues == np.array(test_sentipolc[["opos", "oneg"]].apply(separate2united_labels, axis=1)[dataset_sentipolc_test.labels != 3].tolist()))
# Irony flag ("iro" column) of the misclassified tweets.
irony = np.array(test_sentipolc.iro.tolist())[dataset_sentipolc_test.labels != 3][preds != trues]

print("Irony distribution in the prediction errors: ", np.unique(irony, return_counts=True))

In [None]:
# Print every non-mixed test tweet whose predicted and gold labels are both 0 (negative).
for i in range(len(preds)):
    if preds[i] == 0 and trues[i] == 0:
        print("\n\n", dataset_sentipolc_test[dataset_sentipolc_test.labels != 3].text.tolist()[i], i)
        print("Pred=", preds[i])
        print("True=", trues[i])

### AlBERTo opt - SENTIPOLC16

In [None]:
# Tuned checkpoint, evaluated with and without the neutral class.
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", dataset_sentipolc_test, "Sentipolc16", keep_neutrals=True, plot_confidence=True)
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", dataset_sentipolc_test, "Sentipolc16", keep_neutrals=False, plot_confidence=True)

### Feel-it model - SENTIPOLC16
Note that the Feel-it model was trained only on the positive and negative classes, so the neutral class is assigned to examples whose prediction confidence is low.

In [None]:
# Published Feel-it model, evaluated with and without the neutral class.
eval_feelit_model(dataset_sentipolc_test, keep_neutrals=True, dataset_name="Sentipolc16", plot_confidence=True)
eval_feelit_model(dataset_sentipolc_test, keep_neutrals=False, dataset_name="Sentipolc16", plot_confidence=True)

### Feel-it MC fine tuned - SENTIPOLC16

In [None]:
# Fine-tune Feel-it on the SENTIPOLC16 training split, then evaluate on the test split;
# the trailing `;` hides the returned model from the cell output.
eval_feelit_model(dataset_sentipolc_test, keep_neutrals=True, fine_tune=True, dataset_train=dataset_sentipolc_train, dataset_name="Sentipolc16", plot_confidence=True);

# FEEL-IT dataset

In [None]:
# Tab-separated file; the columns are renamed to "text" and "label".
train_feel_it = pd.read_csv(PATH + "Feel-it/feelit.tsv", sep='\t', header=0, names=["text", "label"])
# Length is measured in characters of the raw text.
print("\nAverage tweet lenght: ", np.mean(train_feel_it["text"].apply(len)))
train_feel_it.head()

In [None]:
def emotion2sentiment(row):
    """
        Map the emotion stored in row["label"] to an integer polarity label.

        joy                   -> 2
        anger, sadness, fear  -> 0

        Raises ValueError for any other emotion.
    """
    emotion = row["label"]
    if emotion == "joy":
        return 2
    if emotion in ("sadness", "fear", "anger"):
        return 0
    raise ValueError('Value not recognized')

# The whole Feel-it corpus is used as a single evaluation set: preprocessed
# text, original row index and polarity label derived from the emotion.
dataset_feel_it = pd.DataFrame({"text": train_feel_it.text.apply(a.preprocess), "idx": train_feel_it.index, "labels": train_feel_it[["label"]].apply(emotion2sentiment, axis=1)})
print("Test: ", len(dataset_feel_it), np.unique(dataset_feel_it.labels, return_counts=True)[1]/len(dataset_feel_it))
lexicon_feel_it = get_lexicon(dataset_feel_it.text)
# The printed value is |intersection| / |union| of the two lexicons (Jaccard index).
print("Sentipolc lexicon  ∩ Feel-it lexicon: ", round(len(lexicon_feel_it.intersection(lexicon_sentipolc))/len(lexicon_feel_it.union(lexicon_sentipolc)),2))

tmp = dataset_feel_it
print("All: ", len(tmp), np.unique(tmp.labels, return_counts=True)[1]/len(tmp))
print("Size lexicon all: ", len(get_lexicon(tmp.text)))
print("Avg num tokens: ", round(np.mean(tmp["text"].apply(lambda x: len(x.split(" ")))), 2))

### Evsent baseline

In [None]:
%%script echo skipping
examples = dataset_feel_it

preds , true , pbar = np.full(len(examples), -1) , np.full(len(examples), -1) , tqdm(total=len(examples))
index , lock , p = 0 , Lock() , ThreadPool(processes=10)
for text , label in zip(examples.text, examples.labels):
    p.apply_async(worker, (label, text, pbar, preds, true, lock))
p.close(); p.join()
pbar.close()
assert not np.any(preds == -1) and not np.any(true == -1)
print(classification_report(true, preds, target_names=["negative", "neutral", "positive"]))

##
# No Neutrals
# reassign labels so that no neutrals are present
##
print("\n\n No neutrals\n")
evsent_remove_neutrals(true, preds)

### AlBERTo pretrained - FEEL-IT
Recall that the FEEL-IT dataset contains no neutral tweets, whereas AlBERTo was trained with a neutral class, so neutral predictions are reassigned to the second most probable class.

In [None]:
# Binary evaluation (keep_neutrals=False); predictions are kept for the inspection cell below.
preds , trues , proba = eval_alberto(MyNetMC, "AlBERTo MC", "models/alberto_multiclass.pt", dataset_feel_it, "Feel-it", return_preds=True, keep_neutrals=False, plot_confidence=False)

In [None]:
# Print every tweet whose predicted and gold labels are both 1; with
# keep_neutrals=False above, eval_alberto uses 1 for the positive class.
for i in range(len(preds)):
    if preds[i] == 1 and trues[i] == 1:
        print("\n\n", dataset_feel_it[dataset_feel_it.labels != 3].text.tolist()[i], i)
        print("Pred=", preds[i])
        print("True=", trues[i])

### AlBERTo opt - Feel-IT

In [None]:
# Tuned checkpoint on the Feel-it corpus (binary setting).
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", dataset_feel_it, "Feel-it", keep_neutrals=False, plot_confidence=False)

### FEEL_IT model - FEEL_IT dataset

In [None]:
# Published Feel-it model on the Feel-it corpus (binary setting).
eval_feelit_model(dataset_feel_it, keep_neutrals=False, dataset_name="Feel-IT", plot_confidence=False)

# Amazon reviews

In [None]:
##
# One-off code used to create the train/test split; in normal runs the two resulting files are read directly.
##

# amazon = pd.read_json(PATH + "Amazon-reviews/Amazon_reviews_computer_cloths_food_shoaps.json")
# X_train, X_test = train_test_split(amazon, test_size=0.3, random_state=42, stratify=amazon["rating"])
# X_train.to_json(PATH + "Amazon-reviews/amazon_train.json")
# X_test.to_json(PATH + "Amazon-reviews/amazon_test.json")

In [None]:
amazon_train = pd.read_json(PATH + "Amazon-reviews/amazon_train.json")
amazon_test = pd.read_json(PATH + "Amazon-reviews/amazon_test.json")

# The review title is prepended to the body, so both are classified together.
# NOTE: this overwrites "text" in place; re-running the cell without
# reloading the files would prepend the title a second time.
amazon_train["text"] = amazon_train["title"] + ": " +  amazon_train["text"]
amazon_test["text"] = amazon_test["title"] + ": " +  amazon_test["text"]

# Size and rating distribution of each split.
print("Train: ", len(amazon_train) , np.unique(amazon_train.rating, return_counts=True)[1]/len(amazon_train))
print("Test:", len(amazon_test) , np.unique(amazon_test.rating, return_counts=True)[1]/len(amazon_test))

# Length is measured in characters of title + body.
print("\nAverage review lenght Train: ", np.mean(amazon_train["text"].apply(len)))
print("\nAverage review lenght Test: ", np.mean(amazon_test["text"].apply(len)))

amazon_test.head()

In [None]:
# Show the full text of the first test review containing "Preso durante".
amazon_test[amazon_test.text.str.contains("Preso durante")].iloc[0].text

In [None]:
def rating2sentiment(row):
    """
        Map the star rating stored in row["rating"] to a polarity label.

        rating < 3  -> 0 (negative)
        rating == 3 -> 1 (neutral)
        rating > 3  -> 2 (positive)

        Any value that satisfies none of the comparisons (e.g. NaN) yields None.
    """
    stars = row["rating"]
    if stars > 3:
        return 2
    if stars < 3:
        return 0
    if stars == 3:
        return 1
    
#tok = AutoTokenizer.from_pretrained("m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0")
#tok.model_max_length = 400

# Build the evaluation frames: preprocessed text, original row index and 3-class polarity label.
# NOTE(review): `a` is defined earlier in the notebook — presumably the AlBERTo preprocessing object; confirm.
dataset_amazon_train = pd.DataFrame({"text": amazon_train.text.apply(a.preprocess), "idx": amazon_train.index, "labels": amazon_train[["rating"]].apply(rating2sentiment, axis=1)}) 
dataset_amazon_test  = pd.DataFrame({"text": amazon_test.text.apply(a.preprocess), "idx": amazon_test.index, "labels": amazon_test[["rating"]].apply(rating2sentiment, axis=1)}) 
# Split sizes and per-label relative frequencies.
print("Train: ", len(dataset_amazon_train), np.unique(dataset_amazon_train.labels, return_counts=True)[1]/len(dataset_amazon_train))
print("Test: ", len(dataset_amazon_test), np.unique(dataset_amazon_test.labels, return_counts=True)[1]/len(dataset_amazon_test))
# Lexicon overlap with the other corpora, computed on the test split only.
# The printed value is |A ∩ B| / |A ∪ B| (i.e. the Jaccard index), not the raw intersection size.
lexicon_amazon = get_lexicon(dataset_amazon_test.text)
print("Amazon lexicon  ∩ Feel-it lexicon: ", round(len(lexicon_amazon.intersection(lexicon_feel_it))/len(lexicon_amazon.union(lexicon_feel_it)),2))
print("Amazon lexicon  ∩ Sentipolc lexicon: ", round(len(lexicon_amazon.intersection(lexicon_sentipolc))/len(lexicon_amazon.union(lexicon_sentipolc)),2))

# Corpus-wide statistics over train + test.
tmp = pd.concat([dataset_amazon_train, dataset_amazon_test])
print("All: ", len(tmp), np.unique(tmp.labels, return_counts=True)[1]/len(tmp))
print("Size lexicon all: ", len(get_lexicon(tmp.text)))
# "Tokens" here are the pieces of a plain split on single spaces, not model tokens.
print("Avg num tokens: ", round(np.mean(tmp["text"].apply(lambda x: len(x.split(" ")))), 2))

### EvSent baseline

In [None]:
%%script echo skipping
examples = dataset_amazon_test

# preds/true start filled with the sentinel -1; the assert below checks that no -1 is left.
# NOTE(review): `worker` is defined elsewhere — presumably each call writes one
# prediction/label pair into preds/true under `lock`; confirm.
# `index` is assigned but not used in this cell.
preds , true , pbar = np.full(len(examples), -1) , np.full(len(examples), -1) , tqdm(total=len(examples))
index , lock , p= 0 , Lock() , ThreadPool(processes=10)
for text , label in zip(examples.text, examples.labels):
    p.apply_async(worker, (label, text[:1987], pbar, preds, true, lock)) #[:1987] to avoid too long URL
# Wait for every queued task. The AsyncResult objects are never fetched, so an
# exception raised inside a task is not re-raised here; the assert is the only failure signal.
p.close(); p.join()
pbar.close()
assert not np.any(preds == -1) and not np.any(true == -1)
print(classification_report(true, preds, target_names=["negative", "neutral", "positive"]))

##
# No Neutrals
# reassign labels so that no neutrals are present
##
print("\n\n No neutrals\n")
evsent_remove_neutrals(true, preds)

##
# Stochastic
##
print("\n\n SMFC\n")
eval_mfc_baseline(dataset_amazon_train, dataset_amazon_test, keep_neutrals=True)

print("\n\n SMFC no neutrals\n")
eval_mfc_baseline(dataset_amazon_train, dataset_amazon_test, keep_neutrals=False)

### AlBERTo MC/opt - Amazon reviews

In [None]:
preds1 , trues1 , proba = eval_alberto(MyNetMC,      "AlBERTo MC", "models/alberto_multiclass.pt", dataset_amazon_test, "Amazon reviews", return_preds=True, keep_neutrals=True, plot_confidence=False)
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", dataset_amazon_test, "Amazon reviews", keep_neutrals=True, plot_confidence=False)
eval_alberto(MyNetMC,      "AlBERTo MC", "models/alberto_multiclass.pt", dataset_amazon_test, "Amazon reviews", keep_neutrals=False, plot_confidence=False)
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", dataset_amazon_test, "Amazon reviews", keep_neutrals=False, plot_confidence=False)

In [None]:
# Print the Amazon test reviews that AlBERTo MC correctly classified as negative (label 0).
# The loop is bounded by len(preds1): the previous bound, len(preds), referred to
# the predictions of an earlier, unrelated evaluation with a different length.
amazon_texts = dataset_amazon_test.text.tolist()  # built once instead of on every match
for i in range(len(preds1)):
    if preds1[i] == 0 and trues1[i] == 0:
        print("\n\n", amazon_texts[i], i)
        print("Pred=", preds1[i])
        print("True=", trues1[i])

### AlBERTo MC/opt fine tuned - Amazon reviews

In [None]:
preds , trues , proba = eval_alberto(MyNetMC, 
                             "AlBERTo MC", 
                             "models/alberto_multiclass.pt", 
                             dataset_amazon_test, 
                             "Amazon reviews", 
                             return_preds=True,
                             keep_neutrals=True, 
                             plot_confidence=False, 
                             fine_tuning=True, 
                             dataset_train=dataset_amazon_train,
                             logging=False,
                             model_type="MC"
                            )

eval_alberto(MyNetMCTuned, 
             "AlBERTo MC opt", 
             "models/alberto_multiclass_tuned.pt", 
             dataset_amazon_test, 
             "Amazon reviews", 
             keep_neutrals=True, 
             plot_confidence=False, 
             fine_tuning=True, 
             dataset_train=dataset_amazon_train,
             logging=False,
             model_type="opt"
            )

eval_alberto(MyNetMC, 
             "AlBERTo MC", 
             "models/alberto_multiclass.pt", 
             dataset_amazon_test, 
             "Amazon reviews", 
             keep_neutrals=False, 
             plot_confidence=False, 
             fine_tuning=True, 
             dataset_train=dataset_amazon_train,
             logging=False,
             model_type="MC"
            )

eval_alberto(MyNetMCTuned, 
                 "AlBERTo MC opt", 
                 "models/alberto_multiclass_tuned.pt", 
                 dataset_amazon_test, 
                 "Amazon reviews", 
                 keep_neutrals=False, 
                 plot_confidence=False, 
                 fine_tuning=True, 
                 dataset_train=dataset_amazon_train,
                 logging=False,
                 model_type="opt"
                );

###### inspect predictions

In [None]:
idx = 193
dataset_amazon_test.text.tolist()[idx] , dataset_amazon_test.labels.tolist()[idx]

In [None]:
# Print the Amazon test reviews that are neutral (label 1) and were predicted correctly.
# The text list is materialised once instead of being rebuilt on every match.
amazon_texts = dataset_amazon_test.text.tolist()
for i in range(len(preds)):
    if preds[i] == trues[i] and trues[i] == 1:
        print("\n\n", amazon_texts[i], i)
        print("Pred=", preds[i])
        print("True=", trues[i])

### AlBERTo opt - Amazon reviews with stop words removal

In [None]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

class AlBERTo_Preprocessing_Stop_words(object):
    """Text preprocessor that also removes Italian stop words.

    The stop-word list is NLTK's 'italian' list; tokenisation and normalisation
    are delegated to the notebook-level ``text_processor``.
    """

    def __init__(self, do_lower_case=True, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.do_lower_case = do_lower_case
        self.stop_words = set(stopwords.words('italian'))

    def preprocess(self, text):
        """Return ``text`` normalised, stripped of stop words and cleaned up.

        Steps: optional lower-casing, ``text_processor.pre_process_doc``,
        stop-word removal, then the regex clean-up passes listed below.
        """
        lowered = text.lower() if self.do_lower_case else text
        tokens = text_processor.pre_process_doc(lowered)
        cleaned = str(" ".join(tok for tok in tokens if tok not in self.stop_words))

        # Applied in order; each pass works on the output of the previous one.
        cleanup_passes = (
            (r'[^a-zA-ZÀ-ú</>!?♥♡\s\U00010000-\U0010ffff]', ' '),  # drop characters outside the allowed set
            (r'\s+', ' '),                                          # collapse whitespace runs
            (r'(\w)\1{2,}', r'\1\1'),                               # 3+ repeated word chars -> 2
            (r'^\s', ''),                                           # leading whitespace char
            (r'\s$', ''),                                           # trailing whitespace char
        )
        for pattern, replacement in cleanup_passes:
            cleaned = re.sub(pattern, replacement, cleaned)
        return cleaned

# Preprocessor variant that additionally removes Italian stop words.
b = AlBERTo_Preprocessing_Stop_words(do_lower_case=True)

# Rebuild the Amazon test frame with the stop-word-free text and evaluate the tuned model on it.
dataset_amazon = pd.DataFrame({"text": amazon_test.text.apply(b.preprocess), "idx": amazon_test.index, "labels": amazon_test[["rating"]].apply(rating2sentiment, axis=1)})
print("\n\n AlBERTo MC opt no stop words\n\n")
eval_alberto(MyNetMCTuned, "AlBERTo MC opt no stop words", "models/alberto_multiclass_tuned.pt", dataset_amazon, "Amazon reviews", keep_neutrals=True, plot_confidence=False)

### Feel-it - Amazon reviews

In [None]:
eval_feelit_model(dataset_amazon_test, keep_neutrals=False, dataset_name="Amazon reviews", plot_confidence=False)
eval_feelit_model(dataset_amazon_test, keep_neutrals=True, dataset_name="Amazon reviews", plot_confidence=False)

### Feel-it MC fine tuned - Amazon reviews

In [None]:
eval_feelit_model(dataset_amazon_test, keep_neutrals=True, fine_tune=True, dataset_train=dataset_amazon_train, dataset_name="Amazon reviews", plot_confidence=False);

---
# MultiEmotions-it

In [None]:
def separate2united_labels(row):
    """
        Collapse the separate POS / NEG / NEUT flags of a row into one integer label.

        NEG only                    -> 0 (negative)
        neither POS nor NEG, NEUT   -> 1 (neutral)
        POS only                    -> 2 (positive)
        anything else (mixed, unrelated, ...) -> 3
    """
    pos, neg = row["POS"], row["NEG"]
    if pos == 0 and neg == 0:
        # Neither polarity flag set: neutral only when NEUT is explicitly 1.
        return 1 if row["NEUT"] == 1 else 3
    if pos == 1 and neg == 0:
        return 2
    if pos == 0 and neg == 1:
        return 0
    return 3

##
# Code used to split the dataset (run once); afterwards read the 2 resulting files directly
##
# me_train = pd.read_csv(PATH + "Multiemotions-it/Multiemotions-it.tsv", sep='\t')
# me_train["labels"] = me_train[["POS", "NEG", "NEUT"]].apply(separate2united_labels, axis=1)

# X_train, X_test = train_test_split(me_train, test_size=0.15, random_state=42, stratify=me_train["labels"])
# X_train.to_csv(PATH + "Multiemotions-it/Multiemotions-it_train.tsv", sep='\t', index=False)
# X_test.to_csv(PATH + "Multiemotions-it/Multiemotions-it_test.tsv", sep='\t', index=False)

In [None]:
# Load the pre-computed MultiEmotions-it splits (tab-separated).
me_train = pd.read_csv(PATH + "Multiemotions-it/Multiemotions-it_train.tsv", sep='\t')
me_test = pd.read_csv(PATH + "Multiemotions-it/Multiemotions-it_test.tsv", sep='\t')

# The comment body is the text to classify.
me_train["text"] = me_train["comment"] 
me_test["text"] = me_test["comment"]

# Drop the metadata and per-emotion annotation columns; only the remaining columns are used below.
# NOTE(review): the `labels` column read below is expected to already be in the TSV files —
# presumably written by the commented-out split code; confirm.
me_train.drop(['type', 'title', 'URL', 'comment', 'UNRELATED', 'NEUT', 'POS', 'NEG',
               'GIOIA', 'FIDUCIA', 'TRISTEZZA', 'RABBIA', 'PAURA', 'DISGUSTO',
               'SORPRESA', 'TREPIDAZIONE', 'SARCASM', 'EMOTIONS',], axis=1, inplace=True)
me_test.drop(['type', 'title', 'URL', 'comment', 'UNRELATED', 'NEUT', 'POS', 'NEG',
               'GIOIA', 'FIDUCIA', 'TRISTEZZA', 'RABBIA', 'PAURA', 'DISGUSTO',
               'SORPRESA', 'TREPIDAZIONE', 'SARCASM', 'EMOTIONS',], axis=1, inplace=True)

# Split size and relative frequency of each label value.
print("Train: ", len(me_train) , np.unique(me_train.labels, return_counts=True)[1]/len(me_train))
print("Test:", len(me_test) , np.unique(me_test.labels, return_counts=True)[1]/len(me_test))

# Average comment length in characters.
print("\nAverage comment lenght Train: ", np.mean(me_train["text"].apply(len)))
print("\nAverage comment lenght Test: ", np.mean(me_test["text"].apply(len)))
me_test.head()

In [None]:
# Build the evaluation frames: preprocessed text, original row index and the integer label.
dataset_me_train = pd.DataFrame({"text": me_train.text.apply(a.preprocess), "idx": me_train.index, "labels": me_train["labels"].tolist()}) 
dataset_me_test  = pd.DataFrame({"text": me_test.text.apply(a.preprocess), "idx": me_test.index, "labels": me_test["labels"].tolist()}) 
# Statistics exclude label 3 (the mixed/unrelated class of separate2united_labels).
print("train: ", len(dataset_me_train[dataset_me_train.labels != 3]), np.unique(dataset_me_train[dataset_me_train.labels != 3].labels, return_counts=True)[1]/len(dataset_me_train[dataset_me_train.labels != 3]))
print("Test: ", len(dataset_me_test[dataset_me_test.labels != 3]), np.unique(dataset_me_test[dataset_me_test.labels != 3].labels, return_counts=True)[1]/len(dataset_me_test[dataset_me_test.labels != 3]))

# Lexicon overlap, printed as |A ∩ B| / |A ∪ B| (Jaccard index).
# Note: the lexicon is built from the whole test split, label 3 included.
lexicon_me = get_lexicon(dataset_me_test.text)
print("MultiEmotions-IT lexicon  ∩ Feel-it lexicon: ", round(len(lexicon_me.intersection(lexicon_feel_it))/len(lexicon_me.union(lexicon_feel_it)),2))
print("MultiEmotions-IT lexicon  ∩ Sentipolc lexicon: ", round(len(lexicon_me.intersection(lexicon_sentipolc))/len(lexicon_me.union(lexicon_sentipolc)),2))

# Corpus-wide statistics over train + test, label 3 excluded.
tmp = pd.concat([dataset_me_train[dataset_me_train.labels != 3], dataset_me_test[dataset_me_test.labels != 3]])
print("All: ", len(tmp), np.unique(tmp.labels, return_counts=True)[1]/len(tmp))
print("Size lexicon all: ", len(get_lexicon(tmp.text)))
# "Tokens" here are the pieces of a plain split on single spaces, not model tokens.
print("Avg num tokens: ", round(np.mean(tmp["text"].apply(lambda x: len(x.split(" ")))), 2))

In [None]:
dataset_me_test.head()

### EvSent baseline

In [None]:
%%script echo skipping
# Only samples with a label other than 3 are sent to the baseline.
examples = dataset_me_test.loc[dataset_me_test.labels != 3]

# preds/true start filled with the sentinel -1; the assert below checks that no -1 is left.
# NOTE(review): `worker` is defined elsewhere — presumably each call fills one slot of preds/true; confirm.
# `index` is assigned but not used in this cell.
preds , true , pbar = np.full(len(examples), -1) , np.full(len(examples), -1) , tqdm(total=len(examples))
index , lock , p = 0 , Lock() , ThreadPool(processes=10)
for text , label in zip(examples.text, examples.labels):
    p.apply_async(worker, (label, text, pbar, preds, true, lock))
# Wait for all queued tasks; task exceptions are not re-raised because the results are never fetched.
p.close(); p.join()
pbar.close()
assert not np.any(preds == -1) and not np.any(true == -1)
print(classification_report(true, preds, target_names=["negative", "neutral", "positive"]))

##
# No Neutrals
# reassign labels so that no neutrals are present
##
print("\n\n No neutrals\n")
evsent_remove_neutrals(true, preds)

##
# Stochastic MFC
##
# Stochastic most-frequent-class baseline, first with and then without the neutral class.
print("\n\n SMFC\n")
eval_mfc_baseline(dataset_me_train, dataset_me_test, keep_neutrals=True)

# Distinct heading for the second run so the two reports can be told apart
# (same wording as the Amazon and Trip-maml cells).
print("\n\n SMFC no neutrals\n")
eval_mfc_baseline(dataset_me_train, dataset_me_test, keep_neutrals=False)

### AlBERTo MC/opt - MultiEmotions-IT

In [None]:
preds2 , trues2, proba2 = eval_alberto(MyNetMC,      "AlBERTo MC", "models/alberto_multiclass.pt", dataset_me_test, "MultiEmotions-IT", return_preds=True, keep_neutrals=True, plot_confidence=False)
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", dataset_me_test, "MultiEmotions-IT", keep_neutrals=True, plot_confidence=False)

In [None]:
dataset_me_test[dataset_me_test.text.str.contains("é dedicata a tutti i ragazzi")]

In [None]:
# Print the MultiEmotions-IT comments that are negative (0) but were predicted positive (2).
# The filtered text list is built once instead of on every match.
# NOTE(review): the `labels != 3` filter presumably mirrors the filtering done
# inside eval_alberto so that position i lines up with preds2/trues2 — confirm.
me_texts = dataset_me_test[dataset_me_test.labels != 3].text.tolist()
for i in range(len(preds2)):
    if preds2[i] == 2 and trues2[i] == 0:
        print("\n\n", me_texts[i], i)
        print("Pred=", preds2[i])
        print("True=", trues2[i])

In [None]:
eval_alberto(MyNetMC,      "AlBERTo MC", "models/alberto_multiclass.pt", dataset_me_test, "MultiEmotions-IT", keep_neutrals=False, plot_confidence=False)
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", dataset_me_test, "MultiEmotions-IT", keep_neutrals=False, plot_confidence=False)

### AlBERTo MC/opt fine-tuned - MultiEmotions-IT

In [None]:
preds , trues , proba = eval_alberto(MyNetMC, 
                         "AlBERTo MC", 
                         "models/alberto_multiclass.pt", 
                         dataset_me_test, 
                         "MultiEmotions-IT", 
                         return_preds=True,
                         keep_neutrals=True, 
                         plot_confidence=False, 
                         fine_tuning=True, 
                         dataset_train=dataset_me_train,
                         logging=False,
                         model_type="MC"
                        )

eval_alberto(MyNetMCTuned, 
             "AlBERTo MC opt", 
             "models/alberto_multiclass_tuned.pt", 
             dataset_me_test, 
             "MultiEmotions-IT", 
             keep_neutrals=True, 
             plot_confidence=False, 
             fine_tuning=True, 
             dataset_train=dataset_me_train,
             logging=False,
             model_type="opt"
            );

##### inspect comments - neutrals

In [None]:
dataset_me_test[dataset_me_test.text.str.contains("anche se ascolto")]

In [None]:
# Print the neutral (label 1) MultiEmotions-IT comments that the fine-tuned model got wrong.
# The filtered text list is built once instead of on every match.
# NOTE(review): the `labels != 3` filter presumably mirrors the filtering done
# inside eval_alberto so that position i lines up with preds/trues — confirm.
me_texts = dataset_me_test[dataset_me_test.labels != 3].text.tolist()
for i in range(len(preds)):
    if preds[i] != trues[i] and trues[i] == 1:
        print("\n\n", me_texts[i], i)
        print("Pred=", preds[i])
        print("True=", trues[i])

### Feel-IT - MultiEmotions-IT

In [None]:
eval_feelit_model(dataset_me_test, keep_neutrals=True, dataset_name="MultiEmotions-IT", plot_confidence=False)
eval_feelit_model(dataset_me_test, keep_neutrals=False, dataset_name="MultiEmotions-IT", plot_confidence=False)

### Feel-it MC fine tuned - MultiEmotions-IT

In [None]:
eval_feelit_model(dataset_me_test, keep_neutrals=True, fine_tune=True, dataset_train=dataset_me_train, dataset_name="MultiEmotions-IT", plot_confidence=False);

---
# Coadapt sentiment

In [None]:
def label_string2int(row):
    """Map the textual polarity in row["label"] to its integer code.

    "negative" -> 0, "neutral" -> 1, "positive" -> 2; any other value gives None.
    """
    return {"negative": 0, "neutral": 1, "positive": 2}.get(row["label"])

# Load Coadapt sentiment and rebuild it as (preprocessed text, original index, integer label).
# Only a single file is loaded: there is no train/test split for this dataset in this cell.
coadapt = pd.read_json(PATH + "Coadapt/coadapt_sentiment.json")
coadapt = pd.DataFrame({"text": coadapt.text.apply(a.preprocess), "idx": coadapt.index, "labels": coadapt[["label"]].apply(label_string2int, axis=1)}) 

# The "Train" headings below refer to the whole dataset.
print("Train: ", len(coadapt) , np.unique(coadapt.labels, return_counts=True)[1]/len(coadapt))
print("\nAverage text lenght Train: ", np.mean(coadapt["text"].apply(len)))

# Lexicon overlap, printed as |A ∩ B| / |A ∪ B| (Jaccard index).
lexicon_coadapt = get_lexicon(coadapt.text)
print("\nCoadapt lexicon  ∩ Feel-it lexicon: ", round(len(lexicon_coadapt.intersection(lexicon_feel_it))/len(lexicon_coadapt.union(lexicon_feel_it)),2))
print("Coadapt lexicon  ∩ Sentipolc lexicon: ", round(len(lexicon_coadapt.intersection(lexicon_sentipolc))/len(lexicon_coadapt.union(lexicon_sentipolc)),2))


# `tmp` is just an alias of `coadapt`, kept so the statistics lines match the other dataset sections.
tmp = coadapt
print("All: ", len(tmp), np.unique(tmp.labels, return_counts=True)[1]/len(tmp))
print("Size lexicon all: ", len(get_lexicon(tmp.text)))
# "Tokens" here are the pieces of a plain split on single spaces, not model tokens.
print("Avg num tokens: ", round(np.mean(tmp["text"].apply(lambda x: len(x.split(" ")))), 2))

coadapt.head()

### EvSent baseline

In [None]:
#%%script echo skipping
examples = coadapt

# preds/true start filled with the sentinel -1; the assert below checks that no -1 is left.
# NOTE(review): `worker` is defined elsewhere — presumably each call fills one slot of preds/true; confirm.
# `index` is assigned but not used in this cell.
preds , true , pbar = np.full(len(examples), -1) , np.full(len(examples), -1) , tqdm(total=len(examples))
index , lock , p = 0 , Lock() , ThreadPool(processes=10)
for text , label in zip(examples.text, examples.labels):
    p.apply_async(worker, (label, text, pbar, preds, true, lock))
# Wait for all queued tasks; task exceptions are not re-raised because the results are never fetched.
p.close(); p.join()
pbar.close()
assert not np.any(preds == -1) and not np.any(true == -1)
print(classification_report(true, preds, target_names=["negative", "neutral", "positive"]))

##
# No Neutrals
# reassign labels so that no neutrals are present
##
print("\n\n No neutrals\n")
evsent_remove_neutrals(true, preds)

### AlBERTo MC/opt - Coadapt sentiment

In [None]:
preds2 , trues2 , proba2 = eval_alberto(MyNetMC, "AlBERTo MC", "models/alberto_multiclass.pt", coadapt, "Coadapt", keep_neutrals=True, return_preds=True, plot_confidence=False)
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", coadapt, "Coadapt", keep_neutrals=True, plot_confidence=False)

In [None]:
# Print the neutral (label 1) Coadapt samples that AlBERTo MC predicted as negative (0).
# The text list is materialised once instead of being rebuilt on every match.
coadapt_texts = coadapt.text.tolist()
for i in range(len(preds2)):
    if preds2[i] == 0 and trues2[i] == 1:
        print("\n\n", coadapt_texts[i], i)
        print("Pred=", preds2[i])
        print("True=", trues2[i])

In [None]:
eval_alberto(MyNetMC, "AlBERTo MC", "models/alberto_multiclass.pt", coadapt, "Coadapt", keep_neutrals=False, return_preds=True, plot_confidence=False)
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", coadapt, "Coadapt", keep_neutrals=False, plot_confidence=False)

##### inspect samples

In [None]:
coadapt[coadapt.text.str.contains("finito adesso primo colloqui")]

In [None]:
# Print the positive (label 2) Coadapt samples that AlBERTo MC misclassified.
# Uses preds2/trues2 from the Coadapt AlBERTo MC evaluation above. The bare
# `preds`/`trues` names were stale here: `preds` held the EvSent baseline output and
# `trues` the labels of a different dataset, so indexing Coadapt with them was wrong.
coadapt_texts = coadapt.text.tolist()  # built once instead of on every match
for i in range(len(preds2)):
    if preds2[i] != trues2[i] and trues2[i] == 2:
        print("\n\n", coadapt_texts[i], i)
        print("Pred=", preds2[i])
        print("True=", trues2[i])

### Feel-it - Coadapt sentiment

In [None]:
eval_feelit_model(coadapt, keep_neutrals=False, dataset_name="Coadapt", plot_confidence=False)
eval_feelit_model(coadapt, keep_neutrals=True, dataset_name="Coadapt", plot_confidence=False)

---

# AriEmozione dataset

In [None]:
# Load the AriEmozione splits. Column names are supplied explicitly via `names=`, and the files
# are read as latin-1. The "?" and "names" columns are dropped, then every row
# with a missing value is removed (the index is not reset afterwards).
ari_train = pd.read_csv(PATH + "Aria/ariaset_train.tsv", sep='\t', encoding="latin-1", names=["names", "text", "labels", "confidence", "?"])
ari_train.drop(["?", "names"], axis=1, inplace=True)
ari_train.dropna(inplace=True)

ari_test = pd.read_csv(PATH + "Aria/ariaset_test.tsv", sep='\t', encoding="latin-1", names=["names", "text", "labels", "confidence", "?"])
ari_test.drop(["?", "names"], axis=1, inplace=True)
ari_test.dropna(inplace=True)

# Split size and relative frequency of each raw label value.
print("Train: ", len(ari_train) , np.unique(ari_train.labels, return_counts=True)[1]/len(ari_train))
print("Test:", len(ari_test) , np.unique(ari_test.labels, return_counts=True)[1]/len(ari_test))

# Average text length in characters.
print("\nAverage piece lenght Train: ", np.mean(ari_train["text"].apply(len)))
print("\nAverage piece lenght Test: ", np.mean(ari_test["text"].apply(len)))

ari_test.head()

In [None]:
def emotion2label(row):
    """Map the emotion name in row["labels"] to a polarity label.

    Rabbia / Tristezza / Paura  -> 0 (negative)
    Gioia / Ammirazione / Amore -> 2 (positive)
    any other value             -> 1 (neutral)
    """
    emotion = row["labels"]
    if emotion in ("Rabbia", "Tristezza", "Paura"):
        return 0
    if emotion in ("Gioia", "Ammirazione", "Amore"):
        return 2
    return 1

# Build the evaluation frames: preprocessed text, original row index and 3-class polarity label.
dataset_ari_train = pd.DataFrame({"text": ari_train.text.apply(a.preprocess), "idx": ari_train.index, "labels": ari_train[["labels"]].apply(emotion2label, axis=1)}) 
dataset_ari_test = pd.DataFrame({"text": ari_test.text.apply(a.preprocess), "idx": ari_test.index, "labels": ari_test[["labels"]].apply(emotion2label, axis=1)}) 
# Split sizes and per-label relative frequencies.
print("Train: ", len(dataset_ari_train) , np.unique(dataset_ari_train.labels, return_counts=True)[1]/len(dataset_ari_train))
print("Test:", len(dataset_ari_test) , np.unique(dataset_ari_test.labels, return_counts=True)[1]/len(dataset_ari_test))

# Lexicon overlap on the test split, printed as |A ∩ B| / |A ∪ B| (Jaccard index).
lexicon_ari = get_lexicon(dataset_ari_test.text)
print("\nAriEmozione lexicon  ∩ Feel-it lexicon: ", round(len(lexicon_ari.intersection(lexicon_feel_it))/len(lexicon_ari.union(lexicon_feel_it)),2))
print("AriEmozione lexicon  ∩ Sentipolc lexicon: ", round(len(lexicon_ari.intersection(lexicon_sentipolc))/len(lexicon_ari.union(lexicon_sentipolc)),2))

# Corpus-wide statistics over train + test.
tmp = pd.concat([dataset_ari_train, dataset_ari_test])
print("All: ", len(tmp), np.unique(tmp.labels, return_counts=True)[1]/len(tmp))
print("Size lexicon all: ", len(get_lexicon(tmp.text)))
# "Tokens" here are the pieces of a plain split on single spaces, not model tokens.
print("Avg num tokens: ", round(np.mean(tmp["text"].apply(lambda x: len(x.split(" ")))), 2))

In [None]:
dataset_ari_test.text[5]

### EvSent baseline

In [None]:
#%%script echo skipping
examples = dataset_ari_test

# preds/true start filled with the sentinel -1; the assert below checks that no -1 is left.
# NOTE(review): `worker` is defined elsewhere — presumably each call fills one slot of preds/true; confirm.
# `index` is assigned but not used in this cell.
preds , true , pbar = np.full(len(examples), -1) , np.full(len(examples), -1) , tqdm(total=len(examples))
index , lock , p = 0 , Lock() , ThreadPool(processes=10)
for text , label in zip(examples.text, examples.labels):
    p.apply_async(worker, (label, text, pbar, preds, true, lock))
# Wait for all queued tasks; task exceptions are not re-raised because the results are never fetched.
p.close(); p.join()
pbar.close()
assert not np.any(preds == -1) and not np.any(true == -1)
print(classification_report(true, preds, target_names=["negative", "neutral", "positive"]))

##
# No Neutrals
# reassign labels so that no neutrals are present
##
print("\n\n No neutrals\n")
evsent_remove_neutrals(true, preds)

##
# Stochastic MFC
##
# Only the keep_neutrals=False variant is run for this dataset.
print("\n\n SMFC\n")
eval_mfc_baseline(dataset_ari_train, dataset_ari_test, keep_neutrals=False)

### AlBERTo MC/opt - AriEmotions

In [None]:
preds2 , trues2 , proba2 = eval_alberto(MyNetMC,      "AlBERTo MC", "models/alberto_multiclass.pt", dataset_ari_test, "AriEmotions", return_preds=True, keep_neutrals=True, plot_confidence=False)
eval_alberto(MyNetMC,      "AlBERTo MC", "models/alberto_multiclass.pt", dataset_ari_test, "AriEmotions", return_preds=False, keep_neutrals=False, plot_confidence=False)
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", dataset_ari_test, "AriEmotions", keep_neutrals=True, plot_confidence=False)
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", dataset_ari_test, "AriEmotions", keep_neutrals=False, plot_confidence=False)

In [None]:
# Print the AriEmozione samples that are negative (0) but predicted neutral (1), plus any
# sample containing one specific verse of interest.
# The text list is built once: previously it was rebuilt inside the `if` condition on every
# iteration, which made the loop quadratic in the dataset size.
ari_texts = dataset_ari_test.text.tolist()
verse_of_interest = "conoscerai chi sono ti pentirai del dono ma sarà tardi allor"
for i in range(len(preds2)):
    # Parentheses make the original precedence explicit: (mismatch) OR (verse present).
    if (preds2[i] == 1 and trues2[i] == 0) or verse_of_interest in ari_texts[i]:
        print("\n\n", ari_texts[i], i)
        print("Pred=", preds2[i])
        print("True=", trues2[i])

### AlBERTo MC/opt fine tuned - AriEmotions

In [None]:
eval_alberto(MyNetMC, 
             "AlBERTo MC", 
             "models/alberto_multiclass.pt", 
             dataset_ari_test, 
             "AriEmotions", 
             keep_neutrals=True, 
             plot_confidence=False, 
             fine_tuning=True, 
             dataset_train=dataset_ari_train,
             logging=False,
             model_type="MC"
            )

eval_alberto(MyNetMCTuned, 
             "AlBERTo MC opt", 
             "models/alberto_multiclass_tuned.pt", 
             dataset_ari_test, 
             "AriEmotions", 
             keep_neutrals=True, 
             plot_confidence=False, 
             fine_tuning=True, 
             dataset_train=dataset_ari_train,
             logging=False,
             model_type="opt"
            );

### Feel-it model - AriEmotions

In [None]:
eval_feelit_model(dataset_ari_test, keep_neutrals=True, dataset_name="AriEmotions", plot_confidence=False)
eval_feelit_model(dataset_ari_test, keep_neutrals=False, dataset_name="AriEmotions", plot_confidence=False)

### Feel-it MC fine tuned - AriEmotions

In [None]:
eval_feelit_model(dataset_ari_test, keep_neutrals=True, fine_tune=True, dataset_train=dataset_ari_train, dataset_name="AriEmotions", plot_confidence=False);

---
# Trip-maml

In [None]:
def segments2text(row):
    """Rebuild plain text from row["segments"].

    Each segment is split on single spaces and, for every resulting piece, only the part
    before the first "/" is kept; the kept parts are joined back with single spaces.
    """
    words = []
    for segment in row["segments"]:
        for piece in segment.split(" "):
            # Everything from the first "/" onwards is discarded.
            words.append(piece.partition("/")[0])
    return " ".join(words)

def rating2sentiment(row):
    """
        Returns the polarity depending on the rating of the review.

        The rating is read from row["ratingOverall"] (Trip-maml) when that key is present,
        otherwise from row["rating"] (Amazon reviews). This definition replaces the earlier
        notebook-level function of the same name, so it must keep serving the Amazon cells
        too if they are re-run after this one.

        rating < 3  -> 0 (negative)
        rating == 3 -> 1 (neutral)
        rating > 3  -> 2 (positive)

        Any value that satisfies none of the comparisons (e.g. NaN) yields None.
    """
    # `in` checks dict keys and, for a pandas Series row, index labels.
    column = "ratingOverall" if "ratingOverall" in row else "rating"
    rating = row[column]
    if rating == 3:
        return 1
    elif rating > 3:
        return 2
    elif rating < 3:
        return 0
    return None

# Load the Trip-maml splits (JSON Lines) and build (text, original index, polarity label) frames.
# The text is rebuilt from the `segments` field by segments2text; unlike the other dataset
# sections, it is not passed through a.preprocess here.
trip_train = pd.read_json(PATH + "Trip-maml/italian_training.json", lines=True)
dataset_trip_train = pd.DataFrame({"text": trip_train[["segments"]].apply(segments2text, axis=1), "idx": trip_train.index, "labels": trip_train[["ratingOverall"]].apply(rating2sentiment, axis=1)}) 

trip_test = pd.read_json(PATH + "Trip-maml/italian_test.json", lines=True)
dataset_trip_test = pd.DataFrame({"text": trip_test[["segments"]].apply(segments2text, axis=1), "idx": trip_test.index, "labels": trip_test[["ratingOverall"]].apply(rating2sentiment, axis=1)}) 

# Split sizes and per-label relative frequencies.
print("Train: ", len(dataset_trip_train) , np.unique(dataset_trip_train.labels, return_counts=True)[1]/len(dataset_trip_train))
print("Test:  ", len(dataset_trip_test) , np.unique(dataset_trip_test.labels, return_counts=True)[1]/len(dataset_trip_test))

# Average text length in characters.
print("\nAverage piece lenght Train: ", np.mean(dataset_trip_train["text"].apply(len)))
print("\nAverage piece lenght Test: ", np.mean(dataset_trip_test["text"].apply(len)))

# Lexicon overlap on the test split, printed as |A ∩ B| / |A ∪ B| (Jaccard index).
lexicon_trip = get_lexicon(dataset_trip_test.text)
print("\nTrip-maml lexicon  ∩ Feel-it lexicon: ", round(len(lexicon_trip.intersection(lexicon_feel_it))/len(lexicon_trip.union(lexicon_feel_it)),2))
print("Trip-maml lexicon  ∩ Sentipolc lexicon: ", round(len(lexicon_trip.intersection(lexicon_sentipolc))/len(lexicon_trip.union(lexicon_sentipolc)),2))

# Corpus-wide statistics over train + test.
tmp = pd.concat([dataset_trip_train, dataset_trip_test])
print("All: ", len(tmp), np.unique(tmp.labels, return_counts=True)[1]/len(tmp))
print("Size lexicon all: ", len(get_lexicon(tmp.text)))
# "Tokens" here are the pieces of a plain split on single spaces, not model tokens.
print("Avg num tokens: ", round(np.mean(tmp["text"].apply(lambda x: len(x.split(" ")))), 2))

### EvSent baseline

In [None]:
#%%script echo skipping
# Only samples with a label other than 3 are sent to the baseline.
examples = dataset_trip_test[dataset_trip_test.labels != 3]

# preds/true start filled with the sentinel -1; the assert below checks that no -1 is left.
# NOTE(review): `worker` is defined elsewhere — presumably each call fills one slot of preds/true; confirm.
# `index` is assigned but not used in this cell.
preds , true , pbar = np.full(len(examples), -1) , np.full(len(examples), -1) , tqdm(total=len(examples))
index , lock , p = 0 , Lock() , ThreadPool(processes=10)
for text , label in zip(examples.text, examples.labels):
    p.apply_async(worker, (label, text, pbar, preds, true, lock))
# Wait for all queued tasks; task exceptions are not re-raised because the results are never fetched.
p.close(); p.join()
pbar.close()
assert not np.any(preds == -1) and not np.any(true == -1)
print(classification_report(true, preds, target_names=["negative", "neutral", "positive"]))

##
# No Neutrals
# reassign labels so that no neutrals are present
##
print("\n\n No neutrals\n")
evsent_remove_neutrals(true, preds)

##
# Stochastic MFC
##
print("\n\n SMFC\n")
eval_mfc_baseline(dataset_trip_train, dataset_trip_test, keep_neutrals=True)

print("\n\n SMFC no neutrals\n")
eval_mfc_baseline(dataset_trip_train, dataset_trip_test, keep_neutrals=False)

### AlBERTo MC/opt - Trip-maml

In [None]:
preds2 , trues2 , proba2 = eval_alberto(MyNetMC,      "AlBERTo MC", "models/alberto_multiclass.pt", dataset_trip_test, "Trip-maml", return_preds=True, keep_neutrals=True, plot_confidence=False)
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", dataset_trip_test, "Trip-maml", keep_neutrals=True, plot_confidence=False)

In [None]:
# Print the neutral (label 1) Trip-maml reviews that AlBERTo MC predicted as positive (2).
# The filtered text list is built once instead of on every match.
# NOTE(review): the `labels != 3` filter presumably mirrors the filtering done
# inside eval_alberto so that position i lines up with preds2/trues2 — confirm.
trip_texts = dataset_trip_test[dataset_trip_test.labels != 3].text.tolist()
for i in range(len(preds2)):
    if preds2[i] == 2 and trues2[i] == 1:
        print("\n\n", trip_texts[i], i)
        print("Pred=", preds2[i])
        print("True=", trues2[i])

In [None]:
eval_alberto(MyNetMC,      "AlBERTo MC", "models/alberto_multiclass.pt", dataset_trip_test, "Trip-maml", keep_neutrals=False, plot_confidence=False)
eval_alberto(MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt", dataset_trip_test, "Trip-maml", keep_neutrals=False, plot_confidence=False)

### AlBERTo MC/opt fine tuned - Trip-maml

In [None]:
preds , trues , proba = eval_alberto(MyNetMC, 
                         "AlBERTo MC", 
                         "models/alberto_multiclass.pt", 
                         dataset_trip_test, 
                         "Trip-maml", 
                         keep_neutrals=True, 
                         return_preds=True,
                         plot_confidence=False, 
                         fine_tuning=True, 
                         dataset_train=dataset_trip_train,
                         logging=False,
                         model_type="MC"
                        )

eval_alberto(MyNetMCTuned, 
             "AlBERTo MC opt", 
             "models/alberto_multiclass_tuned.pt", 
             dataset_trip_test, 
             "Trip-maml", 
             keep_neutrals=True, 
             plot_confidence=False, 
             fine_tuning=True, 
             dataset_train=dataset_trip_train,
             logging=False,
             model_type="opt"
            );

##### inspect samples

In [None]:
dataset_trip_test[dataset_trip_test.text.str.contains("Leggendo le rece")]

In [None]:
# Print the negative (label 0) Trip-maml reviews that the fine-tuned model got wrong.
# The text list is materialised once instead of being rebuilt on every match.
trip_texts = dataset_trip_test.text.tolist()
for i in range(len(preds)):
    if preds[i] != trues[i] and trues[i] == 0:
        print("\n\n", trip_texts[i], i)
        print("Pred=", preds[i])
        print("True=", trues[i])

### Feel-it model - Trip-maml

In [None]:
eval_feelit_model(dataset_trip_test, keep_neutrals=False, dataset_name="Trip-maml", plot_confidence=False)
eval_feelit_model(dataset_trip_test, keep_neutrals=True, dataset_name="Trip-maml", plot_confidence=False)

### Feel-it MC fine tuned - Trip-MAML

In [None]:
eval_feelit_model(dataset_trip_test, keep_neutrals=True, fine_tune=True, dataset_train=dataset_trip_train, dataset_name="Trip-maml", plot_confidence=False);

# Coadapt valence

In [None]:
def label_string2int(row):
    """Translate the string in ``row["label"]`` into its integer class id.

    "negative" -> 0, "neutral" -> 1, "positive" -> 2.
    Any other label yields ``None``.
    """
    class_ids = {"negative": 0, "neutral": 1, "positive": 2}
    return class_ids.get(row["label"])

def _load_coadapt_valence(file_name):
    """Load one Coadapt valence JSON file into a (text, idx, labels) frame.

    Args:
        file_name: file name inside ``PATH + "Coadapt_valence/"``.

    Returns:
        DataFrame with the preprocessed text (``a.preprocess``), the original
        row index as ``idx`` and the integer class id as ``labels``
        (see ``label_string2int``).
    """
    raw = pd.read_json(PATH + "Coadapt_valence/" + file_name)
    return pd.DataFrame({
        "text": raw.text.apply(a.preprocess),
        "idx": raw.index,
        "labels": raw[["label"]].apply(label_string2int, axis=1),
    })


coadapt_v_train = _load_coadapt_valence("train_set.json")
# Train size and relative frequency of each label value.
print("Train: ", len(coadapt_v_train) , np.unique(coadapt_v_train.labels, return_counts=True)[1]/len(coadapt_v_train))

coadapt_v_A = _load_coadapt_valence("partition_A.json")
coadapt_v_B = _load_coadapt_valence("partition_B.json")
coadapt_v_C = _load_coadapt_valence("partition_C.json")
coadapt_v_D = _load_coadapt_valence("partition_D.json")

# Statistics over the train set plus all four partitions.
tmp = pd.concat([coadapt_v_train, coadapt_v_A, coadapt_v_B, coadapt_v_C, coadapt_v_D])
print("All: ", len(tmp), np.unique(tmp.labels, return_counts=True)[1]/len(tmp))
print("Size lexicon all: ", len(get_lexicon(tmp.text)))
# Token count here is a plain split on single spaces.
print("Avg num tokens: ", round(np.mean(tmp["text"].apply(lambda x: len(x.split(" ")))), 2))

coadapt_v_train.head()

### EvSent baseline

In [None]:
# EvSent baseline on Coadapt valence: the evaluation set is partitions C + D.
examples = pd.concat([coadapt_v_C, coadapt_v_D]) 

# preds / true are pre-filled with -1 as a "not written yet" sentinel; the
# assert below checks that no -1 is left once the pool has finished.
# Note that this rebinds the notebook-level name `preds`.
preds , true , pbar = np.full(len(examples), -1) , np.full(len(examples), -1) , tqdm(total=len(examples))
# NOTE(review): `index` is not read anywhere in this cell; presumably `worker`
# uses it as a shared write cursor guarded by `lock` -- confirm before removing.
index , lock , p = 0 , Lock() , ThreadPool(processes=10)
for text , label in zip(examples.text, examples.labels):
    # The AsyncResult is discarded, so an exception raised inside `worker` is
    # not re-raised here; the sentinel assert below is the only check in this cell.
    p.apply_async(worker, (label, text, pbar, preds, true, lock))
# Stop accepting tasks and wait for all submitted ones to finish.
p.close(); p.join()
pbar.close()
assert not np.any(preds == -1) and not np.any(true == -1)
print(classification_report(true, preds, target_names=["negative", "neutral", "positive"]))

##
# No Neutrals
# reassign labels so that no neutrals are present
##
print("\n\n No neutrals\n")
evsent_remove_neutrals(true, preds)

##
# Stochastic MFC
# (trained on the Coadapt valence train set, evaluated on partitions C + D)
##
print("\n\n SMFC\n")
eval_mfc_baseline(coadapt_v_train, pd.concat([coadapt_v_C, coadapt_v_D]), keep_neutrals=True)

print("\n\n SMFC no neutrals\n")
eval_mfc_baseline(coadapt_v_train, pd.concat([coadapt_v_C, coadapt_v_D]), keep_neutrals=False)

### AlBERTo MC/opt - Coadapt valence

In [None]:
# AlBERTo MC on Coadapt valence partitions C + D with keep_neutrals=True;
# the returned predictions are kept in preds2 / trues2 / proba2.
preds2, trues2, proba2 = eval_alberto(
    MyNetMC,
    "AlBERTo MC",
    "models/alberto_multiclass.pt",
    pd.concat([coadapt_v_C, coadapt_v_D]),
    "Coadapt valence",
    return_preds=True,
    keep_neutrals=True,
    plot_confidence=False,
)
# Same evaluation for the tuned checkpoint.
eval_alberto(
    MyNetMCTuned,
    "AlBERTo MC opt",
    "models/alberto_multiclass_tuned.pt",
    pd.concat([coadapt_v_C, coadapt_v_D]),
    "Coadapt valence",
    keep_neutrals=True,
    plot_confidence=False,
)

In [None]:
# Same two evaluations on Coadapt valence partitions C + D, with keep_neutrals=False.
# The first call asks for predictions (return_preds=True) but discards them.
eval_alberto(
    MyNetMC,
    "AlBERTo MC",
    "models/alberto_multiclass.pt",
    pd.concat([coadapt_v_C, coadapt_v_D]),
    "Coadapt valence",
    return_preds=True,
    keep_neutrals=False,
    plot_confidence=False,
)
eval_alberto(
    MyNetMCTuned,
    "AlBERTo MC opt",
    "models/alberto_multiclass_tuned.pt",
    pd.concat([coadapt_v_C, coadapt_v_D]),
    "Coadapt valence",
    keep_neutrals=False,
    plot_confidence=False,
)

### AlBERTo MC/opt fine tuned - Coadapt valence

In [None]:
# Keyword arguments shared by both fine-tuning runs on Coadapt valence.
coadapt_v_finetune_kwargs = dict(
    keep_neutrals=True,
    plot_confidence=False,
    fine_tuning=True,
    dataset_train=coadapt_v_train,
    logging=False,
)

# AlBERTo MC fine-tuned on the Coadapt valence train set, evaluated on C + D.
eval_alberto(
    MyNetMC, "AlBERTo MC", "models/alberto_multiclass.pt",
    pd.concat([coadapt_v_C, coadapt_v_D]), "Coadapt valence",
    return_preds=True, model_type="MC",
    **coadapt_v_finetune_kwargs,
)

# AlBERTo MC opt: same setup; the trailing ';' suppresses the cell output.
eval_alberto(
    MyNetMCTuned, "AlBERTo MC opt", "models/alberto_multiclass_tuned.pt",
    pd.concat([coadapt_v_C, coadapt_v_D]), "Coadapt valence",
    model_type="opt",
    **coadapt_v_finetune_kwargs,
);

### Feel-it model - Coadapt valence

In [None]:
# Feel-it on Coadapt valence partitions C + D (no fine-tuning arguments passed):
# first with keep_neutrals=False, then with keep_neutrals=True.
eval_feelit_model(
    pd.concat([coadapt_v_C, coadapt_v_D]),
    keep_neutrals=False,
    dataset_name="Coadapt valence",
    plot_confidence=False,
)
eval_feelit_model(
    pd.concat([coadapt_v_C, coadapt_v_D]),
    keep_neutrals=True,
    dataset_name="Coadapt valence",
    plot_confidence=False,
)

### Feel-it model fine tuned - Coadapt valence

In [None]:
# Fine-tune Feel-it on the Coadapt valence train set, evaluate on partitions C + D.
# The trailing ';' suppresses the cell output.
eval_feelit_model(
    pd.concat([coadapt_v_C, coadapt_v_D]),
    keep_neutrals=True,
    fine_tune=True,
    dataset_train=coadapt_v_train,
    dataset_name="Coadapt valence",
    plot_confidence=False,
);

---
# Training on all datasets

##### Feel-it

In [None]:
# Union of all training splits used to fine-tune Feel-it.
dataset_all_feel_it = pd.concat([
    coadapt_v_train,
    dataset_sentipolc_train,
    dataset_amazon_train,
    dataset_ari_train,
    dataset_trip_train,
    dataset_me_train,
])

# Size and relative label frequencies, computed without the rows labelled 3.
all_feel_it_without_3 = dataset_all_feel_it[dataset_all_feel_it.labels != 3]
all_feel_it_counts = np.unique(all_feel_it_without_3.labels, return_counts=True)[1]
print("All train datasets: ", len(all_feel_it_without_3), all_feel_it_counts/len(all_feel_it_without_3))

In [None]:
# Fine-tune Feel-it on the union of all training splits (evaluated on the
# Trip-maml test split) and keep the returned value in `model`.
model = eval_feelit_model(
    dataset_trip_test,
    keep_neutrals=True,
    fine_tune=True,
    dataset_train=dataset_all_feel_it,
    dataset_name="Trip-maml",
    plot_confidence=False,
)

In [None]:
# Evaluate the already fine-tuned `model` on each dataset in turn; every call
# shares the same settings, only the dataset and its display name change.
eval_feelit_on = partial(
    eval_feelit_model,
    keep_neutrals=True,
    fine_tune=False,
    model_i=model,
    plot_confidence=False,
)

eval_feelit_on(dataset_trip_test, dataset_name="Trip-maml")
eval_feelit_on(coadapt, dataset_name="Coadapt")
eval_feelit_on(dataset_feel_it, dataset_name="Feel-it")
eval_feelit_on(dataset_ari_test, dataset_name="AriEmotions")
eval_feelit_on(dataset_sentipolc_test, dataset_name="SentiPolc16")
eval_feelit_on(dataset_me_test, dataset_name="MultiEMotions")
eval_feelit_on(dataset_amazon_test, dataset_name="Amazon reviews")
eval_feelit_on(pd.concat([coadapt_v_C, coadapt_v_D]), dataset_name="Coadapt valence")

###### AlBERTo MC

In [None]:
# Union of the training splits used to fine-tune AlBERTo
# (dataset_sentipolc_train is not part of this list).
dataset_all_alberto = pd.concat([
    coadapt_v_train,
    dataset_amazon_train,
    dataset_ari_train,
    dataset_trip_train,
    dataset_me_train,
])

# Size and relative label frequencies, computed without the rows labelled 3.
all_alberto_without_3 = dataset_all_alberto[dataset_all_alberto.labels != 3]
all_alberto_counts = np.unique(all_alberto_without_3.labels, return_counts=True)[1]
print("All train datasets: ", len(all_alberto_without_3), all_alberto_counts/len(all_alberto_without_3))

In [None]:
# Fine-tune AlBERTo MC on the union of training splits (evaluated on the
# Sentipolc16 test split) and keep the returned value in `model`.
# This rebinds the notebook-level name `model`.
model = eval_alberto(
    MyNetMC,
    "AlBERTo MC",
    "models/alberto_multiclass.pt",
    dataset_sentipolc_test,
    "Sentipolc16",
    fine_tuning=True,
    dataset_train=dataset_all_alberto,
    model_type="MC",
    logging=True,
    keep_neutrals=True,
    plot_confidence=False,
)

In [None]:
# Evaluate the already fine-tuned `model` on each dataset in turn. No model
# class or checkpoint path is given (both None); the model comes in through
# model_init.
eval_alberto_on = partial(
    eval_alberto,
    None,
    "AlBERTo MC fine tuned on all datasets",
    None,
    fine_tuning=False,
    model_init=model,
    keep_neutrals=True,
    plot_confidence=False,
)

eval_alberto_on(dataset_sentipolc_test, "Sentipolc16")
eval_alberto_on(dataset_trip_test, "Trip-maml")
# Evaluation on `coadapt` is disabled:
#eval_alberto_on(coadapt, "Coadapt")
eval_alberto_on(dataset_feel_it, "Feel-it")
eval_alberto_on(dataset_ari_test, "AriEmotions")
eval_alberto_on(dataset_me_test, "MultiEmotions")
eval_alberto_on(dataset_amazon_test, "Amazon reviews")
eval_alberto_on(pd.concat([coadapt_v_C, coadapt_v_D]), "Coadapt valence")

---
#### Further error analysis
Use EvSent to count the words with either a positive or a negative meaning, in order to spot samples that are clearly positive or clearly negative.
Then use AlBERTo or Feel-IT to see how the model behaves on samples that have no clear polarity orientation.

In [None]:
import pickle

# Load the cached token -> polarity lexicon.
# The context manager closes the file even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code; only load a lexicon
# file that was produced by this notebook / a trusted source.
with open(PATH + "sentiment_lexicon.pkl", "rb") as a_file:
    sentiment_lexicon = pickle.load(a_file)

In [None]:
def join_tokenized_token(sa):
    """Merge sub-word pieces back into whole tokens.

    Pieces that start with "##" are glued (without the prefix) onto the
    previous kept token. The literal tokens "<", ">" and "/" are dropped.

    Args:
        sa: iterable of string tokens.

    Returns:
        A new list of merged tokens. If a "##" piece appears before any token
        has been kept (e.g. right after a dropped "<"), it starts a new token
        instead of raising IndexError; an empty remainder is ignored.
    """
    ret = []
    for token in sa:
        if token in ("<", ">", "/"):
            continue
        if token.startswith("##"):
            suffix = token[2:]
            if ret:
                ret[-1] += suffix
            elif suffix:
                # Nothing to attach to yet: keep the piece as its own token.
                ret.append(suffix)
        else:
            ret.append(token)
    return ret


def add_to_sentiment_lexicon(dataset):
    """Add the polarity of every unseen token of ``dataset.text`` to the lexicon.

    Each text is tokenized with ``tok``, its sub-word pieces are merged with
    ``join_tokenized_token``, and every token that is not yet a key of the
    module-level ``sentiment_lexicon`` is stored with the value returned by
    ``evSent(token)``. The lexicon is updated in place; nothing is returned.
    """
    tokenized_texts = dataset.text.apply(tok.tokenize)
    for pieces in tqdm(tokenized_texts):
        for word in join_tokenized_token(pieces):
            if word in sentiment_lexicon:
                continue
            sentiment_lexicon[word] = evSent(word)

#build the sentiment lexicon
# Build the sentiment lexicon from every evaluation dataset, in this order.
# For Sentipolc the rows labelled 3 are left out.
lexicon_sources = [
    dataset_sentipolc[dataset_sentipolc.labels != 3],
    dataset_feel_it,
    dataset_amazon_test,
    dataset_me_test,
    coadapt,
    dataset_ari_test,
    dataset_trip_test,
]
for lexicon_source in lexicon_sources:
    add_to_sentiment_lexicon(lexicon_source)

# Persisting the updated lexicon is disabled:
# a_file = open(PATH + "sentiment_lexicon.pkl", "wb")
# pickle.dump(sentiment_lexicon, a_file)
# a_file.close()

In [None]:
def score_dataset(dataset):
    """Score every text of ``dataset`` against the sentiment lexicon.

    For each text, tokens whose lexicon value is 2 count +1 and tokens whose
    lexicon value is 0 count -1; every other value contributes nothing.

    Returns:
        A pair of numpy arrays with one entry per text: the summed score, and
        a flag that is True when the text has at least one +1 token and at
        least one -1 token.
    """
    lexicon_scores, mixed_flags = [], []
    for pieces in tqdm(dataset.text.apply(tok.tokenize)):
        polarities = [sentiment_lexicon[word] for word in join_tokenized_token(pieces)]
        n_positive = polarities.count(2)
        n_negative = polarities.count(0)
        lexicon_scores.append(n_positive - n_negative)
        mixed_flags.append(n_negative > 0 and n_positive > 0)
    return np.array(lexicon_scores), np.array(mixed_flags)

# Lexicon scores for Sentipolc, leaving out the rows labelled 3.
sentipolc_to_score = dataset_sentipolc[dataset_sentipolc.labels != 3]
scores, has_boths = score_dataset(sentipolc_to_score)

In [None]:
# Print "score, has-both flag, gold label --> text" for every scored sample.
# The filtered frame and its label list are built once; rebuilding them inside
# the loop made this cell quadratic in the number of samples.
sentipolc_scored = dataset_sentipolc[dataset_sentipolc.labels != 3]
sentipolc_scored_labels = sentipolc_scored.labels.tolist()
for i, text in enumerate(sentipolc_scored.text.tolist()):
    print(scores[i], has_boths[i] , sentipolc_scored_labels[i], " --> " ,text)

In [None]:
# Gold-label distribution (values, counts) inside several lexicon-score subsets.
sentipolc_gold = dataset_sentipolc[dataset_sentipolc.labels != 3].labels.to_numpy()
score_subsets = (
    has_boths,                                  # both a positive and a negative token
    scores == 0,                                # neutral lexicon score
    np.logical_and(scores == 0, has_boths),     # neutral score, mixed tokens
    np.logical_and(scores == 0, ~has_boths),    # neutral score, no mixed tokens
    scores >= 2,
    scores >= 3,
    scores <= -1,
)
for subset_mask in score_subsets:
    print(np.unique(sentipolc_gold[subset_mask], return_counts=True))

In [None]:
# AlBERTo MC predictions on Sentipolc16; this rebinds preds / trues / proba.
preds, trues, proba = eval_alberto(
    MyNetMC,
    "AlBERTo MC",
    "models/alberto_multiclass.pt",
    dataset_sentipolc,
    "Sentipolc16",
    return_preds=True,
    keep_neutrals=True,
    plot_confidence=False,
)
# Sanity check: the returned gold labels must match, element by element, the
# Sentipolc labels with the rows labelled 3 removed (the rows that were scored).
sentipolc_expected_trues = dataset_sentipolc[dataset_sentipolc.labels != 3].labels.to_numpy()
assert np.all(trues == sentipolc_expected_trues)

In [None]:
# Violin plots comparing, for several lexicon-score subsets, the gold labels
# (lightseagreen) with the model predictions (orange). The twelve copy-pasted
# traces are generated from one table so that each subset mask is written once.
fig = go.Figure()

sentipolc_labels = dataset_sentipolc[dataset_sentipolc.labels != 3].labels.to_numpy()

# (gold-label trace name, prediction trace name, boolean mask over the samples)
violin_subsets = [
    ('Has both', 'Pred has both', has_boths),
    ('Neutral lexicon score', 'Pred neutral lexicon score', scores == 0),
    ('Neutral lexicon score + has both', 'Pred neutral lexicon score + has both',
     np.logical_and(scores == 0, has_boths)),
    ('Neutral lexicon score + not has both', 'Pred neutral lexicon score + not has both',
     np.logical_and(scores == 0, ~has_boths)),
    ('Score >= 2', 'Pred score >= 2', scores >= 2),
    ('Score <= -1', 'Pred score <= -1', scores <= -1),
]

# Styling shared by every trace.
violin_style = dict(line_color='black', meanline_visible=False, opacity=0.6)

# Trace order is kept: for each subset, gold labels first, predictions second.
for gold_name, pred_name, subset_mask in violin_subsets:
    fig.add_trace(go.Violin(y=sentipolc_labels[subset_mask], fillcolor='lightseagreen',
                            x0=gold_name, **violin_style))
    fig.add_trace(go.Violin(y=preds[subset_mask], fillcolor='orange',
                            x0=pred_name, **violin_style))

fig.update_layout(
    title="Distribution of sentiment lexicon's scores and model's predictions",
    yaxis_zeroline=False,
    showlegend=False,
    # Show class names instead of the integer label ids on the y axis.
    yaxis = dict(
            tickmode = 'array',
            tickvals = [0, 1, 2, 3],
            ticktext = ['Negative', 'Neutral', 'Positive', 'Mixed']
        )
)
fig.show()

In [None]:
# Violin plots of the lexicon score, grouped by prediction outcome: correct
# predictions per class (orange) and wrong predictions per predicted class
# (lightseagreen). The six copy-pasted traces are generated from one table.
fig = go.Figure()

# (trace name, boolean mask over the samples, fill colour)
outcome_groups = [
    ('True positives', np.logical_and(trues == 2, preds == 2), 'orange'),
    ('True negatives', np.logical_and(trues == 0, preds == 0), 'orange'),
    ('True neutrals', np.logical_and(trues == 1, preds == 1), 'orange'),
    ('False positives', np.logical_and(trues != 2, preds == 2), 'lightseagreen'),
    ('False negatives', np.logical_and(trues != 0, preds == 0), 'lightseagreen'),
    ('False neutrals', np.logical_and(trues != 1, preds == 1), 'lightseagreen'),
]

for outcome_name, outcome_mask, fill in outcome_groups:
    fig.add_trace(go.Violin(y=scores[outcome_mask], line_color='black',
                            meanline_visible=False, fillcolor=fill, opacity=0.6,
                            x0=outcome_name))

fig.update_layout(
    title="Distribution of sentiment lexicon's scores and model's predictions",
    yaxis_zeroline=False,
    showlegend=False,
    # Fixed integer ticks for the lexicon score axis.
    yaxis = dict(
            tickmode = 'array',
            tickvals = [-2, -1, 0, 1, 2, 3, 4, 5]
        )
)
fig.show()