In [1]:
import pandas as pd
import numpy as np

#models
import torch
from transformers import AutoTokenizer, AutoModel
from transformers import EvalPrediction
from torch.utils.data import DataLoader, Dataset
from torch.nn import BCEWithLogitsLoss
from torch.optim import Adam

#metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_curve, auc

from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
print(f"Device: {device}")

Device: cuda


## TASK 1 - CORPUS

In [5]:
def load_and_merge_data():
    
    #encodng the data into pandas.DataFrame objects
    url_a_test = '/kaggle/input/dataset/arguments-test.tsv'
    df_a_test = pd.read_csv(url_a_test, sep='\t')

    url_a_training = '/kaggle/input/dataset/arguments-training.tsv'
    df_a_training = pd.read_csv(url_a_training, sep='\t')

    url_a_validation = '/kaggle/input/dataset/arguments-validation.tsv'
    df_a_validation = pd.read_csv(url_a_validation, sep='\t')

    url_l_test = '/kaggle/input/dataset/labels-test.tsv'
    df_l_test = pd.read_csv(url_l_test, sep='\t')

    url_l_training = '/kaggle/input/dataset/labels-training.tsv'
    df_l_training = pd.read_csv(url_l_training, sep='\t')

    url_l_validation = '/kaggle/input/dataset/labels-validation.tsv'
    df_l_validation = pd.read_csv(url_l_validation, sep='\t')

    #merge argument dataframes with label dataframes
    df_test = pd.merge(df_a_test, df_l_test, on='Argument ID')
    df_training = pd.merge(df_a_training, df_l_training, on='Argument ID')
    df_validation = pd.merge(df_a_validation, df_l_validation, on='Argument ID')

    return df_test, df_training, df_validation

In [6]:
df_test, df_training, df_validation = load_and_merge_data()

In [7]:
def merge_and_drop_columns(df):
    # Merge level 2 annotations to level 3 categories
    df['Openess to change'] = df[['Self-direction: thought', 'Self-direction: action', 'Stimulation', 'Hedonism']].any(axis=1).astype(int)
    df['Self-enhancement'] = df[['Hedonism', 'Achievement', 'Power: dominance', 'Power: resources', 'Face']].any(axis=1).astype(int)
    df['Conservation'] = df[['Face', 'Security: personal', 'Security: societal', 'Tradition', 'Conformity: rules', 'Conformity: interpersonal', 'Humility']].any(axis=1).astype(int)
    df['Self-transcendence'] = df[['Humility', 'Benevolence: caring', 'Benevolence: dependability', 'Universalism: concern', 'Universalism: nature', 'Universalism: tolerance', 'Universalism: objectivity']].any(axis=1).astype(int)
    
    # Drop unuseful columns
    columns_to_drop = ['Argument ID', 'Self-direction: thought', 'Self-direction: action', 'Stimulation', 'Hedonism', 'Achievement', 'Power: dominance', 'Power: resources', 'Face', 'Security: personal', 'Security: societal', 'Tradition', 'Conformity: rules', 'Conformity: interpersonal', 'Humility', 'Benevolence: caring', 'Benevolence: dependability', 'Universalism: concern', 'Universalism: nature', 'Universalism: tolerance', 'Universalism: objectivity']
    df = df.drop(columns=columns_to_drop)
    
    return df

df_test = merge_and_drop_columns(df_test)
df_training = merge_and_drop_columns(df_training)
df_validation = merge_and_drop_columns(df_validation)


In [8]:
df_test.head()

Unnamed: 0,Conclusion,Stance,Premise,Openess to change,Self-enhancement,Conservation,Self-transcendence
0,We should end affirmative action,against,affirmative action helps with employment equity.,0,1,1,1
1,We should end affirmative action,in favor of,affirmative action can be considered discrimin...,0,1,0,1
2,We should ban naturopathy,in favor of,naturopathy is very dangerous for the most vul...,0,1,1,1
3,We should prohibit women in combat,in favor of,women shouldn't be in combat because they aren...,0,1,0,0
4,We should ban naturopathy,in favor of,once eradicated illnesses are returning due to...,0,1,1,1


### DATA EXPLORATION
Ancora da inserire

### DATA PREPROCESSING

Encoding 'Stance' column into numerical format  

In [9]:
df_test['Stance'] = df_test['Stance'].replace({'in favor of': 1, 'against': 0}).astype(str)
df_training['Stance'] = df_training['Stance'].replace({'in favor of': 1, 'against': 0}).astype(str)
df_validation['Stance'] = df_validation['Stance'].replace({'in favor of': 1, 'against': 0}).astype(str)

  df_test['Stance'] = df_test['Stance'].replace({'in favor of': 1, 'against': 0}).astype(str)
  df_training['Stance'] = df_training['Stance'].replace({'in favor of': 1, 'against': 0}).astype(str)
  df_validation['Stance'] = df_validation['Stance'].replace({'in favor of': 1, 'against': 0}).astype(str)


Preparing data for tokenization input

In [10]:
labels_test = df_test.iloc[:, 3:7].values
labels_training = df_training.iloc[:, 3:7].values
labels_validation = df_validation.iloc[:, 3:7].values

stance_test = df_test['Stance'].values
stance_training = df_training['Stance'].values
stance_validation = df_validation['Stance'].values

Tokenization process and creation of a dataset structure compatible with the bert model 

In [12]:
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
max_length = 100

class BertDatasetCreator(Dataset):
    def __init__(self, encodings, labels, tokenizer, max_length):
        self.encodings = encodings
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.encodings)
    
    def __getitem__(self, idx):
        item = str(self.encodings[idx])
        item = ' '.join(item.split())
        
        encoded_dict = self.tokenizer.encode_plus(
            item,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True,
        )
        input_ids = encoded_dict['input_ids']
        attention_masks = encoded_dict['attention_mask']
        token_type_ids = encoded_dict['token_type_ids']

        return {
            'input_ids': torch.tensor(input_ids, dtype=torch.long),
            'attention_mask': torch.tensor(attention_masks, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'labels': torch.tensor(self.labels[idx], dtype=torch.float)
        }

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

#### Applying the BertDatasetCreator and preparing the datasets for the three different type of BERT models

##### BERT w/C dataset

In [13]:
test_dataset_c = BertDatasetCreator(df_test['Conclusion'], labels_test, tokenizer, max_length)
train_dataset_c = BertDatasetCreator(df_training['Conclusion'], labels_training, tokenizer, max_length)
val_dataset_c = BertDatasetCreator(df_validation['Conclusion'], labels_validation, tokenizer, max_length)

DataLoader definition - which will supply the data to the neural network in batches for efficient training and processing

In [14]:
batch_size = 16
test_dataloader_c = DataLoader(test_dataset_c, batch_size=batch_size)
train_dataloader_c = DataLoader(train_dataset_c, batch_size=batch_size)
val_dataloader_c = DataLoader(val_dataset_c, batch_size=batch_size)

##### BERT w/CP

In [15]:
test_dataset_cp = BertDatasetCreator(df_test['Conclusion'] + ' ' + df_test['Premise'], labels_test, tokenizer, max_length)
train_dataset_cp = BertDatasetCreator(df_training['Conclusion'] + ' ' + df_training['Premise'], labels_training, tokenizer, max_length)
val_dataset_cp = BertDatasetCreator(df_validation['Conclusion'] + ' ' + df_validation['Premise'], labels_validation, tokenizer, max_length)

test_dataloader_cp = DataLoader(test_dataset_cp, batch_size=batch_size)
train_dataloader_cp = DataLoader(train_dataset_cp, batch_size=batch_size)
val_dataloader_cp = DataLoader(val_dataset_cp, batch_size=batch_size)

##### BERT w/CPS

In [16]:
test_dataset_cps = BertDatasetCreator(df_test['Conclusion'] + ' ' + df_test['Premise'] + ' ' + df_test['Stance'], labels_test, tokenizer, max_length)
train_dataset_cps = BertDatasetCreator(df_training['Conclusion'] + ' ' + df_training['Premise'] + ' ' + df_training['Stance'], labels_training, tokenizer, max_length)
val_dataset_cps = BertDatasetCreator(df_validation['Conclusion'] + ' ' + df_validation['Premise'] + ' ' + df_validation['Stance'], labels_validation, tokenizer, max_length)

test_dataloader_cps = DataLoader(test_dataset_cps, batch_size=batch_size)
train_dataloader_cps = DataLoader(train_dataset_cps, batch_size=batch_size)
val_dataloader_cps = DataLoader(val_dataset_cps, batch_size=batch_size)

## TASK 2 - MODEL DEFINITION

### BASELINE MODELS

Random uniform classifier

In [None]:
def create_random_uniform_classifier(category):
    """
    Creates a random classifier predicting 0 or 1 with uniform probability.
    inputs:
        category: Category to predict
    outputs: 
        a function that generates random predictions
    """
    def random_uniform_classifier(size):
        """
        Generates random uniform predictions for the given category.
        inputs: 
            size: number of predictions to generate
        outputs: 
            array of random uniform predictions
        """
        return np.random.choice([0, 1], size=size)
    
    return random_uniform_classifier

Majority classifier

In [None]:
def create_majority_classifier(category, majority_value):
    """
    Creates a majority classifier always predicting the most frequent valorization for the column.
    inputs:
        category: Category to predict
        majority_value: most frequent value (0 or 1)
    outputs:
        a function that generates majority predictions
    """
    def majority_classifier(size):
        """
        Generates majority predictions for the given category.
        inputs: 
            size: number of predictions to generate
        outputs: 
            array of majority predictions
        """
        return np.full(size, majority_value)
    
    return majority_classifier

Creating the baseline models for every category and saving them in a classifiers dictionary

In [None]:
classifiers = {}

categories = ['Openess to change', 'Self-enhancement', 'Conservation', 'Self-transcendence']

#create classifiers for each category and save them in the dictionary
for category in categories:
    #random uniform classifier
    random_uniform_name = f'random_uniform_classifier_{category}'
    classifiers[random_uniform_name] = create_random_uniform_classifier(category)

    #majority classifier
    majority_name = f'majority_classifier_{category}'
    classifiers[majority_name] = create_majority_classifier(category, majority_value=1) #da capire perchè majority_value=1

### BERT MODEL DEFINITION

In [17]:
class Bert_Model(torch.nn.Module):
    def __init__(self):
        super(Bert_Model, self).__init__()
        self.bert = AutoModel.from_pretrained(
            pretrained_model_name_or_path= 'bert-base-uncased', 
            problem_type='multi_label_classification', 
            num_labels = 4, 
            return_dict=False)
        self.dropout = torch.nn.Dropout(p=0.3)
        self.classifier = torch.nn.Linear(self.bert.config.hidden_size, 4)

    def forward(self, input_ids, attention_mask, token_type_ids):
        _, outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        outputs = self.dropout(outputs)
        outputs = self.classifier(outputs)
        return outputs

#### Bert Models

In [18]:
c_model = Bert_Model()
cp_model = Bert_Model()
cps_model = Bert_Model()

c_model.to(device)
cp_model.to(device)
cps_model.to(device)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Bert_Model(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_af

In [None]:
print(c_model)

## TASK 3 - METRICS

In [41]:
from sklearn.metrics import classification_report

def generate_classification_report(model, X_test, Y_test):
    
    # Mettere il modello in modalità di valutazione
    model.eval()
    
    # Disabilitare il calcolo dei gradienti per la valutazione
    with torch.no_grad():
        # Predire le etichette utilizzando il modello
        Y_pred = model(X_test['input_ids'], X_test['attention_mask'], X_test['token_type_ids'])
        
        # Convertire le predizioni in numpy array
        Y_pred = Y_pred.detach().cpu().numpy()
    
    # Generare il classification report
    report = classification_report(Y_test, Y_pred, output_dict=True)
    
    # Stampare il report
    print("Classification Report:")
    print(report)
    
    # Estrarre e stampare il macro F1 score
    macro_f1_score = report['macro avg']['f1-score']
    print(f"Macro F1 Score: {macro_f1_score}")

# Esempio di utilizzo:
# Supponiamo che c_model, X_test e Y_test siano già definiti
# generate_classification_report(c_model, X_test, Y_test)

## TASK 4 - TRAINING AND EVALUATION

Training process utils

In [28]:
#definition of the loss function
def loss_function(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)

#definition of the optimizers
optimizer = Adam(c_model.parameters(), lr = 1e-5)

# Set seeds for reproducibility
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)

#seeds = [42, 123, 2024]
seeds = 456
set_seed(seeds)

epochs = 10

Training function definition

In [23]:
def trainBert(model, dataloader, optimizer, loss_function):
    size = len(dataloader.dataset)
    model.train()
    running_loss = 0.0
    for batch, data in enumerate(dataloader, 0):
        ids = data['input_ids'].to(device, dtype=torch.long)
        mask = data['attention_mask'].to(device, dtype=torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
        labels = data['labels'].to(device)
        optimizer.zero_grad()
        outputs = model(ids, mask, token_type_ids)
    
        loss_value = loss_function(outputs, labels)
        loss_value.backward()
        optimizer.step()
        running_loss += loss_value.item()
        avg_train_loss = running_loss / len(dataloader)
  
        if batch % 100 == 0:
            loss_value, current = loss_value.item(), batch * len(ids)
            print(f"Train loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")
    
    return avg_train_loss

Validation function definition

In [26]:

def validate_model(model, dataloader):
    model.eval()
    all_labels = []
    all_outputs = []

    with torch.no_grad():
        for batch_idx, data in enumerate(dataloader, 0):
            input_ids = data['input_ids'].to(device, dtype = torch.long)
            attention_mask = data['attention_mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            labels = data['labels'].to(device, dtype = torch.float)
            outputs = model(input_ids, attention_mask, token_type_ids)
            all_labels.extend(labels.cpu().numpy())
            all_outputs.extend(outputs.cpu().numpy())

    all_labels = np.array(all_labels)
    all_outputs = np.array(all_outputs)

    # Numero di etichette
    num_labels = all_labels.shape[1]

    # Inizializza una lista per memorizzare le soglie ottimali per ogni etichetta
    optimal_thresholds = []
    auc_scores = []

    for i in range(num_labels):
        # Calcola la curva ROC
        fpr, tpr, thresholds = roc_curve(all_labels[:, i], all_outputs[:, i])
        # Calcola l'AUC
        roc_auc = auc(fpr, tpr)
        auc_scores.append(roc_auc)
        # Trova la soglia che massimizza la somma di sensibilità e specificità
        optimal_idx = np.argmax(tpr - fpr)
        optimal_threshold = thresholds[optimal_idx]
        optimal_thresholds.append(optimal_threshold)

    # Applica le soglie ottimali per ottenere le previsioni binarie
    all_preds = np.zeros_like(all_outputs)
    for i in range(num_labels):
        all_preds[:, i] = (all_outputs[:, i] > optimal_thresholds[i]).astype(int)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

     # Stampa le AUC per ogni etichetta
    for i, auc_score in enumerate(auc_scores):
        print(f'AUC for label {i}: {auc_score}')

    return accuracy, precision, recall, f1, optimal_thresholds


#### TRAINING BERT W/C

In [29]:
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}\n-------------------------------")
    avg_train_loss_c = trainBert(c_model, train_dataloader_c, optimizer, loss_function)
    print(f'Loss: {avg_train_loss:.4f}')
    #validation
    accuracy_c, precision_c, recall_c, f1_c, c_thresholds = validate_model(c_model, val_dataloader_c)
    print(f'Validation - Accuracy: {accuracy_c:.4f}, Precision: {precision_c:.4f}, Recall: {recall_:.4f}, F1 Score: {f1_c:.4f}')

torch.save(c_model.state_dict(), 'model.pthc')


Epoch 1
-------------------------------
Train loss: 0.593304  [    0/ 5393]
Train loss: 0.559206  [ 1600/ 5393]
Train loss: 0.536918  [ 3200/ 5393]
Train loss: 0.579860  [ 4800/ 5393]
Loss: 0.5478
AUC for label 0: 0.5219432100300885
AUC for label 1: 0.6431367946934008
AUC for label 2: 0.4952620035212319
AUC for label 3: 0.5146610140634045
Validation - Accuracy: 0.1403, Precision: 0.7008, Recall: 0.6153, F1 Score: 0.6416
Epoch 2
-------------------------------
Train loss: 0.624622  [    0/ 5393]
Train loss: 0.549056  [ 1600/ 5393]
Train loss: 0.539200  [ 3200/ 5393]
Train loss: 0.526498  [ 4800/ 5393]
Loss: 0.5479
AUC for label 0: 0.5386101955982032
AUC for label 1: 0.6604704186155679
AUC for label 2: 0.5516912357136463
AUC for label 3: 0.5233748765621276
Validation - Accuracy: 0.1060, Precision: 0.7342, Recall: 0.4895, F1 Score: 0.5528
Epoch 3
-------------------------------
Train loss: 0.596656  [    0/ 5393]
Train loss: 0.534782  [ 1600/ 5393]
Train loss: 0.552169  [ 3200/ 5393]
Trai

Printing the classification_report for BERT w/C

In [42]:
generate_classification_report(c_model, test_dataset_c, labels_test)

KeyError: 'input_ids'

### TRAINING BERT W/CP

In [43]:
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}\n-------------------------------")
    avg_train_loss_cp = trainBert(cp_model, train_dataloader_cp, optimizer, loss_function)
    print(f'Loss: {avg_train_loss_cp:.4f}')
    #validation
    accuracy_cp, precision_cp, recall_cp, f1_cp, cp_thresholds = validate_model(cp_model, val_dataloader_cp)
    print(f'Validation - Accuracy: {accuracy_cp:.4f}, Precision: {precision_cp:.4f}, Recall: {recall_cp:.4f}, F1 Score: {f1_cp:.4f}')

torch.save(cp_model.state_dict(), 'model.pthcp')

Epoch 1
-------------------------------
Train loss: 0.627758  [    0/ 5393]
Train loss: 0.633907  [ 1600/ 5393]
Train loss: 0.658979  [ 3200/ 5393]
Train loss: 0.625966  [ 4800/ 5393]
Loss: 0.6745
AUC for label 0: 0.49786595137071815
AUC for label 1: 0.5219590157979737
AUC for label 2: 0.46428262361612604
AUC for label 3: 0.4591301460823373
Validation - Accuracy: 0.0401, Precision: 0.7302, Recall: 0.5043, F1 Score: 0.4236
Epoch 2
-------------------------------
Train loss: 0.638482  [    0/ 5393]
Train loss: 0.633286  [ 1600/ 5393]
Train loss: 0.644881  [ 3200/ 5393]
Train loss: 0.622176  [ 4800/ 5393]
Loss: 0.6718
AUC for label 0: 0.49786595137071815
AUC for label 1: 0.5219590157979737
AUC for label 2: 0.46428262361612604
AUC for label 3: 0.4591301460823373
Validation - Accuracy: 0.0401, Precision: 0.7302, Recall: 0.5043, F1 Score: 0.4236
Epoch 3
-------------------------------
Train loss: 0.638155  [    0/ 5393]
Train loss: 0.618516  [ 1600/ 5393]
Train loss: 0.627788  [ 3200/ 5393]


### TRAINING BERT W/CPS

In [44]:
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}\n-------------------------------")
    avg_train_loss_cps = trainBert(cps_model, train_dataloader_cps, optimizer, loss_function)
    print(f'Loss: {avg_train_loss_cps:.4f}')
    #validation
    accuracy_cps, precision_cps, recall_cps, f1_cps, cps_thresholds = validate_model(cps_model, val_dataloader_cps)
    print(f'Validation - Accuracy: {accuracy_cps:.4f}, Precision: {precision_cps:.4f}, Recall: {recall_cps:.4f}, F1 Score: {f1_cps:.4f}')

torch.save(cps_model.state_dict(), 'model.pthcps')

Epoch 1
-------------------------------
Train loss: 0.699721  [    0/ 5393]
Train loss: 0.688629  [ 1600/ 5393]
Train loss: 0.676356  [ 3200/ 5393]
Train loss: 0.710600  [ 4800/ 5393]
Loss: 0.7335
AUC for label 0: 0.5163626339984023
AUC for label 1: 0.46930878975339063
AUC for label 2: 0.45422771627226877
AUC for label 3: 0.498875438417271
Validation - Accuracy: 0.0327, Precision: 0.7634, Recall: 0.2412, F1 Score: 0.2844
Epoch 2
-------------------------------
Train loss: 0.751042  [    0/ 5393]
Train loss: 0.678805  [ 1600/ 5393]
Train loss: 0.676573  [ 3200/ 5393]
Train loss: 0.703097  [ 4800/ 5393]
Loss: 0.7355
AUC for label 0: 0.5163626339984023
AUC for label 1: 0.46930878975339063
AUC for label 2: 0.45422771627226877
AUC for label 3: 0.498875438417271
Validation - Accuracy: 0.0327, Precision: 0.7634, Recall: 0.2412, F1 Score: 0.2844
Epoch 3
-------------------------------
Train loss: 0.681916  [    0/ 5393]
Train loss: 0.698266  [ 1600/ 5393]
Train loss: 0.688345  [ 3200/ 5393]
Tr