In [1]:
import pandas as pd
import numpy as np

import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
#from transformers import Trainer, TrainingArguments
from transformers import EvalPrediction
from torch.utils.data import DataLoader, Dataset
from torch.nn import BCEWithLogitsLoss
from torch.optim import Adam

from sklearn.metrics import f1_score

## TASK 1 - CORPUS

In [2]:
def load_and_merge_data():
    
    #encodng the data into pandas.DataFrame objects
    url_a_test = 'Data/arguments-test.tsv'
    df_a_test = pd.read_csv(url_a_test, sep='\t')

    url_a_training = 'Data/arguments-training.tsv'
    df_a_training = pd.read_csv(url_a_training, sep='\t')

    url_a_validation = 'Data/arguments-validation.tsv'
    df_a_validation = pd.read_csv(url_a_validation, sep='\t')

    url_l_test = 'Data/labels-test.tsv'
    df_l_test = pd.read_csv(url_l_test, sep='\t')

    url_l_training = 'Data/labels-training.tsv'
    df_l_training = pd.read_csv(url_l_training, sep='\t')

    url_l_validation = 'Data/labels-validation.tsv'
    df_l_validation = pd.read_csv(url_l_validation, sep='\t')

    #merge argument dataframes with label dataframes
    df_test = pd.merge(df_a_test, df_l_test, on='Argument ID')
    df_training = pd.merge(df_a_training, df_l_training, on='Argument ID')
    df_validation = pd.merge(df_a_validation, df_l_validation, on='Argument ID')

    return df_test, df_training, df_validation

In [3]:
df_test, df_training, df_validation = load_and_merge_data()

In [4]:
def merge_and_drop_columns(df):
    # Merge level 2 annotations to level 3 categories
    df['Openess to change'] = df[['Self-direction: thought', 'Self-direction: action', 'Stimulation', 'Hedonism']].any(axis=1).astype(int)
    df['Self-enhancement'] = df[['Hedonism', 'Achievement', 'Power: dominance', 'Power: resources', 'Face']].any(axis=1).astype(int)
    df['Conservation'] = df[['Face', 'Security: personal', 'Security: societal', 'Tradition', 'Conformity: rules', 'Conformity: interpersonal', 'Humility']].any(axis=1).astype(int)
    df['Self-transcendence'] = df[['Humility', 'Benevolence: caring', 'Benevolence: dependability', 'Universalism: concern', 'Universalism: nature', 'Universalism: tolerance', 'Universalism: objectivity']].any(axis=1).astype(int)
    
    # Drop unuseful columns
    columns_to_drop = ['Argument ID', 'Self-direction: thought', 'Self-direction: action', 'Stimulation', 'Hedonism', 'Achievement', 'Power: dominance', 'Power: resources', 'Face', 'Security: personal', 'Security: societal', 'Tradition', 'Conformity: rules', 'Conformity: interpersonal', 'Humility', 'Benevolence: caring', 'Benevolence: dependability', 'Universalism: concern', 'Universalism: nature', 'Universalism: tolerance', 'Universalism: objectivity']
    df = df.drop(columns=columns_to_drop)
    
    return df

df_test = merge_and_drop_columns(df_test)
df_training = merge_and_drop_columns(df_training)
df_validation = merge_and_drop_columns(df_validation)


### DATA EXPLORATION
Ancora da inserire

### DATA PREPROCESSING

Encoding 'Stance' column into numerical format  

In [5]:
df_training['Stance'] = df_training['Stance'].replace({'in favor of': 1, 'against': 0}).astype(int)
df_validation['Stance'] = df_validation['Stance'].replace({'in favor of': 1, 'against': 0}).astype(int)
df_test['Stance'] = df_test['Stance'].replace({'in favor of': 1, 'against': 0}).astype(int)

Preparing data for tokenization input

In [6]:
labels_test = df_test.iloc[:, 3:7].values
labels_training = df_training.iloc[:, 3:7].values
labels_validation = df_validation.iloc[:, 3:7].values

stance_test = df_test['Stance'].values
stance_training = df_training['Stance'].values
stance_validation = df_validation['Stance'].values



In [7]:
from datasets import Dataset
ds_test = Dataset.from_pandas(df_test)
ds_training = Dataset.from_pandas(df_training)
ds_validation = Dataset.from_pandas(df_validation)

Tokenization process

In [18]:
model_name = 'bert-base-uncased'
max_len = 100
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenization(ds, stance, model_type):
    # Initialize lists to store the results
    input_ids = []
    attention_masks = []
    token_type_ids = []
 
    if model_type == 'c':
        c_texts = ds['Conclusion']
        
        for text in c_texts:
            encoded_dict = tokenizer.encode_plus(
                text,
                add_special_tokens=True,
                max_length=max_len,
                padding='max_length',
                return_token_type_ids=True,
                truncation=True,
                return_attention_mask=True,
                return_tensors='pt'
            )
            input_ids.append(encoded_dict['input_ids'])
            attention_masks.append(encoded_dict['attention_mask'])
            token_type_ids.append(encoded_dict['token_type_ids'])
            
        # Convert lists to tensors
        input_ids = torch.cat(input_ids, dim=0)
        attention_masks = torch.cat(attention_masks, dim=0)
        token_type_ids = torch.cat(token_type_ids, dim=0)

        # Combine the results into a dictionary
        df_c_inputs = {
            'input_ids': input_ids,
            'attention_mask': attention_masks,
            'token_type_ids': token_type_ids
        }
        return df_c_inputs
    
    elif model_type == 'cp':
        # Extract the list of texts for tokenization of BERT_cp and BERT_cps model inputs
        cp_texts = ds['Conclusion']+[" "]+ds['Premise']

        for text in cp_texts:
            encoded_dict = tokenizer.encode_plus(
                        text,
                        add_special_tokens=True,
                        max_length=max_len,
                        padding='max_length',
                        return_token_type_ids=True,
                        truncation=True,
                        return_attention_mask=True,
                        return_tensors='pt'
            )
            input_ids.append(encoded_dict['input_ids'])
            attention_masks.append(encoded_dict['attention_mask'])
            token_type_ids.append(encoded_dict['token_type_ids'])

        # Convert lists to tensors
        input_ids = torch.cat(input_ids, dim=0)
        attention_masks = torch.cat(attention_masks, dim=0)
        token_type_ids = torch.cat(token_type_ids, dim=0)

        # Combine the results into a dictionary
        df_cp_inputs = {
            'input_ids': input_ids,
            'attention_mask': attention_masks,
            'token_type_ids': token_type_ids
        }
        return df_cp_inputs   
    
    elif model_type == 'cps': 
        # Extract the list of texts for tokenization of BERT_cp and BERT_cps model inputs
        cps_texts = ds['Conclusion']+[" "]+ds['Premise']
        stance = []

        for text in cps_texts:
            encoded_dict = tokenizer.encode_plus(
                        text,
                        add_special_tokens=True,
                        max_length=max_len,
                        padding='max_length',
                        return_token_type_ids=True,
                        truncation=True,
                        return_attention_mask=True,
                        return_tensors='pt'
            )
            input_ids.append(encoded_dict['input_ids'])
            attention_masks.append(encoded_dict['attention_mask'])
            token_type_ids.append(encoded_dict['token_type_ids'])

        # Convert lists to tensors
        input_ids = torch.cat(input_ids, dim=0)
        attention_masks = torch.cat(attention_masks, dim=0)
        token_type_ids = torch.cat(token_type_ids, dim=0)
        stance = torch.tensor(stance, dtype=torch.float)

        # Combine the results into a dictionary
        df_cps_inputs = {
            'input_ids': input_ids,
            'attention_mask': attention_masks,
            'token_type_ids': token_type_ids,
            'stance': stance
        }
        return df_cps_inputs   
        

In [19]:
train_dataset_c = tokenization(ds_training, stance_training, 'c')
train_dataset_cp = tokenization(ds_training, stance_training, 'cp')
train_dataset_cps = tokenization(ds_training, stance_training, 'cps')

val_dataset_c = tokenization(ds_validation, stance_validation, 'c')
val_dataset_cp = tokenization(ds_validation, stance_validation, 'cp')
val_dataset_cps = tokenization(ds_validation, stance_validation, 'cps')

test_dataset_c = tokenization(ds_test, stance_test, 'c')
test_dataset_cp = tokenization(ds_test, stance_test, 'cp')
test_dataset_cps = tokenization(ds_test, stance_test, 'cps')

DATA LOADER - BATCH

In [61]:
batch_size = 16

In [62]:
class DatasetCreator(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    
    def __len__(self):
        return len(self.encodings)
    
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

'''
    def __len__(self):
        return len(self.labels)
    
    
'''

'\n    def __len__(self):\n        return len(self.labels)\n    \n    \n'

In [63]:
tr_dataset_c = DatasetCreator(train_dataset_c, labels_training)
tr_dataset_cp = DatasetCreator(train_dataset_cp, labels_training)
tr_dataset_cps = DatasetCreator(train_dataset_cps, labels_training)

v_dataset_c = DatasetCreator(val_dataset_c, labels_validation)
v_dataset_cp = DatasetCreator(val_dataset_cp, labels_validation)
v_dataset_cps = DatasetCreator(val_dataset_cps, labels_validation)

te_dataset_c = DatasetCreator(test_dataset_c, labels_test)
te_dataset_cp = DatasetCreator(test_dataset_cp, labels_test)
te_dataset_cps = DatasetCreator(test_dataset_cps, labels_test)

In [64]:
print(tr_dataset_c.__getitem__(0))

{'input_ids': tensor([  101,  2057,  2323,  7221,  2529, 18856, 13369,   102,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


In [65]:
train_dataloaderc = DataLoader(tr_dataset_c, batch_size=batch_size)
val_dataloaderc = DataLoader(v_dataset_c, batch_size=batch_size)
test_dataloaderc = DataLoader(te_dataset_c, batch_size=batch_size)

train_dataloadercp = DataLoader(tr_dataset_cp, batch_size=batch_size)
val_dataloadercp = DataLoader(v_dataset_cp, batch_size=batch_size)
test_dataloadercp = DataLoader(te_dataset_cp, batch_size=batch_size)

train_dataloadercps = DataLoader(tr_dataset_cps, batch_size=batch_size)
val_dataloadercps = DataLoader(v_dataset_cps, batch_size=batch_size)
test_dataloadercps = DataLoader(te_dataset_cps, batch_size=batch_size)

In [74]:
for batch in train_dataloaderc:
    ids_batch, att_mask_batch, types_batch, label_batch = batch
    print("Input IDs:", len(ids_batch)) 
    print("Attention Mask:", len(att_mask_batch))
    print("Token Type IDs:", len(types_batch))
    print("Labels:", len(label_batch))

for batch in val_dataloaderc:
    ids_batch, att_mask_batch, types_batch, label_batch = batch
    print("Input IDs:", len(ids_batch)) 
    print("Attention Mask:", len(att_mask_batch))
    print("Token Type IDs:", len(types_batch))
    print("Labels:", len(label_batch))

for batch in test_dataloaderc:
    ids_batch, att_mask_batch, types_batch, label_batch = batch
    print("Input IDs:", len(ids_batch)) 
    print("Attention Mask:", len(att_mask_batch))
    print("Token Type IDs:", len(types_batch))
    print("Labels:", len(label_batch))

Input IDs: 9
Attention Mask: 14
Token Type IDs: 14
Labels: 6
Input IDs: 9
Attention Mask: 14
Token Type IDs: 14
Labels: 6
Input IDs: 9
Attention Mask: 14
Token Type IDs: 14
Labels: 6


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


## TASK 2 - MODEL DEFINITION

### BASELINE MODELS

Random uniform classifier

In [31]:
def create_random_uniform_classifier(category):
    """
    Creates a random classifier predicting 0 or 1 with uniform probability.
    inputs:
        category: Category to predict
    outputs: 
        a function that generates random predictions
    """
    def random_uniform_classifier(size):
        """
        Generates random uniform predictions for the given category.
        inputs: 
            size: number of predictions to generate
        outputs: 
            array of random uniform predictions
        """
        return np.random.choice([0, 1], size=size)
    
    return random_uniform_classifier

Majority classifier

In [32]:
def create_majority_classifier(category, majority_value):
    """
    Creates a majority classifier always predicting the most frequent valorization for the column.
    inputs:
        category: Category to predict
        majority_value: most frequent value (0 or 1)
    outputs:
        a function that generates majority predictions
    """
    def majority_classifier(size):
        """
        Generates majority predictions for the given category.
        inputs: 
            size: number of predictions to generate
        outputs: 
            array of majority predictions
        """
        return np.full(size, majority_value)
    
    return majority_classifier

Creating the baseline models for every category and saving them in a classifiers dictionary

In [None]:
classifiers = {}

categories = ['Openess to change', 'Self-enhancement', 'Conservation', 'Self-transcendence']

#create classifiers for each category and save them in the dictionary
for category in categories:
    #random uniform classifier
    random_uniform_name = f'random_uniform_classifier_{category}'
    classifiers[random_uniform_name] = create_random_uniform_classifier(category)

    #majority classifier
    majority_name = f'majority_classifier_{category}'
    classifiers[majority_name] = create_majority_classifier(category, majority_value=1) #da capire perchè majority_value=1

### BERT MODELS

#### BERT w/C

In [75]:
from transformers import BertModel, BertConfig

# Carica la configurazione del modello
config = BertConfig.from_pretrained('bert-base-uncased')
print(f"Hidden size: {config.hidden_size}")

Hidden size: 768


In [None]:
''' 
Verifica i dati di input: Assicurati che i dati di input (input_ids, attention_mask, token_type_ids) siano corretti e abbiano le dimensioni 
previste. Le dimensioni di input_ids dovrebbero essere [batch_size, outputs.length] quindi [16, 768]
'''
print(f"Input IDs shape: {input_ids.shape}")
print(f"Attention mask shape: {attention_mask.shape}")
print(f"Token type IDs shape: {token_type_ids.shape}")

In [77]:
#definition of the C_Model class for the first BERT-based model
class C_Model(torch.nn.Module):
    def __init__(self, model_name):
        super(C_Model, self).__init__()
        self.bert = AutoModelForSequenceClassification.from_pretrained(
            model_name, 
            problem_type='multi_label_classification', 
            num_labels = 4, 
            return_dict=False)
        self.dropout = torch.nn.Dropout(p=0.3)
        self.classifier = torch.nn.Linear(4, 4)

    def forward(self, input_ids, attention_mask, token_type_ids):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        print(f"Numero di valori restituiti: {len(outputs) if isinstance(outputs, tuple) else 1}")
        print(f"Tipo outputs: {type(outputs)}")
        print("Output: ", outputs[0])
        #print(f"outputs logits: {outputs.logits}")
        outputs = self.dropout(outputs[0])
        outputs = self.classifier(outputs)
        return outputs

'''
class roBERTa(torch.nn.Module):
    def __init__(self, model_name):
        super(roBERTa, self).__init__()
        self.roberta = AutoModel.from_pretrained(model_name, return_dict=False)
        self.dropout = torch.nn.Dropout(p=0.3)
        self.classifier = torch.nn.Linear(output_channels, 4)
        
    def forward(self, ids, mask, token_type_ids):
        _, output = self.roberta(ids, attention_mask=mask, token_type_ids=token_type_ids)
        output = self.dropout(output)
        output = self.classifier(output)
        return output
'''

'\nclass roBERTa(torch.nn.Module):\n    def __init__(self, model_name):\n        super(roBERTa, self).__init__()\n        self.roberta = AutoModel.from_pretrained(model_name, return_dict=False)\n        self.dropout = torch.nn.Dropout(p=0.3)\n        self.classifier = torch.nn.Linear(output_channels, 4)\n        \n    def forward(self, ids, mask, token_type_ids):\n        _, output = self.roberta(ids, attention_mask=mask, token_type_ids=token_type_ids)\n        output = self.dropout(output)\n        output = self.classifier(output)\n        return output\n'

#### BERT w/CP

In [78]:
#definition of the CP_Model class for the second BERT-based model
class CP_Model(torch.nn.Module):
    def __init__(self, model_name):
        super(CP_Model, self).__init__()
        self.bert = AutoModelForSequenceClassification.from_pretrained(
            model_name, 
            problem_type='multi_label_classification', 
            num_labels = 4)
        self.classifier = torch.nn.Linear(self.bert.config.hidden_size, 4)

    def forward(self, input_ids, attention_mask, token_type_ids):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        sequence_output = outputs.logits
        logits = self.classifier(sequence_output)
        return logits
    

#### BERT w/CPS

In [79]:
#definition of the CPS_Model class for the third BERT-based model

class CPS_Model(torch.nn.Module):
    def __init__(self, model_name):
        super(CPS_Model, self).__init__()
        self.bert = AutoModelForSequenceClassification.from_pretrained(
            model_name, 
            problem_type='multi_label_classification', 
            num_labels = 4)
        self.classifier = torch.nn.Linear(16, 4)

    def forward(self, input_ids, attention_mask, token_type_ids, stance):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        sequence_output = outputs.logits
        stance = stance.unsqueeze(1).float() 
        combined_output = torch.cat((sequence_output, stance), dim=1)
        logits = self.classifier(combined_output)
        return logits
    

#### Bert Models

In [82]:
c_model = C_Model(model_name)
cp_model = CP_Model(model_name)
cps_model = CPS_Model(model_name)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [83]:
print(c_model)

C_Model(
  (bert): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSdpaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bia

## TASK 3 - METRICS

In [84]:
#defining per-category F1 score metric
def calculate_per_category_f1(y_true, y_pred):
    categories = ['Openess to change', 'Self-enhancement', 'Conservation', 'Self-transcendence']
    category_f1_scores = {}
    for category in categories:
        # Filter true and predicted labels for the current category
        category_indices = [i for i, cat in enumerate(y_true['category']) if cat == category]
        category_y_true = [y_true['Stance'][i] for i in category_indices]
        category_y_pred = [y_pred[i] for i in category_indices]
        
        # Calculate F1 score for the current category
        f1 = f1_score(category_y_true, category_y_pred, average='binary')
        category_f1_scores[category] = f1
    return category_f1_scores

#defining macro F1 score metric
def calculate_macro_f1(category_f1_scores):
    average_f1 = np.mean(list(category_f1_scores.values()))
    return average_f1

#defining the EvalPrediction object for Trainer
def calculate_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    labels = p.label_ids
    
    # F1 score per category
    category_f1_scores = calculate_per_category_f1(labels, preds)
    
    # F1 score macro
    macro_f1 = calculate_macro_f1(category_f1_scores)
    
    #results
    result = {
        'category_f1_scores': category_f1_scores,
        'macro_f1': macro_f1
    }
    
    return result

## TASK 4 - TRAINING AND EVALUATION

Training phase utils

In [85]:
#definition of the loss function
def loss(outputs, targets):
    return BCEWithLogitsLoss()(outputs, targets)

#definition of the optimizers
optimizer = Adam(c_model.parameters(), lr = 1e-5)

# Set seeds for reproducibility
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)

seeds = [42, 123, 2024]
#seeds = [456]

epochs = 10

Training function definitions

In [91]:
def trainBert(model, dataloader, optimizer, loss):
    size = len(dataloader.dataset)
    print(f"Size: {size}")
    model.train()
    for batch, data in enumerate(dataloader, 0):
        ids = data['input_ids'].to(dtype=torch.long)
        print(f"Input IDs: {ids.shape}")
        mask = data['attention_mask'].to(dtype=torch.long)
        print(f"Attention Mask: {mask.shape}")
        token_type_ids = data['token_type_ids'].to(dtype=torch.long)
        labels = data['labels'].to(dtype=torch.float)
        print(f"Labels: {labels.shape}")
        optimizer.zero_grad()
        if model == cps_model:
            stance = data['stance'].to(dtype=torch.float)
            print(f"Stance: {stance.shape}")
            outputs = model(ids, mask, token_type_ids, stance)
            print(f"Outputs: {outputs.shape}")
        else:
            outputs = model(ids, mask, token_type_ids)
            print(f"Outputs: {outputs.shape}")
            
        loss = loss(outputs, labels)
        loss.backward()
        optimizer.step()
  
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(ids)
            print(f"Train loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [92]:
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}\n-------------------------------")
    trainBert(c_model, train_dataloaderc, optimizer, loss)

Epoch 1
-------------------------------
Size: 3
Input IDs: torch.Size([3, 100])
Attention Mask: torch.Size([3, 100])
Labels: torch.Size([3, 4])


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Numero di valori restituiti: 1
Tipo outputs: <class 'tuple'>
Output:  tensor([[ 1.0968,  0.3270, -1.3280,  1.1224],
        [ 0.9729,  0.2507, -1.2706,  1.2562],
        [ 0.8140,  0.8020, -0.8508,  1.5425]], grad_fn=<AddmmBackward0>)
Outputs: torch.Size([3, 4])
Train loss: 0.447778  [    0/    3]
Epoch 2
-------------------------------
Size: 3
Input IDs: torch.Size([3, 100])
Attention Mask: torch.Size([3, 100])
Labels: torch.Size([3, 4])
Numero di valori restituiti: 1
Tipo outputs: <class 'tuple'>
Output:  tensor([[ 0.9967,  0.1621, -1.5715,  0.9273],
        [ 1.0613,  0.2631, -1.5023,  0.7999],
        [ 0.9110,  0.8472, -0.9868,  1.6037]], grad_fn=<AddmmBackward0>)
Outputs: torch.Size([3, 4])
Train loss: 0.435453  [    0/    3]
Epoch 3
-------------------------------
Size: 3
Input IDs: torch.Size([3, 100])
Attention Mask: torch.Size([3, 100])
Labels: torch.Size([3, 4])
Numero di valori restituiti: 1
Tipo outputs: <class 'tuple'>
Output:  tensor([[ 0.8720,  0.1776, -1.4193,  1.0252]