# Working with First Party Dataset:

In [None]:
!pip install transformers datasets

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [None]:
# Set up
from datasets import Dataset
import pandas as pd
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, f1_score


In [None]:
# Short exploration with pandas
dataframe = pd.read_csv("First_Party_Collection_or_Use.csv")

# About data
print(dataframe["Collection Mode"].unique()) # --> 4 unique possbile values
print(len(dataframe["Personal Information Type"].unique())) # --> 16  possible unique values
print(len(dataframe["Purpose"].unique())) # --> 11 unique possible values

dataframe.head()

['Explicit' 'Unspecified' 'not-selected' 'Implicit']
16
11


Unnamed: 0,annotationID,segmentID,category,segment,Collection Mode,Personal Information Type,Purpose
0,20324,1,First Party Collection/Use,Information that Sci-News.com May Collect Onli...,Explicit,Unspecified,Basic service/feature
1,20325,1,First Party Collection/Use,Information that Sci-News.com May Collect Onli...,Explicit,Contact,Basic service/feature
2,20327,3,First Party Collection/Use,- details of your visits to our site including...,Unspecified,Unspecified,Unspecified
3,20328,3,First Party Collection/Use,- details of your visits to our site including...,Unspecified,User online activities,Unspecified
4,20329,3,First Party Collection/Use,- details of your visits to our site including...,Unspecified,Location,Unspecified


In [None]:
# Preprocessing
# split data
train_df, eval_df = train_test_split(dataframe, test_size=0.2, random_state=42)

# Encode labels

# Initialize a label encoder for each target column
collection_mode_encoder = LabelEncoder()
personal_info_type_encoder = LabelEncoder()
purpose_encoder = LabelEncoder()

# encode training dataset
train_df['Collection Mode'] = collection_mode_encoder.fit_transform(train_df['Collection Mode'])
train_df['Personal Information Type'] = personal_info_type_encoder.fit_transform(train_df['Personal Information Type'])
train_df['Purpose'] = purpose_encoder.fit_transform(train_df['Purpose'])

# Encode eval dataset
eval_df['Collection Mode'] = collection_mode_encoder.fit_transform(eval_df['Collection Mode'])
eval_df['Personal Information Type'] = personal_info_type_encoder.fit_transform(eval_df['Personal Information Type'])
eval_df['Purpose'] = purpose_encoder.fit_transform(eval_df['Purpose'])

# # transform to huggingface dataset
# train_dataset = Dataset.from_pandas(train_df)
# eval_dataset = Dataset.from_pandas(eval_df)


In [None]:
# Tokenize
import torch

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Tokenize the texts in the DataFrame
inputs = tokenizer(list(train_df['segment']), padding=True, truncation=True, max_length=512, return_tensors="pt")
inputs_eval = tokenizer(list(eval_df['segment']), padding=True, truncation=True, max_length=512, return_tensors="pt")

# Convert labels to tensors
collection_mode_labels = torch.tensor(train_df['Collection Mode'].values)
personal_information_labels = torch.tensor(train_df['Personal Information Type'].values)
purpose_labels = torch.tensor(train_df['Purpose'].values)

collection_mode_labels_eval = torch.tensor(eval_df['Collection Mode'].values)
personal_information_labels_eval = torch.tensor(eval_df['Personal Information Type'].values)
purpose_labels_eval = torch.tensor(eval_df['Purpose'].values)

# Create a TensorDataset
train_dataset = TensorDataset(inputs['input_ids'], inputs['attention_mask'], collection_mode_labels, personal_information_labels, purpose_labels)
eval_dataset = TensorDataset(inputs_eval['input_ids'], inputs_eval['attention_mask'], collection_mode_labels_eval, personal_information_labels_eval, purpose_labels_eval)



# Create a DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True) # YOU CAN EDIT THIS ARGUMENT LATER AS YOU WANT
eval_dataloader = DataLoader(eval_dataset, batch_size=16, shuffle = True )


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [None]:
# take a small subset of data for testing
train_subset = torch.utils.data.Subset(train_dataset, range(80))
eval_subset = torch.utils.data.Subset(eval_dataset, range(20))

train_subset_dataloader = DataLoader(train_subset, batch_size=16, shuffle=True)
eval_subset_dataloader = DataLoader(eval_subset, batch_size=16, shuffle=True)


In [None]:
print(len(eval_dataset))
print(inputs_eval['input_ids'].shape)
print(inputs_eval['attention_mask'].shape)
print(collection_mode_labels_eval.shape)
print(personal_information_labels_eval.shape)
print(purpose_labels_eval.shape)

1787
torch.Size([1787, 505])
torch.Size([1787, 505])
torch.Size([1787])
torch.Size([1787])
torch.Size([1787])


In [None]:
# Adjust model for multitask case
from transformers import DistilBertModel, PreTrainedModel, DistilBertConfig
import torch.nn as nn

class DistilBertForMultiTask(PreTrainedModel):
    def __init__(self, config, num_labels_task1, num_labels_task2, num_labels_task3):
        super().__init__(config)
        self.distilbert = DistilBertModel(config)

        # Output heads for each task
        self.classifier_task1 = nn.Linear(config.dim, num_labels_task1)
        self.classifier_task2 = nn.Linear(config.dim, num_labels_task2)
        self.classifier_task3 = nn.Linear(config.dim, num_labels_task3)

    def forward(self, input_ids, attention_mask=None, labels_task1=None, labels_task2=None, labels_task3=None):
        outputs = self.distilbert(input_ids, attention_mask=attention_mask)
        pooled_output = outputs[0][:, 0]  # Take <CLS> token hidden state

        logits_task1 = self.classifier_task1(pooled_output)
        logits_task2 = self.classifier_task2(pooled_output)
        logits_task3 = self.classifier_task3(pooled_output)

        return logits_task1, logits_task2, logits_task3



In [None]:
# Initialize the configuration manually if needed
config = DistilBertConfig.from_pretrained('distilbert-base-uncased')

# Now initialize the model with the configuration and number of labels for each task
model = DistilBertForMultiTask(config, num_labels_task1=4, num_labels_task2=16, num_labels_task3=11)

In [None]:
from transformers import AdamW
import torch

# Initialize the optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Loss functions for each task
loss_fn_task1 = torch.nn.CrossEntropyLoss()
loss_fn_task2 = torch.nn.CrossEntropyLoss()
loss_fn_task3 = torch.nn.CrossEntropyLoss()




In [None]:
# Training loop with logs
# Move model to GPU or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
print(f"Training on device: {device}")

for epoch in range(15):  # Number of epochs
    model.train()
    total_loss_epoch = 0  # To accumulate the loss for the epoch
    for batch_idx, batch in enumerate(train_dataloader):
        # Assuming the batch is a list of tensors, unpack them
        input_ids, attention_mask, labels_task1, labels_task2, labels_task3 = batch

        # Move tensors to device
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels_task1 = labels_task1.to(device)
        labels_task2 = labels_task2.to(device)
        labels_task3 = labels_task3.to(device)

        # Forward pass
        logits_task1, logits_task2, logits_task3 = model(input_ids=input_ids, attention_mask=attention_mask)

        # Compute the loss for each task
        loss_task1 = loss_fn_task1(logits_task1, labels_task1)
        loss_task2 = loss_fn_task2(logits_task2, labels_task2)
        loss_task3 = loss_fn_task3(logits_task3, labels_task3)

        # Total loss
        total_loss = loss_task1 + loss_task2 + loss_task3

        # Backpropagation
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        total_loss_epoch += total_loss.item()  # Accumulate loss for the epoch

        # Log progress every 10 batches
        if batch_idx % 100 == 0:
            print(f"Epoch {epoch+1}/{15}, Batch {batch_idx}/{len(train_dataloader)}, Loss: {total_loss.item():.4f}")

    # Average loss for the epoch
    avg_loss_epoch = total_loss_epoch / len(train_dataloader)
    print(f"Epoch {epoch+1} completed. Average Loss for this epoch: {avg_loss_epoch:.4f}")

      # Evaluation code


    model.eval()
    with torch.no_grad():
        all_preds_task1 = []
        all_preds_task2 = []
        all_preds_task3 = []
        all_labels_task1 = []
        all_labels_task2 = []
        all_labels_task3 = []

        for batch in eval_subset_dataloader:
            # Unpack the batch directly (since it's a list of tensors, not a dictionary)
            input_ids, attention_mask, labels_task1, labels_task2, labels_task3 = batch

            # Move tensors to the device
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            labels_task1 = labels_task1.to(device)
            labels_task2 = labels_task2.to(device)
            labels_task3 = labels_task3.to(device)

            # Forward pass
            logits_task1, logits_task2, logits_task3 = model(input_ids=input_ids, attention_mask=attention_mask)

            # Get predictions by taking the class with the highest logit value
            preds_task1 = logits_task1.argmax(dim=-1)
            preds_task2 = logits_task2.argmax(dim=-1)
            preds_task3 = logits_task3.argmax(dim=-1)

            # Collect predictions and true labels for metrics computation
            all_preds_task1.extend(preds_task1.cpu().numpy())
            all_preds_task2.extend(preds_task2.cpu().numpy())
            all_preds_task3.extend(preds_task3.cpu().numpy())

            all_labels_task1.extend(labels_task1.cpu().numpy())
            all_labels_task2.extend(labels_task2.cpu().numpy())
            all_labels_task3.extend(labels_task3.cpu().numpy())

        # Compute metrics for each task
        accuracy_task1 = accuracy_score(all_labels_task1, all_preds_task1)
        accuracy_task2 = accuracy_score(all_labels_task2, all_preds_task2)
        accuracy_task3 = accuracy_score(all_labels_task3, all_preds_task3)

        f1_task1 = f1_score(all_labels_task1, all_preds_task1, average='weighted')
        f1_task2 = f1_score(all_labels_task2, all_preds_task2, average='weighted')
        f1_task3 = f1_score(all_labels_task3, all_preds_task3, average='weighted')

        print(f"Accuracy Task 1: {accuracy_task1:.4f}, F1 Task 1: {f1_task1:.4f}")
        print(f"Accuracy Task 2: {accuracy_task2:.4f}, F1 Task 2: {f1_task2:.4f}")
        print(f"Accuracy Task 3: {accuracy_task3:.4f}, F1 Task 3: {f1_task3:.4f}")


Training on device: cuda
Epoch 1/15, Batch 0/447, Loss: 5.7818
Epoch 1/15, Batch 100/447, Loss: 5.0623
Epoch 1/15, Batch 200/447, Loss: 4.7836
Epoch 1/15, Batch 300/447, Loss: 4.7284
Epoch 1/15, Batch 400/447, Loss: 4.5618
Epoch 1 completed. Average Loss for this epoch: 4.8555
Accuracy Task 1: 0.5000, F1 Task 1: 0.4949
Accuracy Task 2: 0.2000, F1 Task 2: 0.1539
Accuracy Task 3: 0.3500, F1 Task 3: 0.2998
Epoch 2/15, Batch 0/447, Loss: 4.4575
Epoch 2/15, Batch 100/447, Loss: 3.4696
Epoch 2/15, Batch 200/447, Loss: 4.8449
Epoch 2/15, Batch 300/447, Loss: 3.7606
Epoch 2/15, Batch 400/447, Loss: 3.8209
Epoch 2 completed. Average Loss for this epoch: 4.0152
Accuracy Task 1: 0.7000, F1 Task 1: 0.7079
Accuracy Task 2: 0.3500, F1 Task 2: 0.3734
Accuracy Task 3: 0.3500, F1 Task 3: 0.3017
Epoch 3/15, Batch 0/447, Loss: 3.2746
Epoch 3/15, Batch 100/447, Loss: 3.3802
Epoch 3/15, Batch 200/447, Loss: 3.8182
Epoch 3/15, Batch 300/447, Loss: 3.6662
Epoch 3/15, Batch 400/447, Loss: 3.4825
Epoch 3 compl

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Evaluation loop
model.eval()
with torch.no_grad():
    all_preds_task1 = []
    all_preds_task2 = []
    all_preds_task3 = []
    all_labels_task1 = []
    all_labels_task2 = []
    all_labels_task3 = []

    for batch in eval_dataloader:
        # Unpack the batch directly (since it's a list of tensors, not a dictionary)
        input_ids, attention_mask, labels_task1, labels_task2, labels_task3 = batch

        # Move tensors to the device
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels_task1 = labels_task1.to(device)
        labels_task2 = labels_task2.to(device)
        labels_task3 = labels_task3.to(device)

        # Forward pass
        logits_task1, logits_task2, logits_task3 = model(input_ids=input_ids, attention_mask=attention_mask)

        # Get predictions by taking the class with the highest logit value
        preds_task1 = logits_task1.argmax(dim=-1)
        preds_task2 = logits_task2.argmax(dim=-1)
        preds_task3 = logits_task3.argmax(dim=-1)

        # Collect predictions and true labels for metrics computation
        all_preds_task1.extend(preds_task1.cpu().numpy())
        all_preds_task2.extend(preds_task2.cpu().numpy())
        all_preds_task3.extend(preds_task3.cpu().numpy())

        all_labels_task1.extend(labels_task1.cpu().numpy())
        all_labels_task2.extend(labels_task2.cpu().numpy())
        all_labels_task3.extend(labels_task3.cpu().numpy())

    # Compute metrics for each task
    accuracy_task1 = accuracy_score(all_labels_task1, all_preds_task1)
    accuracy_task2 = accuracy_score(all_labels_task2, all_preds_task2)
    accuracy_task3 = accuracy_score(all_labels_task3, all_preds_task3)

    f1_task1 = f1_score(all_labels_task1, all_preds_task1, average='weighted')
    f1_task2 = f1_score(all_labels_task2, all_preds_task2, average='weighted')
    f1_task3 = f1_score(all_labels_task3, all_preds_task3, average='weighted')

    print(f"Accuracy Task 1: {accuracy_task1:.4f}, F1 Task 1: {f1_task1:.4f}")
    print(f"Accuracy Task 2: {accuracy_task2:.4f}, F1 Task 2: {f1_task2:.4f}")
    print(f"Accuracy Task 3: {accuracy_task3:.4f}, F1 Task 3: {f1_task3:.4f}")


Accuracy Task 1: 0.6128, F1 Task 1: 0.6013
Accuracy Task 2: 0.4790, F1 Task 2: 0.4501
Accuracy Task 3: 0.5165, F1 Task 3: 0.4899


In [None]:
# Save model after training and evaluation
# save model state
torch.save(model.state_dict(), 'first_party_model_state_dict.pth')

# save entire  model
torch.save(model, 'first_party_model_full.pth')


In [None]:
# # possible different approach in loss function
# from torch.nn import CrossEntropyLoss

# loss_fct = CrossEntropyLoss()
# loss_task1 = loss_fct(logits_task1.view(-1, num_labels_task1), labels_task1.view(-1))
# loss_task2 = loss_fct(logits_task2.view(-1, num_labels_task2), labels_task2.view(-1))
# loss_task3 = loss_fct(logits_task3.view(-1, num_labels_task3), labels_task3.view(-1))

# loss = loss_task1 + loss_task2 + loss_task3


In [None]:
# Decoding predictions
predicted_labels_task1 = collection_mode_encoder.inverse_transform(predictions_task1)
predicted_labels_task2 = personal_info_type_encoder.inverse_transform(predictions_task2)
predicted_labels_task3 = purpose_encoder.inverse_transform(predictions_task3)


# NEW CLASSIFIER: Third Party Dataset


In [None]:
# Short exploration with pandas
dataframe = pd.read_csv("Third_Party_Sharing_or_Collection.csv")

# About data
print(len(dataframe["Action Third Party"].unique())) # --> 6 unique possbile values
print(len(dataframe["Personal Information Type"].unique())) # --> 15  possible unique values
print(len(dataframe["Purpose"].unique())) # --> 11 unique possible values

dataframe.head()

6
15
11


Unnamed: 0,annotationID,segmentID,category,segment,Purpose,Personal Information Type,Action Third Party
0,20337,9,Third Party Sharing/Collection,Disclosure of Your Information <br> <br> Sci-N...,Unspecified,Generic personal information,Receive/Shared with
1,20597,7,Third Party Sharing/Collection,Please note that during or after your visits t...,Unspecified,Cookies and tracking elements,Track on first party website/app
2,20599,9,Third Party Sharing/Collection,Disclosure of Your Information <br> <br> Sci-N...,Unspecified,Generic personal information,Receive/Shared with
3,20248,7,Third Party Sharing/Collection,Please note that during or after your visits t...,Unspecified,Cookies and tracking elements,Track on first party website/app
4,20251,9,Third Party Sharing/Collection,Disclosure of Your Information <br> <br> Sci-N...,Unspecified,Generic personal information,Receive/Shared with


In [None]:
# Preprocessing
# split data
train_df, eval_df = train_test_split(dataframe, test_size=0.2, random_state=42)

# Encode labels

# Initialize a label encoder for each target column
action_encoder = LabelEncoder()
personal_info_type_encoder = LabelEncoder()
purpose_encoder = LabelEncoder()

# encode training dataset
train_df['Action Third Party'] = action_encoder.fit_transform(train_df['Action Third Party'])
train_df['Personal Information Type'] = personal_info_type_encoder.fit_transform(train_df['Personal Information Type'])
train_df['Purpose'] = purpose_encoder.fit_transform(train_df['Purpose'])

# Encode eval dataset
eval_df['Action Third Party'] = action_encoder.fit_transform(eval_df['Action Third Party'])
eval_df['Personal Information Type'] = personal_info_type_encoder.fit_transform(eval_df['Personal Information Type'])
eval_df['Purpose'] = purpose_encoder.fit_transform(eval_df['Purpose'])

In [None]:
# Tokenize
import torch

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Tokenize the texts in the DataFrame
inputs = tokenizer(list(train_df['segment']), padding=True, truncation=True, max_length=512, return_tensors="pt")
inputs_eval = tokenizer(list(eval_df['segment']), padding=True, truncation=True, max_length=512, return_tensors="pt")

# Convert labels to tensors
action_labels = torch.tensor(train_df['Action Third Party'].values)
personal_information_labels = torch.tensor(train_df['Personal Information Type'].values)
purpose_labels = torch.tensor(train_df['Purpose'].values)

action_labels_eval = torch.tensor(eval_df['Action Third Party'].values)
personal_information_labels_eval = torch.tensor(eval_df['Personal Information Type'].values)
purpose_labels_eval = torch.tensor(eval_df['Purpose'].values)

# Create a TensorDataset
train_dataset = TensorDataset(inputs['input_ids'], inputs['attention_mask'], action_labels, personal_information_labels, purpose_labels)
eval_dataset = TensorDataset(inputs_eval['input_ids'], inputs_eval['attention_mask'], action_labels_eval, personal_information_labels_eval, purpose_labels_eval)



# Create a DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True) # YOU CAN EDIT THIS ARGUMENT LATER AS YOU WANT
eval_dataloader = DataLoader(eval_dataset, batch_size=16, shuffle = True )


In [None]:
# take a small subset of data for testing
train_subset = torch.utils.data.Subset(train_dataset, range(80))
eval_subset = torch.utils.data.Subset(eval_dataset, range(20))

train_subset_dataloader = DataLoader(train_subset, batch_size=16, shuffle=True)
eval_subset_dataloader = DataLoader(eval_subset, batch_size=16, shuffle=True)


In [None]:
# Adjust model for multitask case
from transformers import DistilBertModel, PreTrainedModel, DistilBertConfig
import torch.nn as nn

class DistilBertForMultiTask(PreTrainedModel):
    def __init__(self, config, num_labels_task1, num_labels_task2, num_labels_task3):
        super().__init__(config)
        self.distilbert = DistilBertModel(config)

        # Output heads for each task
        self.classifier_task1 = nn.Linear(config.dim, num_labels_task1)
        self.classifier_task2 = nn.Linear(config.dim, num_labels_task2)
        self.classifier_task3 = nn.Linear(config.dim, num_labels_task3)

    def forward(self, input_ids, attention_mask=None, labels_task1=None, labels_task2=None, labels_task3=None):
        outputs = self.distilbert(input_ids, attention_mask=attention_mask)
        pooled_output = outputs[0][:, 0]  # Take <CLS> token hidden state

        logits_task1 = self.classifier_task1(pooled_output)
        logits_task2 = self.classifier_task2(pooled_output)
        logits_task3 = self.classifier_task3(pooled_output)

        return logits_task1, logits_task2, logits_task3



In [None]:
# Initialize the configuration manually if needed
config = DistilBertConfig.from_pretrained('distilbert-base-uncased')

# Now initialize the model with the configuration and number of labels for each task
model = DistilBertForMultiTask(config, num_labels_task1=6, num_labels_task2=15, num_labels_task3=11)

In [None]:
from transformers import AdamW
import torch

# Initialize the optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Loss functions for each task
loss_fn_task1 = torch.nn.CrossEntropyLoss()
loss_fn_task2 = torch.nn.CrossEntropyLoss()
loss_fn_task3 = torch.nn.CrossEntropyLoss()




In [None]:
# Training loop with logs
# Move model to GPU or CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
print(f"Training on device: {device}")
EPOCHS = 10

for epoch in range(EPOCHS):  # Number of epochs
    model.train()
    total_loss_epoch = 0  # To accumulate the loss for the epoch
    for batch_idx, batch in enumerate(train_dataloader):
        # Assuming the batch is a list of tensors, unpack them
        input_ids, attention_mask, labels_task1, labels_task2, labels_task3 = batch

        # Move tensors to device
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels_task1 = labels_task1.to(device)
        labels_task2 = labels_task2.to(device)
        labels_task3 = labels_task3.to(device)

        # Forward pass
        logits_task1, logits_task2, logits_task3 = model(input_ids=input_ids, attention_mask=attention_mask)

        # Compute the loss for each task
        loss_task1 = loss_fn_task1(logits_task1, labels_task1)
        loss_task2 = loss_fn_task2(logits_task2, labels_task2)
        loss_task3 = loss_fn_task3(logits_task3, labels_task3)

        # Total loss
        total_loss = loss_task1 + loss_task2 + loss_task3

        # Backpropagation
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        total_loss_epoch += total_loss.item()  # Accumulate loss for the epoch

        # Log progress every 10 batches
        if batch_idx % 100 == 0:
            print(f"Epoch {epoch+1}/{EPOCHS}, Batch {batch_idx}/{len(train_dataloader)}, Loss: {total_loss.item():.4f}")


    # Average loss for the epoch
    avg_loss_epoch = total_loss_epoch / len(train_dataloader)
    print(f"Epoch {epoch+1} completed. Average Loss for this epoch: {avg_loss_epoch:.4f}")


    # Evaluation code


    model.eval()
    with torch.no_grad():
        all_preds_task1 = []
        all_preds_task2 = []
        all_preds_task3 = []
        all_labels_task1 = []
        all_labels_task2 = []
        all_labels_task3 = []

        for batch in eval_dataloader:
            # Unpack the batch directly (since it's a list of tensors, not a dictionary)
            input_ids, attention_mask, labels_task1, labels_task2, labels_task3 = batch

            # Move tensors to the device
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            labels_task1 = labels_task1.to(device)
            labels_task2 = labels_task2.to(device)
            labels_task3 = labels_task3.to(device)

            # Forward pass
            logits_task1, logits_task2, logits_task3 = model(input_ids=input_ids, attention_mask=attention_mask)

            # Get predictions by taking the class with the highest logit value
            preds_task1 = logits_task1.argmax(dim=-1)
            preds_task2 = logits_task2.argmax(dim=-1)
            preds_task3 = logits_task3.argmax(dim=-1)

            # Collect predictions and true labels for metrics computation
            all_preds_task1.extend(preds_task1.cpu().numpy())
            all_preds_task2.extend(preds_task2.cpu().numpy())
            all_preds_task3.extend(preds_task3.cpu().numpy())

            all_labels_task1.extend(labels_task1.cpu().numpy())
            all_labels_task2.extend(labels_task2.cpu().numpy())
            all_labels_task3.extend(labels_task3.cpu().numpy())

        # Compute metrics for each task
        accuracy_task1 = accuracy_score(all_labels_task1, all_preds_task1)
        accuracy_task2 = accuracy_score(all_labels_task2, all_preds_task2)
        accuracy_task3 = accuracy_score(all_labels_task3, all_preds_task3)

        f1_task1 = f1_score(all_labels_task1, all_preds_task1, average='weighted')
        f1_task2 = f1_score(all_labels_task2, all_preds_task2, average='weighted')
        f1_task3 = f1_score(all_labels_task3, all_preds_task3, average='weighted')

        print(f"Accuracy Task 1: {accuracy_task1:.4f}, F1 Task 1: {f1_task1:.4f}")
        print(f"Accuracy Task 2: {accuracy_task2:.4f}, F1 Task 2: {f1_task2:.4f}")
        print(f"Accuracy Task 3: {accuracy_task3:.4f}, F1 Task 3: {f1_task3:.4f}")


Training on device: cuda
Epoch 1/10, Batch 0/261, Loss: 10.0544
Epoch 1/10, Batch 100/261, Loss: 4.4305
Epoch 1/10, Batch 200/261, Loss: 3.9578
Epoch 1 completed. Average Loss for this epoch: 4.5629
Accuracy Task 1: 0.7397, F1 Task 1: 0.6972
Accuracy Task 2: 0.4096, F1 Task 2: 0.3351
Accuracy Task 3: 0.4632, F1 Task 3: 0.3988
Epoch 2/10, Batch 0/261, Loss: 3.5099
Epoch 2/10, Batch 100/261, Loss: 4.3892
Epoch 2/10, Batch 200/261, Loss: 3.3825
Epoch 2 completed. Average Loss for this epoch: 3.5436
Accuracy Task 1: 0.7665, F1 Task 1: 0.7398
Accuracy Task 2: 0.5464, F1 Task 2: 0.5046
Accuracy Task 3: 0.5445, F1 Task 3: 0.5260
Epoch 3/10, Batch 0/261, Loss: 3.1156
Epoch 3/10, Batch 100/261, Loss: 3.4271
Epoch 3/10, Batch 200/261, Loss: 2.6537
Epoch 3 completed. Average Loss for this epoch: 3.0670
Accuracy Task 1: 0.7856, F1 Task 1: 0.7665
Accuracy Task 2: 0.5569, F1 Task 2: 0.5379
Accuracy Task 3: 0.5340, F1 Task 3: 0.5282
Epoch 4/10, Batch 0/261, Loss: 2.4495
Epoch 4/10, Batch 100/261, Los

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Evaluation loop
model.eval()
with torch.no_grad():
    all_preds_task1 = []
    all_preds_task2 = []
    all_preds_task3 = []
    all_labels_task1 = []
    all_labels_task2 = []
    all_labels_task3 = []

    for batch in eval_subset_dataloader:
        # Unpack the batch directly (since it's a list of tensors, not a dictionary)
        input_ids, attention_mask, labels_task1, labels_task2, labels_task3 = batch

        # Move tensors to the device
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels_task1 = labels_task1.to(device)
        labels_task2 = labels_task2.to(device)
        labels_task3 = labels_task3.to(device)

        # Forward pass
        logits_task1, logits_task2, logits_task3 = model(input_ids=input_ids, attention_mask=attention_mask)

        # Get predictions by taking the class with the highest logit value
        preds_task1 = logits_task1.argmax(dim=-1)
        preds_task2 = logits_task2.argmax(dim=-1)
        preds_task3 = logits_task3.argmax(dim=-1)

        # Collect predictions and true labels for metrics computation
        all_preds_task1.extend(preds_task1.cpu().numpy())
        all_preds_task2.extend(preds_task2.cpu().numpy())
        all_preds_task3.extend(preds_task3.cpu().numpy())

        all_labels_task1.extend(labels_task1.cpu().numpy())
        all_labels_task2.extend(labels_task2.cpu().numpy())
        all_labels_task3.extend(labels_task3.cpu().numpy())

    # Compute metrics for each task
    accuracy_task1 = accuracy_score(all_labels_task1, all_preds_task1)
    accuracy_task2 = accuracy_score(all_labels_task2, all_preds_task2)
    accuracy_task3 = accuracy_score(all_labels_task3, all_preds_task3)

    f1_task1 = f1_score(all_labels_task1, all_preds_task1, average='weighted')
    f1_task2 = f1_score(all_labels_task2, all_preds_task2, average='weighted')
    f1_task3 = f1_score(all_labels_task3, all_preds_task3, average='weighted')

    print(f"Accuracy Task 1: {accuracy_task1:.4f}, F1 Task 1: {f1_task1:.4f}")
    print(f"Accuracy Task 2: {accuracy_task2:.4f}, F1 Task 2: {f1_task2:.4f}")
    print(f"Accuracy Task 3: {accuracy_task3:.4f}, F1 Task 3: {f1_task3:.4f}")


Accuracy Task 1: 0.8000, F1 Task 1: 0.7111
Accuracy Task 2: 0.1500, F1 Task 2: 0.0391
Accuracy Task 3: 0.2000, F1 Task 3: 0.0667


In [None]:
# save model state
torch.save(model.state_dict(), 'third_party_model_10epoch_underfitted.pth')

# save entire  model
torch.save(model, 'third_party_model_full_10epoch_underfitted.pth')