<a href="https://colab.research.google.com/github/ncerutti/colabs/blob/main/FSPD_MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers torch wandb tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m68.9 MB/s[0m eta [36m0:00:00[0m
Collecting wandb
  Downloading wandb-0.14.2-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m67.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m52.2 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=

In [2]:
from google.colab import drive
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.utils.class_weight import compute_sample_weight
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import OneCycleLR
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm.notebook import tqdm
import wandb

torch.__version__

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("All good")
    torch.cuda.empty_cache()
else:
    device = torch.device("cpu")
    print("No GPU!!!")

All good


In [3]:
drive.mount('/content/drive')
checkpoint_dir = "/content/drive/MyDrive/FSPD/ModelCheckpoints"
os.makedirs(checkpoint_dir, exist_ok=True)

Mounted at /content/drive


In [4]:
# epoch_to_load = 9  # Change this to the epoch number you want to load
# checkpoint_path = os.path.join(checkpoint_dir, f"epoch_{epoch_to_load}_classifier.pth")

# loaded_classifier = SimpleNNClassifier(input_size, hidden_size, output_size).to(device)
# loaded_classifier.load_state_dict(torch.load(checkpoint_path))
# loaded_classifier.eval()  # Set the model to evaluation mode

In [5]:
wandb.login()
wandb.init(project="FSPD", config={"architecture": "SimpleNNClassifier", "epochs": 25, "batch_size": 32, "learning_rate": "OCP", "hidden_size": 512})

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mncerutti[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
# Create clean_fspd function. This function will take in the fspd dataframe and return a cleaned version of it.

def clean_fspd(fspd_f):
    """This function takes in the fspd dataframe and returns a cleaned version of it.
    """
    # Create a list of columns that are not needed
    drop_cols = ["lever", "itype", "source1link", "framework", "iso", "region_wb", "income_group2", "defn", "initialdate", "inclusion", "envitarget", "diethealth"]
    # Drop the columns in drop_cols from fspd_f
    fspd_f = fspd_f.drop(columns=drop_cols)

    # Replace the values in "covid_mentioned" with 0 if they are "nan"
    fspd_f["covid_mentioned"] = fspd_f["covid_mentioned"].replace(np.nan, 0)

    # Replace the values in "targeted" with 0 if the are "" o "N" and with 1 if they are "Y"
    fspd_f["targeted"] = fspd_f["targeted"].replace("", 0)
    fspd_f["targeted"] = fspd_f["targeted"].replace("N", 0)
    fspd_f["targeted"] = fspd_f["targeted"].replace("Y", 1)

    # replace "policy_code" with 0 if it is empty
    fspd_f["policy_code"] = fspd_f["policy_code"].replace(np.nan, 0)
    fspd_f["y_end"] = fspd_f["y_end"].replace(np.nan, 0)
    fspd_f["y_start"] = fspd_f["y_start"].replace("", 0)
    
    return fspd_f



def encode_fspd(fspd_f):
    """This function takes in the fspd dataframe and returns the dataframe with one-hot encoding of a list of variables.
    """
    to_encode = ["country", "db", "policy_code", "y_start", "y_end", "income_group", "fsd_group"]
    fspd_f = pd.get_dummies(fspd_f, columns=to_encode)
    return fspd_f


def get_non_text_features(batch_data, non_text_features):
    batch_indices = batch_data["index"].numpy()
    batch_non_text_features = non_text_features.loc[batch_indices]
    batch_non_text_features_tensor = torch.tensor(batch_non_text_features.values, dtype=torch.float32)
    return batch_non_text_features_tensor


def compute_class_weights(y):
    unique_classes = np.unique(y)
    class_weights = compute_class_weight('balanced', classes=unique_classes, y=y)
    return dict(zip(unique_classes, class_weights))


def get_sample_weights(y, class_weights):
    return np.array([class_weights[cls] for cls in y])

In [7]:
class FSPData(Dataset):
    def __init__(self, data, target_segment):
        self.data = data
        #self.target_lever = target_lever
        self.target_segment = target_segment

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data.iloc[idx]
        return {
            "index": torch.tensor(idx, dtype=torch.long),  # Add this line
            "policydecision_details": item["policydecision_details_tokens"],
            "policy_description": item["policy_description_tokens"],
            "contextoradditionalinfo": item["contextoradditionalinfo_tokens"],
            "source1name": item["source1name_tokens"],
            # Include other features as needed
            #"lever": torch.tensor(self.target_lever[idx], dtype=torch.long),
            "segment": torch.tensor(self.target_segment[idx], dtype=torch.long)
        }


In [8]:
class SimpleNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

In [9]:
# Read in data from .dta
#fspd_f = pd.read_stata("/content/FSPD.dta", index_col="id")
fspd_f = pd.read_stata("/content/drive/MyDrive/FSPD/FSPD.dta", index_col="id")

In [10]:
fspd = clean_fspd(fspd_f)
encfspd = encode_fspd(fspd)

In [11]:
from transformers import DistilBertTokenizer, DistilBertModel 

# Initialize DistilBERT model and tokenizer
pretrained_model_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(pretrained_model_name)
bert_model = DistilBertModel.from_pretrained(pretrained_model_name)

bert_model = bert_model.to(device)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [12]:
#from transformers import BertTokenizer, BertModel

## Initialize BERT model and tokenizer
#pretrained_model_name = "bert-base-uncased"
#tokenizer = BertTokenizer.from_pretrained(pretrained_model_name)
#bert_model = BertModel.from_pretrained(pretrained_model_name)

In [13]:
encfspd["policydecision_details_tokens"] = encfspd["policydecision_details"].apply(lambda x: tokenizer.encode(x, truncation=True, max_length=128))
encfspd["policy_description_tokens"] = encfspd["policy_description"].apply(lambda x: tokenizer.encode(x, truncation=True, max_length=128))
encfspd["contextoradditionalinfo_tokens"] = encfspd["contextoradditionalinfo"].apply(lambda x: tokenizer.encode(x, truncation=True, max_length=96))
encfspd["source1name_tokens"] = encfspd["source1name"].apply(lambda x: tokenizer.encode(x, truncation=True, max_length=8))

In [14]:
max_length = max(encfspd[["policydecision_details_tokens", "policy_description_tokens", "contextoradditionalinfo_tokens", "source1name"]].applymap(len).max())

encfspd["policydecision_details_tokens"] = encfspd["policydecision_details_tokens"].apply(lambda x: x + [0] * (max_length - len(x)))
encfspd["policy_description_tokens"] = encfspd["policy_description_tokens"].apply(lambda x: x + [0] * (max_length - len(x)))
encfspd["contextoradditionalinfo_tokens"] = encfspd["contextoradditionalinfo_tokens"].apply(lambda x: x + [0] * (max_length - len(x)))
encfspd["source1name_tokens"] = encfspd["source1name_tokens"].apply(lambda x: x + [0] * (max_length - len(x)))

In [15]:
encfspd["policydecision_details_tokens"] = encfspd["policydecision_details_tokens"].apply(lambda x: torch.tensor(x))
encfspd["policy_description_tokens"] = encfspd["policy_description_tokens"].apply(lambda x: torch.tensor(x))
encfspd["contextoradditionalinfo_tokens"] = encfspd["contextoradditionalinfo_tokens"].apply(lambda x: torch.tensor(x))
encfspd["source1name_tokens"] = encfspd["source1name_tokens"].apply(lambda x: torch.tensor(x))

In [16]:
# Create LabelEncoder instances for lever and segment
#lever_encoder = LabelEncoder()
segment_encoder = LabelEncoder()

# Fit the encoders on the respective target labels and transform them
#encfspd["lever"] = lever_encoder.fit_transform(encfspd["lever"])
encfspd["segment"] = segment_encoder.fit_transform(encfspd["segment"])

# Extract lever and segment labels from the encfspd DataFrame
#lever_labels = encfspd["lever"].values
segment_labels = encfspd["segment"].values

In [17]:
np.unique(segment_labels)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [18]:
encfspd["segment"]

id
1.0         1
2.0        10
3.0         8
4.0        10
5.0        10
           ..
15588.0     3
15589.0     1
15590.0    10
15591.0     1
15592.0     4
Name: segment, Length: 15592, dtype: int64

In [20]:
## Careful: no test!
# train_data, val_data, train_segment, val_segment = train_test_split(encfspd, segment_labels, test_size=0.2, random_state=42)

# With test
train_data, temp_data, train_segment, temp_segment = train_test_split(encfspd, segment_labels, test_size=0.3, random_state=42)
val_data, test_data, val_segment, test_segment = train_test_split(temp_data, temp_segment, test_size=0.5, random_state=42)

# Compute class weights
class_weights = compute_class_weights(train_segment)

# Compute sample weights
train_sample_weights = get_sample_weights(train_segment, class_weights)

# Create WeightedRandomSampler
weighted_sampler = WeightedRandomSampler(train_sample_weights, num_samples=len(train_sample_weights), replacement=True)

train_dataset = FSPData(train_data, train_segment)
val_dataset = FSPData(val_data, val_segment)
test_dataset = FSPData(test_data, test_segment)

## Final: train on whole dataset
# train_dataset = FSPData(encfspd, segment_labels)

batch_size = 32
#train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=weighted_sampler)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [21]:
# Create non-text-feature dataframe. It contains column 6 and then from 8 to the end

slice1 = encfspd.iloc[:, 3]
slice2 = encfspd.iloc[:, 6:]
slice3 = encfspd.iloc[:, 10:411]

non_text_features = pd.concat([slice1, slice2, slice3], axis=1).reset_index(drop=True)

In [22]:
print(non_text_features.select_dtypes(include=['object']).columns)


Index(['policydecision_details_tokens', 'policy_description_tokens',
       'contextoradditionalinfo_tokens', 'source1name_tokens'],
      dtype='object')


In [23]:
non_text_features = non_text_features.drop(non_text_features.select_dtypes(include=['object']).columns, axis=1)


In [24]:
print(non_text_features.select_dtypes(include=['object']).columns)

Index([], dtype='object')


In [25]:
print(len(segment_labels))

15592


In [26]:
np.unique(segment_labels)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [27]:
# Classifier N.1: Simple NN

input_size = 3879
hidden_size = 512 
output_size = len(np.unique(segment_labels))

classifier = SimpleNNClassifier(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
#optimizer = optim.Adam(classifier.parameters(), lr=0.001)

runs = 1
epochs_per_run = 25
epochs = runs * epochs_per_run
learning_rate = 0.01

optimizer = optim.AdamW(classifier.parameters(), lr=learning_rate)
scheduler = OneCycleLR(optimizer, max_lr=learning_rate, epochs=epochs, steps_per_epoch=len(train_loader))

In [28]:
for epoch in range(epochs):
    current_run = epoch // epochs_per_run + 1
    current_epoch = epoch % epochs_per_run + 1
    train_running_loss = 0.0
    train_running_corrects = torch.tensor(0, device=device, dtype=torch.float)
    val_running_loss = 0.0
    val_running_corrects = torch.tensor(0, device=device, dtype=torch.float)

    # Training loop
    classifier.train()
    for batch_idx, batch_data in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} [Training]")):
        
        batch_data["policydecision_details"] = batch_data["policydecision_details"].to(device)
        batch_data["policy_description"] = batch_data["policy_description"].to(device)
        batch_data["contextoradditionalinfo"] = batch_data["contextoradditionalinfo"].to(device)
        batch_data["source1name"] = batch_data["source1name"].to(device)

        # Obtain embeddings for each text feature
        policydecision_details_embeddings = bert_model(batch_data["policydecision_details"])
        policy_description_embeddings = bert_model(batch_data["policy_description"])
        contextoradditionalinfo_embeddings = bert_model(batch_data["contextoradditionalinfo"])
        source1name_embeddings = bert_model(batch_data["source1name"])

        # Concatenate embeddings
        combined_embeddings = torch.cat((policydecision_details_embeddings.last_hidden_state[:, 0, :],
                                        policy_description_embeddings.last_hidden_state[:, 0, :],
                                        contextoradditionalinfo_embeddings.last_hidden_state[:, 0, :],
                                        source1name_embeddings.last_hidden_state[:, 0, :]), dim=1)
        
        # Concatenate non-text features
        batch_non_text_features = get_non_text_features(batch_data, non_text_features)
        batch_non_text_features = batch_non_text_features.to(device)
        combined_features = torch.cat((combined_embeddings, batch_non_text_features), dim=1)

        # Make sure to zero the gradients before every training step
        optimizer.zero_grad()
        
        # Forward pass through the classifier
        logits = classifier(combined_features)
        
        # Calculate the loss
        loss = criterion(logits, batch_data["segment"].to(device))
        
        # Backward pass
        loss.backward()
        
        # Update the weights
        optimizer.step()
        # Accumulate the loss and correct predictions for the current batch
        train_running_loss += loss.item() * batch_data["policydecision_details"].size(0)
        _, preds = torch.max(logits, 1)
        train_running_corrects += torch.sum(preds == batch_data["segment"].to(device))

    scheduler.step()
    train_epoch_loss = train_running_loss / len(train_loader.dataset)
    train_epoch_acc = train_running_corrects.double() / len(train_loader.dataset)

    # Validation loop
    classifier.eval()
    with torch.no_grad():
        for batch_idx, batch_data in enumerate(tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} [Validation]")):

            batch_data["policydecision_details"] = batch_data["policydecision_details"].to(device)
            batch_data["policy_description"] = batch_data["policy_description"].to(device)
            batch_data["contextoradditionalinfo"] = batch_data["contextoradditionalinfo"].to(device)
            batch_data["source1name"] = batch_data["source1name"].to(device)

            # Obtain embeddings for each text feature
            policydecision_details_embeddings = bert_model(batch_data["policydecision_details"])
            policy_description_embeddings = bert_model(batch_data["policy_description"])
            contextoradditionalinfo_embeddings = bert_model(batch_data["contextoradditionalinfo"])
            source1name_embeddings = bert_model(batch_data["source1name"])

            # Concatenate embeddings
            combined_embeddings = torch.cat((policydecision_details_embeddings.last_hidden_state[:, 0, :],
                                            policy_description_embeddings.last_hidden_state[:, 0, :],
                                            contextoradditionalinfo_embeddings.last_hidden_state[:, 0, :],
                                            source1name_embeddings.last_hidden_state[:, 0, :]), dim=1)
            
            # Concatenate non-text features
            batch_non_text_features = get_non_text_features(batch_data, non_text_features)
            batch_non_text_features = batch_non_text_features.to(device)
            combined_features = torch.cat((combined_embeddings, batch_non_text_features), dim=1)

            # Forward pass through the classifier
            logits = classifier(combined_features)

            # Calculate the loss
            loss = criterion(logits, batch_data["segment"].to(device))

            # Accumulate the loss and correct predictions for the current batch
            val_running_loss += loss.item() * batch_data["policydecision_details"].size(0)
            _, preds = torch.max(logits, 1)
            val_running_corrects += torch.sum(preds == batch_data["segment"].to(device))

    val_epoch_loss = val_running_loss / len(val_loader.dataset)
    val_epoch_acc = val_running_corrects.double() / len(val_loader.dataset)

    # Log metrics to wandb
    wandb.log({"train_loss": train_epoch_loss, "train_acc": train_epoch_acc,
               "val_loss": val_epoch_loss, "val_acc": val_epoch_acc})

    # Print metrics to console
    print(f"Run {current_run}/{runs}, Epoch {current_epoch}/{epochs_per_run}")
    print(f"Train Loss: {train_epoch_loss:.4f}, Train Acc: {train_epoch_acc:.4f}")
    print(f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")
    checkpoint_path = os.path.join(checkpoint_dir, f"epoch_{epoch+1}_classifier_thrice.pth")
    torch.save(classifier.state_dict(), checkpoint_path)

wandb.finish()


Epoch 1/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 1/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 1/25
Train Loss: 1.7354, Train Acc: 0.4050
Val Loss: 1.4792, Val Acc: 0.5614


Epoch 2/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 2/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 2/25
Train Loss: 1.1484, Train Acc: 0.6472
Val Loss: 1.2413, Val Acc: 0.5844


Epoch 3/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 3/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 3/25
Train Loss: 0.8134, Train Acc: 0.7653
Val Loss: 0.8943, Val Acc: 0.7157


Epoch 4/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 4/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 4/25
Train Loss: 0.6028, Train Acc: 0.8252
Val Loss: 0.9793, Val Acc: 0.6533


Epoch 5/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 5/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 5/25
Train Loss: 0.4722, Train Acc: 0.8642
Val Loss: 0.6995, Val Acc: 0.7700


Epoch 6/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 6/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 6/25
Train Loss: 0.4001, Train Acc: 0.8793
Val Loss: 0.6479, Val Acc: 0.7914


Epoch 7/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 7/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 7/25
Train Loss: 0.3278, Train Acc: 0.9031
Val Loss: 0.5913, Val Acc: 0.7978


Epoch 8/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 8/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 8/25
Train Loss: 0.2742, Train Acc: 0.9193
Val Loss: 0.6119, Val Acc: 0.7935


Epoch 9/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 9/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 9/25
Train Loss: 0.2538, Train Acc: 0.9250
Val Loss: 0.5739, Val Acc: 0.8123


Epoch 10/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 10/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 10/25
Train Loss: 0.2360, Train Acc: 0.9293
Val Loss: 0.5022, Val Acc: 0.8358


Epoch 11/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 11/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 11/25
Train Loss: 0.2118, Train Acc: 0.9348
Val Loss: 0.5007, Val Acc: 0.8371


Epoch 12/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 12/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 12/25
Train Loss: 0.1904, Train Acc: 0.9446
Val Loss: 0.5102, Val Acc: 0.8204


Epoch 13/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 13/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 13/25
Train Loss: 0.1937, Train Acc: 0.9401
Val Loss: 0.4121, Val Acc: 0.8670


Epoch 14/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 14/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 14/25
Train Loss: 0.1741, Train Acc: 0.9477
Val Loss: 0.3821, Val Acc: 0.8739


Epoch 15/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 15/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 15/25
Train Loss: 0.1593, Train Acc: 0.9472
Val Loss: 0.4092, Val Acc: 0.8666


Epoch 16/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 16/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 16/25
Train Loss: 0.1491, Train Acc: 0.9534
Val Loss: 0.4593, Val Acc: 0.8636


Epoch 17/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 17/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 17/25
Train Loss: 0.1301, Train Acc: 0.9610
Val Loss: 0.3791, Val Acc: 0.8811


Epoch 18/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 18/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 18/25
Train Loss: 0.1380, Train Acc: 0.9561
Val Loss: 0.3634, Val Acc: 0.8794


Epoch 19/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 19/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 19/25
Train Loss: 0.1257, Train Acc: 0.9589
Val Loss: 0.3549, Val Acc: 0.8850


Epoch 20/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 20/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 20/25
Train Loss: 0.1050, Train Acc: 0.9697
Val Loss: 0.3116, Val Acc: 0.8982


Epoch 21/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 21/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 21/25
Train Loss: 0.1138, Train Acc: 0.9662
Val Loss: 0.4926, Val Acc: 0.8619


Epoch 22/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 22/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 22/25
Train Loss: 0.0983, Train Acc: 0.9709
Val Loss: 0.4168, Val Acc: 0.8747


Epoch 23/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 23/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 23/25
Train Loss: 0.1159, Train Acc: 0.9609
Val Loss: 0.3088, Val Acc: 0.8944


Epoch 24/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 24/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 24/25
Train Loss: 0.0921, Train Acc: 0.9710
Val Loss: 0.3555, Val Acc: 0.8833


Epoch 25/25 [Training]:   0%|          | 0/342 [00:00<?, ?it/s]

Epoch 25/25 [Validation]:   0%|          | 0/74 [00:00<?, ?it/s]

Run 1/1, Epoch 25/25
Train Loss: 0.0895, Train Acc: 0.9725
Val Loss: 0.3509, Val Acc: 0.8893


0,1
train_acc,▁▄▅▆▇▇▇▇▇▇███████████████
train_loss,█▆▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▄▃▅▆▆▆▆▇▇▆▇▇▇▇████▇████
val_loss,█▇▅▅▃▃▃▃▃▂▂▂▂▁▂▂▁▁▁▁▂▂▁▁▁

0,1
train_acc,0.97251
train_loss,0.08952
val_acc,0.88927
val_loss,0.35087


In [29]:
## TEST



# Evaluate the model on the test dataset
classifier.eval()

test_running_loss = 0.0
test_running_corrects = torch.tensor(0, device=device, dtype=torch.float)

with torch.no_grad():
    for batch_idx, batch_data in enumerate(tqdm(test_loader, desc="Testing")):
        batch_data["policydecision_details"] = batch_data["policydecision_details"].to(device)
        batch_data["policy_description"] = batch_data["policy_description"].to(device)
        batch_data["contextoradditionalinfo"] = batch_data["contextoradditionalinfo"].to(device)
        batch_data["source1name"] = batch_data["source1name"].to(device)

        # Obtain embeddings for each text feature
        policydecision_details_embeddings = bert_model(batch_data["policydecision_details"])
        policy_description_embeddings = bert_model(batch_data["policy_description"])
        contextoradditionalinfo_embeddings = bert_model(batch_data["contextoradditionalinfo"])
        source1name_embeddings = bert_model(batch_data["source1name"])

        # Concatenate embeddings
        combined_embeddings = torch.cat((policydecision_details_embeddings.last_hidden_state[:, 0, :],
                                         policy_description_embeddings.last_hidden_state[:, 0, :],
                                         contextoradditionalinfo_embeddings.last_hidden_state[:, 0, :],
                                         source1name_embeddings.last_hidden_state[:, 0, :]), dim=1)

        # Concatenate non-text features
        batch_non_text_features = get_non_text_features(batch_data, non_text_features)
        batch_non_text_features = batch_non_text_features.to(device)
        combined_features = torch.cat((combined_embeddings, batch_non_text_features), dim=1)

        # Forward pass through the classifier
        logits = classifier(combined_features)

        # Calculate the loss
        loss = criterion(logits, batch_data["segment"].to(device))

        # Accumulate the loss and correct predictions for the current batch
        test_running_loss += loss.item() * batch_data["policydecision_details"].size(0)
        _, preds = torch.max(logits, 1)
        test_running_corrects += torch.sum(preds == batch_data["segment"].to(device))

test_loss = test_running_loss / len(test_loader.dataset)
test_acc = test_running_corrects.double() / len(test_loader.dataset)

print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")


Testing:   0%|          | 0/74 [00:00<?, ?it/s]

Test Loss: 0.3765, Test Acc: 0.8841


In [30]:
loaded_classifier = SimpleNNClassifier(input_size, hidden_size, output_size).to(device)
loaded_classifier.load_state_dict(torch.load('/content/drive/MyDrive/FSPD/ModelCheckpoints/epoch_20_classifier_thrice.pth'))
loaded_classifier.eval()  # Set the model to evaluation mode

SimpleNNClassifier(
  (fc1): Linear(in_features=3879, out_features=512, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=512, out_features=11, bias=True)
)

In [31]:
test_running_loss = 0.0
test_running_corrects = torch.tensor(0, device=device, dtype=torch.float)

with torch.no_grad():
    for batch_idx, batch_data in enumerate(tqdm(test_loader, desc="Testing")):
        batch_data["policydecision_details"] = batch_data["policydecision_details"].to(device)
        batch_data["policy_description"] = batch_data["policy_description"].to(device)
        batch_data["contextoradditionalinfo"] = batch_data["contextoradditionalinfo"].to(device)
        batch_data["source1name"] = batch_data["source1name"].to(device)

        # Obtain embeddings for each text feature
        policydecision_details_embeddings = bert_model(batch_data["policydecision_details"])
        policy_description_embeddings = bert_model(batch_data["policy_description"])
        contextoradditionalinfo_embeddings = bert_model(batch_data["contextoradditionalinfo"])
        source1name_embeddings = bert_model(batch_data["source1name"])

        # Concatenate embeddings
        combined_embeddings = torch.cat((policydecision_details_embeddings.last_hidden_state[:, 0, :],
                                         policy_description_embeddings.last_hidden_state[:, 0, :],
                                         contextoradditionalinfo_embeddings.last_hidden_state[:, 0, :],
                                         source1name_embeddings.last_hidden_state[:, 0, :]), dim=1)

        # Concatenate non-text features
        batch_non_text_features = get_non_text_features(batch_data, non_text_features)
        batch_non_text_features = batch_non_text_features.to(device)
        combined_features = torch.cat((combined_embeddings, batch_non_text_features), dim=1)

        # Forward pass through the classifier
        logits = loaded_classifier(combined_features)

        # Calculate the loss
        loss = criterion(logits, batch_data["segment"].to(device))

        # Accumulate the loss and correct predictions for the current batch
        test_running_loss += loss.item() * batch_data["policydecision_details"].size(0)
        _, preds = torch.max(logits, 1)
        test_running_corrects += torch.sum(preds == batch_data["segment"].to(device))

test_loss = test_running_loss / len(test_loader.dataset)
test_acc = test_running_corrects.double() / len(test_loader.dataset)

print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")

Testing:   0%|          | 0/74 [00:00<?, ?it/s]

Test Loss: 0.3382, Test Acc: 0.8957
