# Test Phase

<h2>Load data</h2>

In [1]:
import pandas as pd
from google.colab import drive
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
import torch.nn as nn
import logging
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import json
import torch.nn.functional as F
import os
from torch.utils.data import Dataset
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [2]:
# Mount Google Drive at /content/drive so the files stored there can be read by path.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Run folder whose checkpoints are evaluated below as the "BCEWithLogitLoss" variant.
directory_BCEWithLogitLoss = '2024-07-04_08-07-28' ### WARNING!!! ALWAYS CHANGE THIS!!!!###

# Root data folder on the mounted Drive; run folders and the embeddings folder are resolved against it.
path = '/content/drive/MyDrive/Progetto-Vascon/DataLog'
save_dir_base_BCEWithLogitLoss = os.path.join(path, directory_BCEWithLogitLoss)
# NOTE(review): not referenced anywhere else in the visible notebook — confirm it is still needed.
save_dir_bert_BCEWithLogitLoss = os.path.join(path, "bert_sequence_classification_trained")

In [None]:
# Run folder whose checkpoints are evaluated below as the "MultiLabelSoftMargin" variant.
directory_MultiLabelSoftMargin = '2024-07-03_10-03-02' ### WARNING!!! ALWAYS CHANGE THIS!!!!###

# Resolved against `path`, which is defined in the previous configuration cell.
save_dir_base_MultiLabelSoftMargin = os.path.join(path, directory_MultiLabelSoftMargin)
# NOTE(review): resolves to the same folder as save_dir_bert_BCEWithLogitLoss and is not
# referenced anywhere else in the visible notebook — confirm it is still needed.
save_dir_bert_MultiLabelSoftMargin = os.path.join(path, "bert_sequence_classification_trained")

<h2>Get the BERT embeddings from Drive</h2>

In [4]:
import pickle
import os
import numpy as np

# All test artefacts are read from the "embeddings" sub-folder of the data directory.
embeddings_dir = os.path.join(path, 'embeddings')

# File names of the pickled test embeddings (per-word and CLS).
file_words_test = "embeddings_words_test.pkl"
file_cls_test = "embedding_cls_test.pkl"

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point this cell at files you produced yourself.
with open(os.path.join(embeddings_dir, file_words_test), "rb") as f:
    embeddings_words_test = pickle.load(f)

with open(os.path.join(embeddings_dir, file_cls_test), "rb") as f:
    embedding_cls_test = pickle.load(f)

# The test labels are stored next to the embeddings as a NumPy array.
y_test = np.load(os.path.join(embeddings_dir, 'y_test.npy'))

print("Embeddings loaded successfully.")

Embeddings loaded successfully.


# Model class — ALWAYS UPDATE!
Modify it according to the criterion (loss function) in use.

In [5]:
class CustomDatasetForCLSToken(Dataset):
    """
    Dataset that serves three parallel collections item by item.

    Args:
        data: Indexable collection of input samples.
        targets: Indexable collection of labels, indexed like ``data``.
        cls_tokens: Indexable collection of CLS vectors, indexed like ``data``.
    """

    def __init__(self, data, targets, cls_tokens):
        self.data, self.targets, self.cls_tokens = data, targets, cls_tokens

    def __len__(self):
        """Return the number of items, taken from ``data`` alone."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, target, cls_token)`` triple stored at ``idx``."""
        return self.data[idx], self.targets[idx], self.cls_tokens[idx]


# Bundle the test word embeddings, labels and CLS embeddings so they are served together per index.
custom_test_dataset = CustomDatasetForCLSToken(embeddings_words_test, y_test, embedding_cls_test)

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import json

class Attention(nn.Module):
    """
    Scores every time step of a sequence and normalises the scores with softmax.

    Each step is projected to a single value by one linear layer, passed
    through tanh, and the values are soft-maxed along the sequence axis.
    """

    def __init__(self, hidden_dim):
        """
        Create the scoring layer.
        Args:
            hidden_dim (int): Size of the last axis of the tensors passed to ``forward``.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.attn = nn.Linear(hidden_dim, 1)
        self.init_weights()

    def init_weights(self):
        """
        Xavier-uniform initialise the scoring weights and zero the bias.
        """
        nn.init.xavier_uniform_(self.attn.weight)
        bias = self.attn.bias
        if bias is not None:
            nn.init.constant_(bias, 0)

    def forward(self, lstm_output):
        """
        Compute one weight per time step.
        Args:
            lstm_output (torch.Tensor): Tensor of shape (batch_size, seq_len, hidden_dim).
        Returns:
            torch.Tensor: Weights of shape (batch_size, seq_len); each row sums to 1.
        """
        # (batch_size, seq_len, 1) -> (batch_size, seq_len)
        scores = torch.tanh(self.attn(lstm_output)).squeeze(-1)
        # Normalise across the sequence axis.
        return F.softmax(scores, dim=1)

class SentimentClassifierWithSoftAttention(nn.Module):
    """
    Recurrent classifier that pools the RNN outputs with soft attention.

    The input sequence is encoded by an LSTM or a GRU, the per-step outputs are
    combined into one vector using the weights produced by ``Attention``, and
    that vector is mapped to ``output_dim`` raw scores by one of two heads:
    ``fc`` (pooled vector only) or ``fc_cls`` (pooled vector concatenated with
    a CLS vector).
    """

    def __init__(
        self,
        embedding_dim=768,
        hidden_dim=256,
        output_dim=6,
        n_layers=1,
        bidirectional=True,
        dropout=0.0,
        rnn_type='LSTM',
    ):
        """
        Build the attention layer, the recurrent encoder and both heads.
        Args:
            embedding_dim (int): Size of each input embedding (and of the CLS vector).
            hidden_dim (int): Hidden size of the RNN and of the heads' middle layer.
            output_dim (int): Number of scores produced per sample.
            n_layers (int): Number of stacked recurrent layers.
            bidirectional (bool): If True, the RNN runs in both directions.
            dropout (float): Dropout passed to the RNN; only used when n_layers > 1.
            rnn_type (str): 'LSTM' or 'GRU'.
        Raises:
            ValueError: If ``rnn_type`` is neither 'LSTM' nor 'GRU'.
        """
        super().__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        self.dropout = dropout
        self.rnn_type = rnn_type

        # Size of one RNN output step: both directions are concatenated when bidirectional.
        rnn_output_dim = hidden_dim * 2 if bidirectional else hidden_dim

        self.attention = Attention(rnn_output_dim)

        if rnn_type == 'LSTM':
            rnn_class = nn.LSTM
        elif rnn_type == 'GRU':
            rnn_class = nn.GRU
        else:
            raise ValueError("Choose a valid RNN type: LSTM or GRU")

        self.rnn = rnn_class(
            embedding_dim,
            hidden_dim,
            num_layers=n_layers,
            bidirectional=bidirectional,
            # Inter-layer dropout is only requested when there is more than one layer.
            dropout=dropout if n_layers > 1 else 0,
            batch_first=True,
        )

        # Head used by forward(): pooled RNN vector -> scores.
        self.fc = nn.Sequential(
            nn.Linear(rnn_output_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

        # Head used by forward_with_cls(): pooled RNN vector + CLS vector -> scores.
        self.fc_cls = nn.Sequential(
            nn.Linear(rnn_output_dim + embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

        self.init_weights()

    def init_weights(self):
        """
        Initialise the RNN and the ``fc_cls`` head.

        Weights get Xavier-uniform values and biases are set to zero. The
        ``fc`` head is not touched here and keeps the values ``nn.Linear``
        assigned at construction.
        """
        for param_name, parameter in self.rnn.named_parameters():
            if 'weight' in param_name:
                nn.init.xavier_uniform_(parameter.data)
            elif 'bias' in param_name:
                nn.init.constant_(parameter.data, 0)

        for module in self.fc_cls:
            if not isinstance(module, nn.Linear):
                continue
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def forward(self, embedded):
        """
        Score a batch using the word embeddings only.
        Args:
            embedded (torch.Tensor): Input of shape (batch_size, seq_len, embedding_dim).
        Returns:
            torch.Tensor: Raw scores of shape (batch_size, output_dim).
            torch.Tensor: Attention weights of shape (batch_size, seq_len).
        """
        pooled, attention_weights = self.prepare_data(embedded)
        return self.fc(pooled), attention_weights

    def forward_with_cls(self, embedded, cls_token):
        """
        Score a batch using the word embeddings together with a CLS vector.
        Args:
            embedded (torch.Tensor): Input of shape (batch_size, seq_len, embedding_dim).
            cls_token (torch.Tensor): CLS vectors; a size-1 axis at position 1 is
                removed, so (batch_size, 1, embedding_dim) becomes (batch_size, embedding_dim).
        Returns:
            torch.Tensor: Raw scores of shape (batch_size, output_dim).
            torch.Tensor: Attention weights of shape (batch_size, seq_len).
        """
        pooled, attention_weights = self.prepare_data(embedded)
        # Concatenate the attention-pooled RNN vector with the CLS vector along the feature axis.
        pooled_with_cls = torch.cat((pooled, cls_token.squeeze(1)), dim=1)
        return self.fc_cls(pooled_with_cls), attention_weights

    def prepare_data(self, embedded):
        """
        Run the RNN and pool its outputs with the attention weights.
        Args:
            embedded (torch.Tensor): Input of shape (batch_size, seq_len, embedding_dim).
        Returns:
            torch.Tensor: Weighted sum of the RNN outputs, shape (batch_size, rnn_output_dim),
                where rnn_output_dim is hidden_dim, or 2 * hidden_dim when bidirectional.
            torch.Tensor: Attention weights of shape (batch_size, seq_len).
        Raises:
            ValueError: If ``embedded`` is not 3-dimensional.
        """
        if len(embedded.shape) != 3:
            raise ValueError("Input shape must be 3D: (batch_size, seq_len, embedding_dim)")

        rnn_output, _ = self.rnn(embedded)  # (batch_size, seq_len, rnn_output_dim)
        attention_weights = self.attention(rnn_output)  # (batch_size, seq_len)

        # (batch_size, 1, seq_len) x (batch_size, seq_len, rnn_output_dim) -> (batch_size, 1, rnn_output_dim)
        pooled = torch.bmm(attention_weights.unsqueeze(1), rnn_output)
        return pooled.squeeze(1), attention_weights

    def get(self):
        """
        Serialise the constructor arguments.
        Returns:
            str: JSON object holding the hyper-parameters this model was built with.
        """
        return json.dumps({
            'embedding_dim': self.embedding_dim,
            'hidden_dim': self.hidden_dim,
            'output_dim': self.output_dim,
            'n_layers': self.n_layers,
            'bidirectional': self.bidirectional,
            'dropout': self.dropout,
            'rnn_type': self.rnn_type
        })


In [7]:
def calculate_metrics(predictions, targets):
    """
    Count the confusion-matrix cells of every class.

    Args:
        predictions: 2-D array-like of shape (n_samples, n_classes) holding 0/1 predictions.
        targets: 2-D array-like of the same shape holding 0/1 ground-truth labels.

    Returns:
        pd.DataFrame: One row per class (index 0..n_classes-1) with the columns
        'true_positives', 'true_negatives', 'false_positives' and 'false_negatives'.
        Entries whose prediction or target is neither 0 nor 1 are not counted
        in any column.

    Raises:
        ValueError: If the inputs are not 2-D or their shapes differ.
    """
    predictions = np.asarray(predictions)
    targets = np.asarray(targets)

    if predictions.ndim != 2:
        raise ValueError("predictions must be 2D: (n_samples, n_classes)")
    if predictions.shape != targets.shape:
        # A per-element zip would silently drop the surplus rows; fail loudly instead.
        raise ValueError("Predictions and targets must have the same shape.")

    num_classes = predictions.shape[1]

    predicted_positive = predictions == 1
    predicted_negative = predictions == 0
    actual_positive = targets == 1
    actual_negative = targets == 0

    # Summing the boolean masks over the sample axis gives one count per class.
    class_metrics = {
        'true_positives': (predicted_positive & actual_positive).sum(axis=0),
        'true_negatives': (predicted_negative & actual_negative).sum(axis=0),
        'false_positives': (predicted_positive & actual_negative).sum(axis=0),
        'false_negatives': (predicted_negative & actual_positive).sum(axis=0),
    }

    return pd.DataFrame(class_metrics, index=range(num_classes))

def calculate_accuracy(metrics):
    """
    Compute the accuracy over all classes pooled together.

    Args:
        metrics (pd.DataFrame): Per-class counts with the columns 'true_positives',
            'true_negatives', 'false_positives' and 'false_negatives'.

    Returns:
        (TP + TN) divided by the sum of every count in ``metrics``, or 0 when
        that sum is zero (same convention as calculate_precision / calculate_recall).
    """
    correct = metrics['true_positives'].sum() + metrics['true_negatives'].sum()
    total = metrics.sum().sum()
    if total > 0:
        return correct / total
    else:
        return 0

def calculate_precision(metrics):
    """
    Compute the precision over all classes pooled together.

    Args:
        metrics (pd.DataFrame): Per-class counts with at least the columns
            'true_positives' and 'false_positives'.

    Returns:
        TP / (TP + FP) using the column totals, or 0 when TP + FP is not positive.
    """
    true_positive_total = metrics['true_positives'].sum()
    predicted_positive_total = true_positive_total + metrics['false_positives'].sum()
    return true_positive_total / predicted_positive_total if predicted_positive_total > 0 else 0

def calculate_recall(metrics):
    """
    Compute the recall over all classes pooled together.

    Args:
        metrics (pd.DataFrame): Per-class counts with at least the columns
            'true_positives' and 'false_negatives'.

    Returns:
        TP / (TP + FN) using the column totals, or 0 when TP + FN is not positive.
    """
    true_positive_total = metrics['true_positives'].sum()
    actual_positive_total = true_positive_total + metrics['false_negatives'].sum()
    return true_positive_total / actual_positive_total if actual_positive_total > 0 else 0

In [8]:
def calculate_hamming_distance(predictions, targets):
    """
    Count the positions at which predictions and targets differ.

    Args:
        predictions: NumPy array, or anything ``np.array`` can convert.
        targets: NumPy array (or convertible) with the same shape as ``predictions``.

    Returns:
        The number of differing entries (an absolute count, not a ratio).

    Raises:
        AssertionError: If the two shapes differ.
    """
    import numpy as np

    # Inputs that are already arrays are used as they are; everything else is converted.
    predictions, targets = (
        values if isinstance(values, np.ndarray) else np.array(values)
        for values in (predictions, targets)
    )

    assert predictions.shape == targets.shape, "Predictions and targets must have the same shape."

    return np.sum(predictions != targets)

In [9]:
def calculate_f1_score(precision, recall):
    """
    Combine precision and recall into their harmonic mean.

    Args:
        precision: Precision value.
        recall: Recall value.

    Returns:
        2 * precision * recall / (precision + recall), or 0 when the sum of
        precision and recall is zero.
    """
    denominator = precision + recall
    if denominator == 0:
        # The harmonic mean is undefined here; report 0 instead of dividing by zero.
        return 0
    return 2 * (precision * recall) / denominator

<h2>Parameters</h2>

In [10]:
batch_size = 264
# Decision threshold that the test functions compare directly against the model outputs.
# The misspelled name is kept on purpose: later cells reference `thershold`.
thershold = 0.65

# The metrics below are plain counts over the whole test set, so sample order does not
# affect them; leaving the loader unshuffled keeps the batch order identical on every run.
test_loader = DataLoader(custom_test_dataset, batch_size=batch_size, shuffle=False)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

<h2>Testing phase without CLS</h2>

In [11]:
# After training, load the best model for test prediction
#best_model = SentimentClassifierWithSoftAttentionWithCLS().to(device)
#best_model.load_state_dict(torch.load('best_model_path'))

def test_without_cls(model_rnn, test_loader, t=0.5):
  """
  Evaluate a model on the test set using the word embeddings only, and print the metrics.

  Args:
      model_rnn: Model whose call returns ``(outputs, attention_weights)``.
      test_loader: Iterable yielding ``(data, targets, cls_tokens)`` batches; the
          CLS tokens are ignored here.
      t (float): Threshold; an output strictly greater than ``t`` is predicted as 1.

  Prints accuracy, precision, recall, F1-score and Hamming distance computed over
  all classes pooled together. Returns nothing.
  """
  model_rnn.eval()

  batch_predictions = []
  batch_targets = []
  with torch.no_grad():
    for data, targets, _ in test_loader:
      # Only the inputs are needed on the global `device`; the labels are compared on the CPU.
      data = data.to(device=device, dtype=torch.float32)

      outputs, _ = model_rnn(data)

      # NOTE(review): `t` is compared against the raw model outputs (no sigmoid is
      # applied here) — confirm this matches how the threshold was chosen.
      predictions = (outputs > t).float()

      batch_predictions.append(predictions.cpu())
      batch_targets.append(targets.cpu())

  # Stack the per-batch results into one (n_samples, n_classes) array each.
  predictions_np = torch.cat(batch_predictions, dim=0).numpy()
  targets_np = torch.cat(batch_targets, dim=0).numpy()

  metrics = calculate_metrics(predictions_np, targets_np)
  overall_test_accuracy = calculate_accuracy(metrics)
  overall_test_precision = calculate_precision(metrics)
  overall_test_recall = calculate_recall(metrics)
  overall_test_f1_score = calculate_f1_score(overall_test_precision, overall_test_recall)
  overall_test_hamming_distance = calculate_hamming_distance(predictions_np, targets_np)

  print("----")
  print(f"Test Accuracy: {overall_test_accuracy:.4f}")
  print(f"Test Precision: {overall_test_precision:.4f}")
  print(f"Test Recall: {overall_test_recall:.4f}")
  print(f"Test F1-score: {overall_test_f1_score:.4f}")
  print(f"Test Hamming Distance: {overall_test_hamming_distance}")
  print("")

In [12]:
# Evaluate the LSTM checkpoint (no CLS token) from the BCEWithLogitLoss run folder.
filename = 'LSTM/best_model_LSTM.pth'
best_model_path_lstm_BCEWithLogitLoss = os.path.join(save_dir_base_BCEWithLogitLoss, filename)

best_model_lstm_BCEWithLogitLoss = SentimentClassifierWithSoftAttention(rnn_type='LSTM').to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on a CPU-only runtime.
best_model_lstm_BCEWithLogitLoss.load_state_dict(
    torch.load(best_model_path_lstm_BCEWithLogitLoss, map_location=device)
)
test_without_cls(best_model_lstm_BCEWithLogitLoss, test_loader, t=thershold)

----
Test Accuracy: 0.8832
Test Precision: 0.8725
Test Recall: 0.7146
Test F1-score: 0.7857
Test Hamming Distance: 5575



In [None]:
# Evaluate the LSTM checkpoint (no CLS token) from the MultiLabelSoftMargin run folder.
# `filename` still holds the LSTM checkpoint name set in the previous loading cell.
best_model_path_lstm_MultiLabelSoftMargin = os.path.join(save_dir_base_MultiLabelSoftMargin, filename)

best_model_lstm_MultiLabelSoftMargin = SentimentClassifierWithSoftAttention(rnn_type='LSTM').to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on a CPU-only runtime.
best_model_lstm_MultiLabelSoftMargin.load_state_dict(
    torch.load(best_model_path_lstm_MultiLabelSoftMargin, map_location=device)
)
test_without_cls(best_model_lstm_MultiLabelSoftMargin, test_loader, t=thershold)

----
Test Accuracy: 0.8846
Test Precision: 0.8620
Test Recall: 0.7321
Test F1-score: 0.7918
Test Hamming Distance: 5508



In [14]:
def test_both_models_without_cls(model_rnn1, model_rnn2, test_loader, t=0.5):
  """
  Evaluate two models jointly on the word embeddings only, and print the metrics.

  A label is predicted as 1 only when BOTH models output a value strictly
  greater than ``t`` for it (logical AND of the two thresholded predictions).

  Args:
      model_rnn1: First model; its call returns ``(outputs, attention_weights)``.
      model_rnn2: Second model with the same calling convention.
      test_loader: Iterable yielding ``(data, targets, cls_tokens)`` batches; the
          CLS tokens are ignored here.
      t (float): Threshold applied to the outputs of both models.

  Prints accuracy, precision, recall, F1-score and Hamming distance computed over
  all classes pooled together. Returns nothing.
  """
  model_rnn1.eval()
  model_rnn2.eval()

  batch_predictions = []
  batch_targets = []
  with torch.no_grad():
    for data, targets, _ in test_loader:
      # Only the inputs are needed on the global `device`; the labels are compared on the CPU.
      data = data.to(device=device, dtype=torch.float32)

      outputs1, _ = model_rnn1(data)
      outputs2, _ = model_rnn2(data)

      # NOTE(review): `t` is compared against the raw model outputs (no sigmoid is
      # applied here) — confirm this matches how the threshold was chosen.
      predictions = ((outputs1 > t) & (outputs2 > t)).float()

      batch_predictions.append(predictions.cpu())
      batch_targets.append(targets.cpu())

  # Stack the per-batch results into one (n_samples, n_classes) array each.
  predictions_np = torch.cat(batch_predictions, dim=0).numpy()
  targets_np = torch.cat(batch_targets, dim=0).numpy()

  metrics = calculate_metrics(predictions_np, targets_np)
  overall_test_accuracy = calculate_accuracy(metrics)
  overall_test_precision = calculate_precision(metrics)
  overall_test_recall = calculate_recall(metrics)
  overall_test_f1_score = calculate_f1_score(overall_test_precision, overall_test_recall)
  overall_test_hamming_distance = calculate_hamming_distance(predictions_np, targets_np)

  print("----")
  print(f"Test Accuracy: {overall_test_accuracy:.4f}")
  print(f"Test Precision: {overall_test_precision:.4f}")
  print(f"Test Recall: {overall_test_recall:.4f}")
  print(f"Test F1-score: {overall_test_f1_score:.4f}")
  print(f"Test Hamming Distance: {overall_test_hamming_distance}")
  print("")

#test_both_models_without_cls(best_model_lstm_BCEWithLogitLoss, best_model_lstm_MultiLabelSoftMargin, test_loader, t=thershold)

In [13]:
# Evaluate the GRU checkpoint (no CLS token) from the BCEWithLogitLoss run folder.
filename = 'GRU/best_model_GRU.pth'
best_model_path_gru_BCEWithLogitLoss = os.path.join(save_dir_base_BCEWithLogitLoss, filename)


best_model_gru_BCEWithLogitLoss = SentimentClassifierWithSoftAttention(rnn_type='GRU').to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on a CPU-only runtime.
best_model_gru_BCEWithLogitLoss.load_state_dict(
    torch.load(best_model_path_gru_BCEWithLogitLoss, map_location=device)
)
test_without_cls(best_model_gru_BCEWithLogitLoss, test_loader, t=thershold)

----
Test Accuracy: 0.8704
Test Precision: 0.9168
Test Recall: 0.6245
Test F1-score: 0.7429
Test Hamming Distance: 6182



In [15]:
# Joint evaluation of the LSTM and GRU checkpoints from the BCEWithLogitLoss run:
# a label counts as predicted only when both models predict it.
test_both_models_without_cls(best_model_lstm_BCEWithLogitLoss, best_model_gru_BCEWithLogitLoss, test_loader, t=thershold)

----
Test Accuracy: 0.8654
Test Precision: 0.9274
Test Recall: 0.5977
Test F1-score: 0.7269
Test Hamming Distance: 6423



In [None]:
# Evaluate the GRU checkpoint (no CLS token) from the MultiLabelSoftMargin run folder.
# `filename` still holds the GRU checkpoint name set in the previous loading cell.
best_model_path_gru_MultiLabelSoftMargin = os.path.join(save_dir_base_MultiLabelSoftMargin, filename)


best_model_gru_MultiLabelSoftMargin = SentimentClassifierWithSoftAttention(rnn_type='GRU').to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on a CPU-only runtime.
best_model_gru_MultiLabelSoftMargin.load_state_dict(
    torch.load(best_model_path_gru_MultiLabelSoftMargin, map_location=device)
)
test_without_cls(best_model_gru_MultiLabelSoftMargin, test_loader, t=thershold)

----
Test Accuracy: 0.8690
Test Precision: 0.9182
Test Recall: 0.6182
Test F1-score: 0.7389
Test Hamming Distance: 6249



In [None]:
# Joint evaluation of the two GRU checkpoints (BCEWithLogitLoss and MultiLabelSoftMargin runs):
# a label counts as predicted only when both models predict it.
test_both_models_without_cls(best_model_gru_BCEWithLogitLoss,
                             best_model_gru_MultiLabelSoftMargin, test_loader, t=thershold)

----
Test Accuracy: 0.8677
Test Precision: 0.9201
Test Recall: 0.6118
Test F1-score: 0.7349
Test Hamming Distance: 6313



<h2>Testing phase with CLS</h2>

In [16]:
def test_with_cls(model_rnn, test_loader, t=0.5):
  """
  Evaluate a model on the test set using word embeddings plus the CLS vector, and print the metrics.

  Args:
      model_rnn: Model exposing ``forward_with_cls(data, cls_tokens)`` that returns
          ``(outputs, attention_weights)``.
      test_loader: Iterable yielding ``(data, targets, cls_tokens)`` batches, with
          ``cls_tokens`` being 2-D (one vector per sample).
      t (float): Threshold; an output strictly greater than ``t`` is predicted as 1.

  Prints accuracy, precision, recall, F1-score and Hamming distance computed over
  all classes pooled together. Returns nothing.
  """
  model_rnn.eval()

  batch_predictions = []
  batch_targets = []
  with torch.no_grad():
    for data, targets, cls_tokens in test_loader:
      # Only the inputs are needed on the global `device`; the labels are compared on the CPU.
      data = data.to(device=device, dtype=torch.float32)
      cls_tokens = cls_tokens.to(device=device, dtype=torch.float32)
      # Insert a length-1 axis: (batch, features) -> (batch, 1, features).
      cls_tokens = cls_tokens.view(cls_tokens.shape[0], 1, cls_tokens.shape[1])

      outputs, _ = model_rnn.forward_with_cls(data, cls_tokens)

      # NOTE(review): `t` is compared against the raw model outputs (no sigmoid is
      # applied here) — confirm this matches how the threshold was chosen.
      predictions = (outputs > t).float()

      batch_predictions.append(predictions.cpu())
      batch_targets.append(targets.cpu())

  # Stack the per-batch results into one (n_samples, n_classes) array each.
  predictions_np = torch.cat(batch_predictions, dim=0).numpy()
  targets_np = torch.cat(batch_targets, dim=0).numpy()

  metrics = calculate_metrics(predictions_np, targets_np)
  overall_test_accuracy = calculate_accuracy(metrics)
  overall_test_precision = calculate_precision(metrics)
  overall_test_recall = calculate_recall(metrics)
  overall_test_f1_score = calculate_f1_score(overall_test_precision, overall_test_recall)
  overall_test_hamming_distance = calculate_hamming_distance(predictions_np, targets_np)

  print("----")
  print(f"Test Accuracy: {overall_test_accuracy:.4f}")
  print(f"Test Precision: {overall_test_precision:.4f}")
  print(f"Test Recall: {overall_test_recall:.4f}")
  print(f"Test F1-score: {overall_test_f1_score:.4f}")
  print(f"Test Hamming Distance: {overall_test_hamming_distance}")
  print("")

In [17]:
# Evaluate the LSTM + CLS checkpoint from the BCEWithLogitLoss run folder.
filename = 'LSTM_CLS/best_model_LSTM_CLS.pth'
best_model_path_lstm_cls_BCEWithLogitLoss = os.path.join(save_dir_base_BCEWithLogitLoss, filename)

best_model_lstm_cls_BCEWithLogitLoss = SentimentClassifierWithSoftAttention(rnn_type='LSTM').to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on a CPU-only runtime.
best_model_lstm_cls_BCEWithLogitLoss.load_state_dict(
    torch.load(best_model_path_lstm_cls_BCEWithLogitLoss, map_location=device)
)
test_with_cls(best_model_lstm_cls_BCEWithLogitLoss, test_loader, t=thershold)

----
Test Accuracy: 0.8861
Test Precision: 0.9022
Test Recall: 0.6953
Test F1-score: 0.7854
Test Hamming Distance: 5436



In [None]:
# Evaluate the LSTM + CLS checkpoint from the MultiLabelSoftMargin run folder.
# `filename` still holds the LSTM_CLS checkpoint name set in the previous loading cell.
best_model_path_lstm_cls_MultiLabelSoftMargin = os.path.join(save_dir_base_MultiLabelSoftMargin, filename)

best_model_lstm_cls_MultiLabelSoftMargin = SentimentClassifierWithSoftAttention(rnn_type='LSTM').to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on a CPU-only runtime.
best_model_lstm_cls_MultiLabelSoftMargin.load_state_dict(
    torch.load(best_model_path_lstm_cls_MultiLabelSoftMargin, map_location=device)
)
test_with_cls(best_model_lstm_cls_MultiLabelSoftMargin, test_loader, t=thershold)

----
Test Accuracy: 0.8849
Test Precision: 0.8996
Test Recall: 0.6933
Test F1-score: 0.7831
Test Hamming Distance: 5494



In [20]:
def test_both_models_with_cls(model_rnn1, model_rnn2, test_loader, t=0.5):
  """
  Evaluate two models jointly on word embeddings plus the CLS vector, and print the metrics.

  A label is predicted as 1 only when BOTH models output a value strictly
  greater than ``t`` for it (logical AND of the two thresholded predictions).

  Args:
      model_rnn1: First model exposing ``forward_with_cls(data, cls_tokens)`` that
          returns ``(outputs, attention_weights)``.
      model_rnn2: Second model with the same calling convention.
      test_loader: Iterable yielding ``(data, targets, cls_tokens)`` batches, with
          ``cls_tokens`` being 2-D (one vector per sample).
      t (float): Threshold applied to the outputs of both models.

  Prints accuracy, precision, recall, F1-score and Hamming distance computed over
  all classes pooled together. Returns nothing.
  """
  model_rnn1.eval()
  model_rnn2.eval()

  batch_predictions = []
  batch_targets = []
  with torch.no_grad():
    for data, targets, cls_tokens in test_loader:
      # Only the inputs are needed on the global `device`; the labels are compared on the CPU.
      data = data.to(device=device, dtype=torch.float32)
      cls_tokens = cls_tokens.to(device=device, dtype=torch.float32)
      # Insert a length-1 axis: (batch, features) -> (batch, 1, features).
      cls_tokens = cls_tokens.view(cls_tokens.shape[0], 1, cls_tokens.shape[1])

      outputs1, _ = model_rnn1.forward_with_cls(data, cls_tokens)
      outputs2, _ = model_rnn2.forward_with_cls(data, cls_tokens)

      # NOTE(review): `t` is compared against the raw model outputs (no sigmoid is
      # applied here) — confirm this matches how the threshold was chosen.
      predictions = ((outputs1 > t) & (outputs2 > t)).float()

      batch_predictions.append(predictions.cpu())
      batch_targets.append(targets.cpu())

  # Stack the per-batch results into one (n_samples, n_classes) array each.
  predictions_np = torch.cat(batch_predictions, dim=0).numpy()
  targets_np = torch.cat(batch_targets, dim=0).numpy()

  metrics = calculate_metrics(predictions_np, targets_np)
  overall_test_accuracy = calculate_accuracy(metrics)
  overall_test_precision = calculate_precision(metrics)
  overall_test_recall = calculate_recall(metrics)
  overall_test_f1_score = calculate_f1_score(overall_test_precision, overall_test_recall)
  overall_test_hamming_distance = calculate_hamming_distance(predictions_np, targets_np)

  print("----")
  print(f"Test Accuracy: {overall_test_accuracy:.4f}")
  print(f"Test Precision: {overall_test_precision:.4f}")
  print(f"Test Recall: {overall_test_recall:.4f}")
  print(f"Test F1-score: {overall_test_f1_score:.4f}")
  print(f"Test Hamming Distance: {overall_test_hamming_distance}")
  print("")

In [None]:
# Joint evaluation of the two LSTM + CLS checkpoints (BCEWithLogitLoss and MultiLabelSoftMargin runs):
# a label counts as predicted only when both models predict it.
test_both_models_with_cls(best_model_lstm_cls_BCEWithLogitLoss,
                             best_model_lstm_cls_MultiLabelSoftMargin, test_loader, t=thershold)

----
Test Accuracy: 0.8831
Test Precision: 0.9069
Test Recall: 0.6800
Test F1-score: 0.7772
Test Hamming Distance: 5576



In [18]:
# Evaluate the GRU + CLS checkpoint from the BCEWithLogitLoss run folder.
filename = 'GRU_CLS/best_model_GRU_CLS.pth'

best_model_path_gru_cls_BCEWithLogitLoss = os.path.join(save_dir_base_BCEWithLogitLoss, filename)


best_model_gru_cls_BCEWithLogitLoss = SentimentClassifierWithSoftAttention(rnn_type='GRU').to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on a CPU-only runtime.
best_model_gru_cls_BCEWithLogitLoss.load_state_dict(
    torch.load(best_model_path_gru_cls_BCEWithLogitLoss, map_location=device)
)
test_with_cls(best_model_gru_cls_BCEWithLogitLoss, test_loader, t=thershold)

----
Test Accuracy: 0.8879
Test Precision: 0.8963
Test Recall: 0.7078
Test F1-score: 0.7910
Test Hamming Distance: 5350



In [None]:
# Evaluate the GRU + CLS checkpoint from the MultiLabelSoftMargin run folder.
# `filename` still holds the GRU_CLS checkpoint name set in the previous loading cell.
best_model_path_gru_cls_MultiLabelSoftMargin = os.path.join(save_dir_base_MultiLabelSoftMargin, filename)


best_model_gru_cls_MultiLabelSoftMargin = SentimentClassifierWithSoftAttention(rnn_type='GRU').to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on a CPU-only runtime.
best_model_gru_cls_MultiLabelSoftMargin.load_state_dict(
    torch.load(best_model_path_gru_cls_MultiLabelSoftMargin, map_location=device)
)
test_with_cls(best_model_gru_cls_MultiLabelSoftMargin, test_loader, t=thershold)

----
Test Accuracy: 0.8871
Test Precision: 0.8880
Test Recall: 0.7134
Test F1-score: 0.7912
Test Hamming Distance: 5386



In [21]:
# Joint evaluation of the GRU + CLS and LSTM + CLS checkpoints from the BCEWithLogitLoss run:
# a label counts as predicted only when both models predict it.
test_both_models_with_cls(best_model_gru_cls_BCEWithLogitLoss,
                             best_model_lstm_cls_BCEWithLogitLoss, test_loader, t=thershold)

----
Test Accuracy: 0.8837
Test Precision: 0.9069
Test Recall: 0.6820
Test F1-score: 0.7785
Test Hamming Distance: 5550

