<center>

# Natural language processing
## Project - Sentence segmentation
## Model testing

### 2023./2024.
## Matea Kunac, Marijana Rendulić
</center>

# 1. Introduction

This notebook evaluates the trained sentence-segmentation model on the test set and reports its accuracy, precision, recall, and F1 score.

# 2. Code

## Libraries

In [None]:
from google.colab import drive

# Directory under which Google Drive is exposed inside the Colab runtime;
# every data/model path used later in this notebook lives below it.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import torch
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pickle
from sklearn.metrics import precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import numpy as np
from torch.nn.utils.rnn import pad_sequence

## Functions

In [None]:
class TextChunkDataset(Dataset):
    """
    Dataset backed by two pickle files: one holding the encoded chunks and one
    holding the matching labels.

    Both files are read completely into memory when the dataset is created.
    Item ``idx`` pairs ``chunks[idx]`` (returned as a ``torch.long`` tensor)
    with ``labels[idx]`` (returned as a ``torch.float32`` tensor).

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserializing, so only point this class at files from a trusted source.
    """
    def __init__(self, chunks_file, labels_file):
        loaded = []
        for path in (chunks_file, labels_file):
            with open(path, 'rb') as handle:
                loaded.append(pickle.load(handle))
        self.chunks, self.labels = loaded

    def __len__(self):
        # The dataset size is taken from the chunk list alone.
        return len(self.chunks)

    def __getitem__(self, idx):
        chunk_tensor = torch.tensor(self.chunks[idx], dtype=torch.long)
        label_tensor = torch.tensor(self.labels[idx], dtype=torch.float32)
        return chunk_tensor, label_tensor

In [None]:
def pad_collate(batch):
    """
    Collate function that pads every sequence in a batch to the length of the
    longest one.

    ``batch`` is a sequence of ``(input, label)`` tensor pairs. Inputs and
    labels are padded separately, batch dimension first, both with the value 0,
    and returned as an ``(inputs, labels)`` tuple. Because labels are padded
    with 0, padded label positions cannot be told apart from genuine 0 labels
    by value alone.
    """
    columns = list(zip(*batch))
    sequences, targets = columns

    return tuple(
        pad_sequence(column, batch_first=True, padding_value=0)
        for column in (sequences, targets)
    )

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def evaluate(model, data_loader, pad_token_id=None):
    """
    Evaluate a per-position binary classifier on a dataset.

    Switches the model to evaluation mode, runs it over every batch without
    gradient tracking, rounds the sigmoid of the model outputs to 0 or 1 and
    compares those predictions with the reference labels.

    Args:
        model: Module called as ``model(chunks)``; a trailing dimension of
            size 1 in its output is squeezed away before thresholding.
        data_loader: Iterable yielding ``(chunks, labels)`` tensor pairs.
        pad_token_id: Optional token id that marks padding in ``chunks``.
            When given, positions whose input token equals this id are left
            out of all metrics. With the default ``None`` every position is
            scored, including padded ones, which are then counted as
            ordinary 0 labels.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``. Accuracy is a percentage
        (0-100); the other three are the values returned by the scikit-learn
        ``precision_score``, ``recall_score`` and ``f1_score`` functions.
    """
    model.eval()
    prediction_batches = []
    label_batches = []
    with torch.no_grad():
        for chunks, labels in data_loader:
            chunks, labels = chunks.to(device), labels.to(device)
            outputs = model(chunks).squeeze(-1)
            predicted = torch.round(torch.sigmoid(outputs))

            if pad_token_id is not None:
                # Boolean indexing keeps only real tokens and flattens to 1-D.
                keep = chunks != pad_token_id
                predicted, labels = predicted[keep], labels[keep]

            # Store one array per batch; extending a Python list element by
            # element would iterate over every single prediction in Python.
            prediction_batches.append(predicted.reshape(-1).cpu().numpy())
            label_batches.append(labels.reshape(-1).cpu().numpy())

    # np.concatenate rejects an empty list, so fall back to empty arrays.
    all_predictions = np.concatenate(prediction_batches) if prediction_batches else np.array([])
    all_labels = np.concatenate(label_batches) if label_batches else np.array([])

    accuracy = 100 * (all_predictions == all_labels).mean()
    precision = precision_score(all_labels, all_predictions)
    recall = recall_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)

    return accuracy, precision, recall, f1

In [None]:
class MultiLayerBiGRUModel(nn.Module):
    """
    Sequence model built on a stacked bidirectional GRU.

    Processing pipeline:
    - ``embedding``: maps each input token id to a vector of size ``embedding_dim``
    - ``gru``: bidirectional, batch-first GRU with ``num_layers`` stacked layers
      and ``hidden_dim`` units per direction
    - ``fc``: linear layer applied to every position of the GRU output, mapping
      ``2 * hidden_dim`` features to ``output_dim`` values
    """
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim=1, num_layers=3):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.gru = nn.GRU(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Forward and backward features are concatenated, hence 2 * hidden_dim.
        self.fc = nn.Linear(in_features=2 * hidden_dim, out_features=output_dim)

    def forward(self, text):
        # The GRU's final hidden state is not needed; only per-position outputs are used.
        sequence_features, _ = self.gru(self.embedding(text))
        return self.fc(sequence_features)

## Data loading

In [None]:
# Locations of the pickled test split on the mounted Drive.
test_chunks_path = '/content/drive/MyDrive/Sentence_segmentation_popravak/data/test_encoded_chunks.pkl'
test_labels_path = '/content/drive/MyDrive/Sentence_segmentation_popravak/data/test_encoded_labels.pkl'

test_dataset = TextChunkDataset(test_chunks_path, test_labels_path)

# Fixed order (no shuffling); pad_collate pads each batch to its longest sequence.
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    collate_fn=pad_collate,
)

## Model testing

In [None]:
def load_pickle(file_path):
    """
    Read a pickle file and return the object stored in it.

    The file is opened in binary mode and closed again before returning.
    NOTE(review): unpickling can run arbitrary code, so ``file_path`` should
    only ever point at a trusted file.
    """
    with open(file_path, 'rb') as source:
        payload = pickle.load(source)
    return payload

# Model parameters
embedding_dim = 100
hidden_dim = 128

vocab = load_pickle('/content/drive/MyDrive/Sentence_segmentation_popravak/data/vocab.pkl')
# One embedding row more than there are vocabulary entries
# (presumably index 0 is reserved for padding — TODO confirm against the encoding step).
vocab_size = len(vocab) + 1

In [None]:
# Initialize the model; its layer shapes must match the checkpoint for
# load_state_dict to succeed.
model = MultiLayerBiGRUModel(vocab_size, embedding_dim, hidden_dim)

# Resolve the target device first so the checkpoint can be remapped onto it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model_path = '/content/drive/MyDrive/Sentence_segmentation_popravak/models/best_model_epoch_12_f1_0.90.pth'
# map_location lets a checkpoint that holds CUDA tensors load on a CPU-only
# runtime; without it torch.load tries to restore tensors to their saved device.
model.load_state_dict(torch.load(model_path, map_location=device))

# Move model to the appropriate device
model.to(device)

MultiLayerBiGRUModel(
  (embedding): Embedding(58375, 100)
  (gru): GRU(100, 128, num_layers=3, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)

In [None]:
# Score the loaded model on the test split and report each metric on its own line.
test_accuracy, test_precision, test_recall, test_f1 = evaluate(model, test_loader)
print(
    f'Accuracy on test set: {test_accuracy:.2f}%',
    f'Precision on test set: {test_precision:.2f}',
    f'Recall on test set: {test_recall:.2f}',
    f'F1 Score on test set: {test_f1:.2f}',
    sep='\n',
)

Accuracy on test set: 99.38%
Precision on test set: 0.96
Recall on test set: 0.88
F1 Score on test set: 0.92


In [None]:
def print_labels_predictions(model, data_loader):
    """
    Run a trained model over a dataset and print the actual and predicted
    labels for comparison.

    The model is switched to evaluation mode and run without gradient
    tracking. Predictions are the sigmoid of the model outputs rounded to
    0 or 1. Labels and predictions from all batches are flattened into two
    1-D arrays (every position is included, padded ones too) and printed;
    nothing is returned.

    Args:
        model: Module called as ``model(chunks)``; a trailing dimension of
            size 1 in its output is squeezed away.
        data_loader: Iterable yielding ``(chunks, labels)`` tensor pairs.
    """
    model.eval()
    prediction_batches = []
    label_batches = []
    with torch.no_grad():
        for chunks, labels in data_loader:
            chunks, labels = chunks.to(device), labels.to(device)
            outputs = model(chunks).squeeze(-1)
            predicted = torch.round(torch.sigmoid(outputs))

            # Store one array per batch; extending a Python list element by
            # element would iterate over every single prediction in Python.
            prediction_batches.append(predicted.reshape(-1).cpu().numpy())
            label_batches.append(labels.reshape(-1).cpu().numpy())

    # np.concatenate rejects an empty list, so fall back to empty arrays.
    all_predictions = np.concatenate(prediction_batches) if prediction_batches else np.array([])
    all_labels = np.concatenate(label_batches) if label_batches else np.array([])

    print("Actual Labels:", all_labels)
    print("Predicted Labels:", all_predictions)

print_labels_predictions(model, test_loader)

Actual Labels: [1. 0. 0. ... 0. 0. 0.]
Predicted Labels: [1. 0. 0. ... 0. 0. 0.]
