In [None]:
import math
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
import torch
import torch.nn.functional as F
from sklearn.model_selection import train_test_split


: 

In [None]:
# Load the pre-split dataset and wrap each split in a DataLoader.
# The pickle is indexed as dataset[0] / dataset[1] / dataset[2], used below as
# the train / validation / test splits respectively.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load 'datasets.pickle' if it comes from a trusted source.
with open('datasets.pickle', 'rb') as dataset_file:  # close the handle deterministically
    dataset = pickle.load(dataset_file)

batchsize = 256
trainset = dataset[0]
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchsize, shuffle=True)
valset = dataset[1]
valloader = torch.utils.data.DataLoader(valset, batch_size=batchsize, shuffle=False)  # Disable shuffling
testset = dataset[2]
testloader = torch.utils.data.DataLoader(testset, batch_size=batchsize, shuffle=False)  # Disable shuffling

# Number of leading input columns the model consumes (it slices input[:, :4]).
input_dim = 4
# Output size is taken from the length of the first training sample's target.
output_dim = trainset[0][1].shape[0]

: 

In [5]:
!pip install pytorch-lightning
import pytorch_lightning as pl

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.2.5-py3-none-any.whl (802 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m802.3/802.3 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>=0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m868.8/868.8 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.11.2-py3-none-any.whl (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.0->pytorch-lightning)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13.0->pytorch-lightning)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13.0->p

In [30]:
class MLP(pl.LightningModule):
    """Embedding-based MLP classifier trained with a cross-entropy loss.

    The first two input columns are looked up in two embedding tables, the
    next two columns (named hour and day in ``forward``) are projected by a
    linear layer, and the concatenation is fed through a dropout-regularised
    MLP that outputs one logit per class.

    Per-epoch training/validation metrics and per-batch test metrics are kept
    in plain Python lists on the module (``train_losses``, ``val_acc_at_k``,
    ...) so they can be plotted after training.

    Args:
        in_channels: Accepted for interface compatibility; not used by the
            layers, whose input widths are fixed.
        hidden_channels: Width of the first hidden layer.
        out_channels: Number of output classes (logits).
        verbose: When True, print the metrics of every single batch. Off by
            default because it floods the notebook output; the per-epoch
            summaries are always printed.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, verbose=False):
        super().__init__()
        self.verbose = verbose

        self.emb1 = torch.nn.Embedding(100000, 32)
        self.emb2 = torch.nn.Embedding(100000, 32)
        self.timeday = torch.nn.Linear(2, 32)
        # 96 = 32 (emb1) + 32 (emb2) + 32 (timeday projection)
        self.class1 = torch.nn.Linear(96, hidden_channels)
        self.dropout1 = torch.nn.Dropout(0.5)
        self.class2 = torch.nn.Linear(hidden_channels, 32)
        self.dropout2 = torch.nn.Dropout(0.5)
        self.class3 = torch.nn.Linear(32, 16)
        self.dropout3 = torch.nn.Dropout(0.5)
        self.class4 = torch.nn.Linear(16, out_channels)

        # Metric histories: one entry per epoch for train/val, one per batch for test.
        self.train_losses = []
        self.val_losses = []
        self.test_losses = []
        self.train_accs = []
        self.val_accs = []
        self.test_accs = []
        self.train_acc_at_k = []
        self.val_acc_at_k = []
        self.test_acc_at_k = []
        self.train_mrr = []
        self.val_mrr = []
        self.test_mrr = []
        self.epochs_trained = 0

    def forward(self, data):
        """Return class logits for a batch.

        Args:
            data: 2-D tensor whose columns 0 and 1 are used as embedding
                indices and whose columns 2 and 3 are cast to float and
                projected together. Presumably an integer tensor, since the
                same tensor feeds the embedding lookups - confirm against
                the dataset.

        Returns:
            Tensor with one row of ``out_channels`` logits per input row.
        """
        user = self.emb1(data[:, 0])
        loc = self.emb2(data[:, 1])
        hour = data[:, 2]
        day = data[:, 3]
        timeday = torch.stack([hour, day], dim=-1)
        timeday = self.timeday(timeday.float())
        x = torch.cat((user, loc, timeday), dim=1)
        x = self.class1(x).relu()
        x = self.dropout1(x)
        x = self.class2(x).relu()
        x = self.dropout2(x)
        x = self.class3(x).relu()
        x = self.dropout3(x)
        x = self.class4(x)
        return x

    def _shared_step(self, batch):
        """Run the model on one batch and compute the loss and all metrics."""
        features, target = batch[0], batch[1]
        output = self(features[:, :4])
        # Fully qualified so the class does not depend on an `F` alias being imported.
        loss = torch.nn.functional.cross_entropy(output, target)
        return {
            'loss': loss,
            'acc': self.accuracy_at_k(output, target, k=1),
            'acc_at_k': self.accuracy_at_k(output, target, k=5),
            'mrr': self.mean_reciprocal_rank(output, target),
        }

    def training_step(self, batch, batch_idx):
        """Compute the training loss and log epoch-aggregated metrics."""
        metrics = self._shared_step(batch)
        self.log('train_loss', metrics['loss'], on_epoch=True, on_step=False)
        self.log('train_acc', metrics['acc'], on_epoch=True, on_step=False)
        self.log('train_acc_at_k', metrics['acc_at_k'], on_epoch=True, on_step=False)
        self.log('train_mrr', metrics['mrr'], on_epoch=True, on_step=False)

        if self.verbose:
            print(f"Epoch {self.current_epoch} Training loss: {metrics['loss'].item()} Accuracy@k: {metrics['acc_at_k'].item()} MRR: {metrics['mrr'].item()}")

        return metrics['loss']

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss and log epoch-aggregated metrics."""
        metrics = self._shared_step(batch)
        self.log('val_loss', metrics['loss'], on_epoch=True, on_step=False)
        self.log('val_acc', metrics['acc'], on_epoch=True, on_step=False)
        self.log('val_acc_at_k', metrics['acc_at_k'], on_epoch=True, on_step=False)
        self.log('val_mrr', metrics['mrr'], on_epoch=True, on_step=False)

        if self.verbose:
            print(f"Epoch {self.current_epoch} Validation loss: {metrics['loss'].item()} Accuracy@k: {metrics['acc_at_k'].item()} MRR: {metrics['mrr'].item()}")

        return metrics['loss']

    def test_step(self, batch, batch_idx):
        """Compute test metrics, log them and record the per-batch values."""
        metrics = self._shared_step(batch)
        self.log('test_loss', metrics['loss'])
        self.log('test_acc', metrics['acc'])
        self.log('test_acc_at_k', metrics['acc_at_k'])
        self.log('test_mrr', metrics['mrr'])
        self.test_losses.append(metrics['loss'].item())
        self.test_accs.append(metrics['acc'].item())
        self.test_acc_at_k.append(metrics['acc_at_k'].item())
        self.test_mrr.append(metrics['mrr'].item())

        if self.verbose:
            print(f"Test loss: {metrics['loss'].item()} Accuracy@k: {metrics['acc_at_k'].item()} MRR: {metrics['mrr'].item()}")

        return metrics['loss']

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (lr 1e-3, weight decay 5e-4)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001, weight_decay=5e-4)
        return optimizer

    @staticmethod
    def _target_labels(y_true):
        """Return the true class index of every sample as an (N, 1) tensor.

        A 2-D target is reduced to the index of its largest entry per row
        (the hot position of a one-hot row); a 1-D target is taken to already
        hold class indices.
        """
        if y_true.dim() == 1:
            return y_true.long().view(-1, 1)
        _, label = y_true.topk(1, dim=1)
        return label

    def accuracy_at_k(self, y_pred, y_true, k=5):
        """Fraction of samples whose true class is among the k highest logits.

        Args:
            y_pred: (N, C) logits.
            y_true: (N, C) per-class targets or (N,) class indices.
            k: Number of top predictions to consider; clamped to C.

        Returns:
            Scalar tensor with the mean hit rate over the batch.
        """
        k = min(k, y_pred.size(1))  # topk raises if k exceeds the number of classes
        _, top_k = y_pred.topk(k, dim=1)
        label = self._target_labels(y_true)
        correct = top_k.eq(label.expand_as(top_k))
        acc_at_k = correct.float().sum(dim=1).mean()
        return acc_at_k

    def mean_reciprocal_rank(self, y_pred, y_true):
        """Mean of 1 / rank of the true class, where rank 1 is the highest logit.

        Args:
            y_pred: (N, C) logits.
            y_true: (N, C) per-class targets or (N,) class indices.

        Returns:
            Scalar tensor with the mean reciprocal rank over the batch.
        """
        _, order = y_pred.sort(dim=1, descending=True)
        # argsort of the ordering gives, for every class, its 0-based rank.
        rank = order.argsort(dim=1)
        label = self._target_labels(y_true)
        rr = (1.0 / (rank.gather(1, label.long()) + 1)).mean()
        return rr

    def on_train_epoch_end(self):
        """Append the epoch-aggregated training metrics to the history lists."""
        logged = self.trainer.callback_metrics
        epoch_loss = logged['train_loss']
        epoch_acc = logged['train_acc']
        epoch_acc_at_k = logged['train_acc_at_k']
        epoch_mrr = logged['train_mrr']
        self.train_losses.append(epoch_loss.item())
        self.train_accs.append(epoch_acc.item())
        self.train_acc_at_k.append(epoch_acc_at_k.item())
        self.train_mrr.append(epoch_mrr.item())

        print(f"End of epoch {self.current_epoch} - Training loss: {epoch_loss.item()}, Accuracy@k: {epoch_acc_at_k.item()}, MRR: {epoch_mrr.item()}")

    def on_validation_epoch_end(self):
        """Append the epoch-aggregated validation metrics to the history lists."""
        # The Trainer's pre-training sanity check also runs a validation pass;
        # recording it would add an extra leading entry and shift the
        # validation history by one relative to the training history.
        if self.trainer.sanity_checking:
            return

        logged = self.trainer.callback_metrics
        epoch_val_loss = logged['val_loss']
        epoch_val_acc = logged['val_acc']
        epoch_val_acc_at_k = logged['val_acc_at_k']
        epoch_val_mrr = logged['val_mrr']
        self.val_losses.append(epoch_val_loss.item())
        self.val_accs.append(epoch_val_acc.item())
        self.val_acc_at_k.append(epoch_val_acc_at_k.item())
        self.val_mrr.append(epoch_val_mrr.item())

        print(f"End of epoch {self.current_epoch} - Validation loss: {epoch_val_loss.item()}, Accuracy@k: {epoch_val_acc_at_k.item()}, MRR: {epoch_val_mrr.item()}")

class MetricsLengthCallback(pl.Callback):
    """Debug callback that prints how many epoch metrics the module has stored.

    Uses ``on_train_epoch_end`` because the generic ``on_epoch_end`` callback
    hook is no longer part of the Lightning callback API, so a method with
    that name would never be invoked by the Trainer.
    """

    def on_train_epoch_end(self, trainer, pl_module):
        """Print the lengths of the module's training/validation loss histories.

        NOTE(review): Lightning appears to run callback hooks before the
        module's own ``on_train_epoch_end``, so the training count printed
        here may not yet include the epoch that just finished - confirm.
        """
        print(f"Epoch {trainer.current_epoch} - Training losses length: {len(pl_module.train_losses)}, Validation losses length: {len(pl_module.val_losses)}")

# Upper bound on training epochs; early stopping may end the run sooner.
numepoch = 100

model = MLP(input_dim, 64, output_dim)
# Not used by the training loop below (the model computes its own
# cross-entropy loss in its step methods); kept in case another cell uses it.
criterion = torch.nn.CrossEntropyLoss()
trainer = pl.Trainer(
    max_epochs=numepoch,
    log_every_n_steps=1,
    callbacks=[
        # Referenced through `pl.callbacks` so no separate import is required.
        # NOTE(review): both callbacks monitor the *training* loss; monitoring
        # 'val_loss' is the usual choice for checkpointing/early stopping -
        # confirm this is intentional.
        pl.callbacks.ModelCheckpoint(monitor='train_loss'),
        pl.callbacks.EarlyStopping(monitor='train_loss', patience=10),
        MetricsLengthCallback()  # Debug: prints metric-history lengths each epoch
    ]
)
trainer.fit(model, trainloader, valloader)


print(model)

# Check the lengths of the lists after training
print(f"Final Training losses length: {len(model.train_losses)}")
print(f"Final Validation losses length: {len(model.val_losses)}")




In [None]:
def plot_metrics(model):
    """Draw side-by-side training/validation curves for loss and accuracy@k.

    Reads ``train_losses``, ``val_losses``, ``train_acc_at_k`` and
    ``val_acc_at_k`` from ``model`` and plots only as many points as the
    shorter of the two loss histories contains.
    """
    n_points = min(len(model.train_losses), len(model.val_losses))
    x_axis = range(n_points)

    # (panel name, training history, validation history)
    panels = (
        ('Loss', model.train_losses, model.val_losses),
        ('Accuracy', model.train_acc_at_k, model.val_acc_at_k),
    )

    plt.figure(figsize=(10, 5))

    for slot, (name, train_hist, val_hist) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.plot(x_axis, train_hist[:n_points], label='Training ' + name)
        plt.plot(x_axis, val_hist[:n_points], label='Validation ' + name)
        plt.xlabel('Epochs')
        plt.ylabel(name)
        plt.legend()
        plt.title(name)

    plt.tight_layout()
    plt.show()

# Once training has finished, draw the loss and accuracy curves from the
# metric histories stored on the model.
plot_metrics(model)


In [None]:
# Print the test results

# trainer.test returns one metrics dict per test dataloader; only one is used here.
test_result = trainer.test(model, dataloaders=testloader)[0]

test_losses = model.test_losses
train_losses = model.train_losses
val_losses = model.val_losses
# Plain-accuracy histories only exist if the model tracks them; fall back to
# empty lists instead of raising AttributeError.
train_accs = getattr(model, 'train_accs', [])
val_accs = getattr(model, 'val_accs', [])

# Report each metric only if the model actually logged it during testing, so a
# missing key (e.g. 'test_acc') does not abort the cell with a KeyError.
for label, key in (
    ('Test Loss', 'test_loss'),
    ('Test Accuracy', 'test_acc'),
    ('Test Accuracy@5', 'test_acc_at_k'),
    ('Test MRR', 'test_mrr'),
):
    if key in test_result:
        print(f"{label}: {test_result[key]}")


In [None]:
def plot_metrics(model):
    """Plot training vs. validation curves for every metric the model tracked.

    The figure is a 2x2 grid with fixed slots for loss, accuracy, accuracy@k
    and MRR. A metric whose history lists are missing from ``model`` is
    skipped (its slot stays empty) instead of raising AttributeError.

    Args:
        model: Object exposing ``train_losses`` / ``val_losses`` and,
            optionally, ``train_accs`` / ``val_accs``, ``train_acc_at_k`` /
            ``val_acc_at_k`` and ``train_mrr`` / ``val_mrr`` as sequences
            with one value per epoch.
    """
    # (panel name, training-history attribute, validation-history attribute)
    metric_specs = (
        ('Loss', 'train_losses', 'val_losses'),
        ('Accuracy', 'train_accs', 'val_accs'),
        ('Accuracy@k', 'train_acc_at_k', 'val_acc_at_k'),
        ('MRR', 'train_mrr', 'val_mrr'),
    )

    num_epochs = min(len(model.train_losses), len(model.val_losses))

    plt.figure(figsize=(12, 8))

    for position, (name, train_attr, val_attr) in enumerate(metric_specs, start=1):
        train_values = getattr(model, train_attr, None)
        val_values = getattr(model, val_attr, None)
        if train_values is None or val_values is None:
            continue  # metric not tracked by this model

        # Clamp to the shortest series so x and y always have equal length.
        span = min(num_epochs, len(train_values), len(val_values))
        epochs = range(span)

        plt.subplot(2, 2, position)
        plt.plot(epochs, train_values[:span], label=f'Training {name}')
        plt.plot(epochs, val_values[:span], label=f'Validation {name}')
        plt.xlabel('Epochs')
        plt.ylabel(name)
        plt.legend()
        plt.title(name)

    plt.tight_layout()
    plt.show()

    # Render the training/validation metric curves for the trained model
plot_metrics(model)