# Recommender Systems 2024/25

### Practice 9 - Deep Learning Models

## The basics of Deep Learning: Multi-Layer Perceptron 

In [5]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Data_manager.Movielens.Movielens10MReader import Movielens10MReader

data_reader = Movielens10MReader()
data_loaded = data_reader.load_data()

URM_all = data_loaded.get_URM_all()

URM_train_val, URM_test = split_train_in_two_percentage_global_sample(URM_all, 0.8)
URM_train, URM_val = split_train_in_two_percentage_global_sample(URM_train_val, 0.8)

Movielens10M: Verifying data consistency...
Movielens10M: Verifying data consistency... Passed!
DataReader: current dataset is: Movielens10M
	Number of items: 10681
	Number of users: 69878
	Number of interactions in URM_all: 10000054
	Value range in URM_all: 0.50-5.00
	Interaction density: 1.34E-02
	Interactions per user:
		 Min: 2.00E+01
		 Avg: 1.43E+02
		 Max: 7.36E+03
	Interactions per item:
		 Min: 0.00E+00
		 Avg: 9.36E+02
		 Max: 3.49E+04
	Gini Index: 0.57

	ICM name: ICM_tags, Value range: 1.00 / 69.00, Num features: 10106, feature occurrences: 106820, density 9.90E-04
	ICM name: ICM_genres, Value range: 1.00 / 1.00, Num features: 20, feature occurrences: 21564, density 1.01E-01
	ICM name: ICM_all, Value range: 1.00 / 69.00, Num features: 10126, feature occurrences: 128384, density 1.19E-03
	ICM name: ICM_year, Value range: 1.92E+03 / 2.01E+03, Num features: 1, feature occurrences: 10681, density 1.00E+00




In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim

In [7]:
# load iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# create a custom dataset class
class IrisDataset(torch.utils.data.Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [12]:
# create a custom nn.Module class
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(4, 16)
        self.fc2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(32, 3)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [17]:
# create a data loader and model
dataset = IrisDataset(X_train, y_train)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
model = MLP()

# define a loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.4)

# train the model
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(data_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    print('Epoch %d, loss: %.3f' % (epoch+1, running_loss/(i+1)))

# evaluate the model
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data in data_loader:
        inputs, labels = data
        outputs = model(inputs)
        test_loss += criterion(outputs, labels).item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()

accuracy = correct / len(dataset)
print('Test loss: {:.3f}, Accuracy: {:.2f}'.format(test_loss/(len(data_loader)), accuracy*100))

Epoch 1, loss: 1.115
Epoch 2, loss: 0.997
Epoch 3, loss: 0.855
Epoch 4, loss: 1.016
Epoch 5, loss: 0.791
Epoch 6, loss: 0.535
Epoch 7, loss: 0.495
Epoch 8, loss: 0.509
Epoch 9, loss: 0.424
Epoch 10, loss: 0.531
Test loss: 0.403, Accuracy: 97.50


## Two-Tower Models

In [None]:
# Repro '19 on Maurizio's Github – NeuMF: principal components and explain
# https://github.com/MaurizioFD/RecSys2019_DeepLearning_Evaluation/blob/0fb6b7f5c396f8525316ed66cf9c9fdb03a5fa9b/Conferences/WWW/NeuMF_our_interface/NeuMF_RecommenderWrapper.py#L110C33-L110C65
# Rows 108-113

# Variant 1:
# 1. Make 2 embeddings of equal dimensions and concatenate
# 2. Couple of Dense layers
# 3. Obtain prediction (single score)

# Variant 2:
# 1. Couple of Dense layers process user/item profiles
# 2. Concatenate and final Dense layer to obtain prediciton

## AutoEncoders

### Denoising Autoencoder

In [None]:
from Recommenders.MatrixFactorization.PyTorch import MF_MSE_PyTorch
# ...
# Autoencoder for recommendation, search on internet
# Couple layers before and couple after bottleneck, basic noise (10 lines of code)

In [None]:
# Build the encoder

In [None]:
# Build the decoder

In [None]:
# Implement the training structure using PyTorch

In [None]:
# Train and evaluate

### $EASE^R$

In [None]:
# only import, training and evaluation (from repo)

## LightGCN


In [None]:
# PyTorch implementation (see repo)

Possibly, GF-CF