In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.7-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.7-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.9/78.9 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [2]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
import optuna
from optuna.trial import TrialState
import numpy as np

import warnings

warnings.filterwarnings("ignore")

google_colab = True

In [3]:
if google_colab:
    from google.colab import drive

    drive.mount('/content/drive')
    train_path = "/content/drive/MyDrive/dis/train.csv"
    test_path = "/content/drive/MyDrive/dis/test.csv"
else:
    train_path = "../data/train.csv"
    test_path = "../data/test.csv"

Mounted at /content/drive


In [4]:
# Create a dataset class
class InteractionDataset(Dataset):
    def __init__(self, user_item_pairs, ratings):
        self.user_item_pairs = user_item_pairs
        self.ratings = ratings

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        return self.user_item_pairs[idx], self.ratings[idx]


# Define the NCF model
class NCF(nn.Module):
    def __init__(self, num_users, num_items, latent_dim_gmf, latent_dim_mlp, last_mlp_layer):
        super(NCF, self).__init__()
        # Embeddings for MLP part
        self.user_embedding_mlp = nn.Embedding(num_users, latent_dim_mlp)
        self.item_embedding_mlp = nn.Embedding(num_items, latent_dim_mlp)

        # Embeddings for GMF part
        self.user_embedding_gmf = nn.Embedding(num_users, latent_dim_gmf)
        self.item_embedding_gmf = nn.Embedding(num_items, latent_dim_gmf)

        # Fully connected layers for MLP
        self.fc_layers = nn.Sequential(
            nn.Linear(latent_dim_mlp * 2, 128),
            nn.BatchNorm1d(128),  # Batch normalization
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, last_mlp_layer),
            nn.BatchNorm1d(last_mlp_layer),
            nn.ReLU(),
            nn.Dropout(0.5)
        )

        # Final output layer (after concatenating GMF and MLP outputs)
        self.final_layer = nn.Linear(latent_dim_gmf + last_mlp_layer, 1)  # GMF (latent_dim) + MLP output (64)

    def forward(self, user, item):
        # MLP embeddings
        user_embed_mlp = self.user_embedding_mlp(user)  # Shape: [batch_size, latent_dim]
        item_embed_mlp = self.item_embedding_mlp(item)  # Shape: [batch_size, latent_dim]

        # GMF embeddings
        user_embed_gmf = self.user_embedding_gmf(user)  # Shape: [batch_size, latent_dim]
        item_embed_gmf = self.item_embedding_gmf(item)  # Shape: [batch_size, latent_dim]

        # GMF interaction (element-wise product)
        gmf_output = torch.mul(user_embed_gmf, item_embed_gmf)  # Shape: [batch_size, latent_dim]

        # MLP interaction (concatenation)
        mlp_input = torch.cat([user_embed_mlp, item_embed_mlp], dim=-1)  # Shape: [batch_size, latent_dim * 2]
        mlp_output = self.fc_layers(mlp_input)  # Shape: [batch_size, 64]

        # Concatenate GMF and MLP outputs
        combined = torch.cat([gmf_output, mlp_output], dim=-1)  # Shape: [batch_size, latent_dim + 64]

        # Final prediction layer
        output = self.final_layer(combined).squeeze()  # Shape: [batch_size]

        return output

In [5]:
# Prepare data
train_df = pd.read_csv(train_path)
user_item_pairs = train_df[["user_id", "book_id"]].values
#Map the user_id and book_id to a unique index
user_to_index = {user_id: idx for idx, user_id in enumerate(train_df['user_id'].unique())}
item_to_index = {book_id: idx for idx, book_id in enumerate(train_df['book_id'].unique())}
train_df['user_idx'] = train_df['user_id'].apply(lambda x: user_to_index[x])
train_df['book_idx'] = train_df['book_id'].apply(lambda x: item_to_index[x])
user_item_index_pairs = train_df[["user_idx", "book_idx"]].values
ratings = train_df["rating"].values

# Define constants
num_users = len(user_to_index)
num_items = len(item_to_index)
latent_dim = 5
batch_size = 256
epochs = 30
lr = 0.003
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Dataset and dataloaders



In [6]:

def objective(trial):
    # Hyperparameter tuning
    latent_dim_gmf = trial.suggest_int('latent_dim_gmf', 3, 30, step=3)
    latent_dim_mlp = trial.suggest_int('latent_dim_mlp', 3, 30, step=3)
    last_mlp_layer = trial.suggest_int('last_mlp_layer', 32, 128, step=16)

    # batch_size = trial.suggest_categorical('batch_size', [256, 512])
    # lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    # weight_decay = trial.suggest_loguniform('weight_decay', 1e-5, 1e-3)

    dataset = InteractionDataset(user_item_index_pairs, ratings)
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Model setup
    model = NCF(num_users, num_items, latent_dim_gmf, latent_dim_mlp, last_mlp_layer).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)

    min_val_loss = float('inf')  # Track the lowest RMSE across epochs

    for epoch in range(epochs):
        train_one_epoch(model, train_loader, optimizer, criterion)
        avg_val_loss = evaluate_model(model, val_loader, criterion)

        # Update the lowest validation loss
        if avg_val_loss < min_val_loss:
            min_val_loss = avg_val_loss
            best_epoch = epoch
            trial.set_user_attr("best_epoch", best_epoch)  # Store best epoch for this trial

        trial.report(min_val_loss, epoch)  # Report the best validation loss so far

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    print(f"Trial {trial.number}: Best Epoch = {trial.user_attrs.get('best_epoch', 'Not available')}")

    return np.sqrt(min_val_loss)  # Return the minimum validation RMSE


def train_one_epoch(model, train_loader, optimizer, criterion):
    model.train()
    for user_item, rating in train_loader:
        user = user_item[:, 0].long().to(device)
        item = user_item[:, 1].long().to(device)
        rating = rating.float().to(device)

        optimizer.zero_grad()
        predictions = model(user, item)
        loss = criterion(predictions, rating)
        loss.backward()
        optimizer.step()


def evaluate_model(model, val_loader, criterion):
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for user_item, rating in val_loader:
            user = user_item[:, 0].long().to(device)
            item = user_item[:, 1].long().to(device)
            rating = rating.float().to(device)

            predictions = model(user, item)
            loss = criterion(predictions, rating)
            total_val_loss += loss.item()

    avg_val_loss = total_val_loss / len(val_loader)
    return avg_val_loss


# Optuna study
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)

# Summarize study results
print("Study statistics:")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(study.get_trials(states=[TrialState.PRUNED])))
print("  Number of complete trials: ", len(study.get_trials(states=[TrialState.COMPLETE])))

print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


[I 2024-12-04 18:19:35,818] A new study created in memory with name: no-name-e103be65-6d2c-4e87-8c66-a0dbfe27ff43
[I 2024-12-04 18:20:31,228] Trial 0 finished with value: 0.8936254676617653 and parameters: {'latent_dim_gmf': 21, 'latent_dim_mlp': 27, 'last_mlp_layer': 96}. Best is trial 0 with value: 0.8936254676617653.


Trial 0: Best Epoch = 6


[I 2024-12-04 18:21:18,586] Trial 1 finished with value: 0.8690370009891756 and parameters: {'latent_dim_gmf': 21, 'latent_dim_mlp': 27, 'last_mlp_layer': 48}. Best is trial 1 with value: 0.8690370009891756.


Trial 1: Best Epoch = 4


[I 2024-12-04 18:22:05,351] Trial 2 finished with value: 0.8887962348746692 and parameters: {'latent_dim_gmf': 3, 'latent_dim_mlp': 27, 'last_mlp_layer': 128}. Best is trial 1 with value: 0.8690370009891756.


Trial 2: Best Epoch = 7


[I 2024-12-04 18:22:51,403] Trial 3 finished with value: 0.8708444400863293 and parameters: {'latent_dim_gmf': 21, 'latent_dim_mlp': 21, 'last_mlp_layer': 48}. Best is trial 1 with value: 0.8690370009891756.


Trial 3: Best Epoch = 5


[I 2024-12-04 18:23:37,306] Trial 4 finished with value: 0.8649057413350446 and parameters: {'latent_dim_gmf': 21, 'latent_dim_mlp': 3, 'last_mlp_layer': 112}. Best is trial 4 with value: 0.8649057413350446.


Trial 4: Best Epoch = 5


[I 2024-12-04 18:23:42,259] Trial 5 pruned. 
[I 2024-12-04 18:23:43,701] Trial 6 pruned. 
[I 2024-12-04 18:23:45,154] Trial 7 pruned. 
[I 2024-12-04 18:23:46,598] Trial 8 pruned. 
[I 2024-12-04 18:23:48,040] Trial 9 pruned. 
[I 2024-12-04 18:24:34,973] Trial 10 finished with value: 0.8367185289153402 and parameters: {'latent_dim_gmf': 30, 'latent_dim_mlp': 3, 'last_mlp_layer': 80}. Best is trial 10 with value: 0.8367185289153402.


Trial 10: Best Epoch = 7


[I 2024-12-04 18:25:20,917] Trial 11 finished with value: 0.8445696473367107 and parameters: {'latent_dim_gmf': 30, 'latent_dim_mlp': 3, 'last_mlp_layer': 80}. Best is trial 10 with value: 0.8367185289153402.


Trial 11: Best Epoch = 5


[I 2024-12-04 18:25:25,585] Trial 12 pruned. 
[I 2024-12-04 18:25:31,760] Trial 13 pruned. 
[I 2024-12-04 18:26:17,710] Trial 14 finished with value: 0.8614035160347218 and parameters: {'latent_dim_gmf': 27, 'latent_dim_mlp': 12, 'last_mlp_layer': 80}. Best is trial 10 with value: 0.8367185289153402.


Trial 14: Best Epoch = 5


[I 2024-12-04 18:27:04,552] Trial 15 finished with value: 0.8542408335847556 and parameters: {'latent_dim_gmf': 27, 'latent_dim_mlp': 6, 'last_mlp_layer': 80}. Best is trial 10 with value: 0.8367185289153402.


Trial 15: Best Epoch = 6


[I 2024-12-04 18:27:06,001] Trial 16 pruned. 
[I 2024-12-04 18:27:08,804] Trial 17 pruned. 
[I 2024-12-04 18:27:10,265] Trial 18 pruned. 
[I 2024-12-04 18:27:13,124] Trial 19 pruned. 


Study statistics:
  Number of finished trials:  20
  Number of pruned trials:  11
  Number of complete trials:  9
Best trial:
  Value:  0.8367185289153402
  Params: 
    latent_dim_gmf: 30
    latent_dim_mlp: 3
    last_mlp_layer: 80


In [7]:
import optuna.visualization as vis

# Visualize the optimization history
fig1 = vis.plot_optimization_history(study)
fig1.show()

# Visualize the hyperparameter importance
fig2 = vis.plot_param_importances(study)
fig2.show()

# Visualize the hyperparameter relationships
fig3 = vis.plot_parallel_coordinate(study)
fig3.show()