# Movie Recommendation System (1M)

## Initial library load 

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import TensorDataset, DataLoader, Dataset, Subset
import numpy as np
import pandas as pd
import seaborn as sns
import random
from datetime import datetime
from unidecode import unidecode

In [2]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f3fd8027450>

In [4]:
import os
if not os.path.exists('runs'):
    os.makedirs('runs')
writer = SummaryWriter('runs/lenet5_mnist')

## Data load and preprocessing

In [5]:
ratings = pd.read_csv("./ml-1m/ratings.dat", sep="::", header=None,
                      names=['UserID','MovieID','Rating','Timestamp'], engine="python")

In [6]:
# convert timestamp to datetime
ratings['Datetime'] = ratings['Timestamp'].apply(lambda ts: datetime.fromtimestamp(ts))

In [7]:
ratings['Label'] = 1

In [8]:
# Ensure user and movie IDs are zero-indexed for embedding layers.
ratings['UserID'] = ratings['UserID'] - 1  # Users: 0 to 6039
ratings['MovieID'] = ratings['MovieID'] - 1  # Movies: 0 to (n-1)

In [9]:
ratings.head(10)

Unnamed: 0,UserID,MovieID,Rating,Timestamp,Datetime,Label
0,0,1192,5,978300760,2000-12-31 23:12:40,1
1,0,660,3,978302109,2000-12-31 23:35:09,1
2,0,913,3,978301968,2000-12-31 23:32:48,1
3,0,3407,4,978300275,2000-12-31 23:04:35,1
4,0,2354,5,978824291,2001-01-07 00:38:11,1
5,0,1196,3,978302268,2000-12-31 23:37:48,1
6,0,1286,5,978302039,2000-12-31 23:33:59,1
7,0,2803,5,978300719,2000-12-31 23:11:59,1
8,0,593,4,978302268,2000-12-31 23:37:48,1
9,0,918,4,978301368,2000-12-31 23:22:48,1


In [10]:
movies = pd.read_csv("./ml-1m/movies.dat", sep="::", header=None,
                    names=['MovieID','Title','Genre'], engine="python",
                    encoding="utf-8",
                    lineterminator="\n",
                    on_bad_lines="skip")

In [11]:
movies.head(20)

Unnamed: 0,MovieID,Title,Genre
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children's
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


In [12]:
movies_norm = movies.copy()

In [13]:
movies_norm['Title'] = movies_norm['Title'].apply(lambda x: unidecode(x))

In [14]:
movies_norm['Genre_List'] = movies_norm['Genre'].apply(lambda x: x.split('|'))

In [15]:
movies_norm.head(10)

Unnamed: 0,MovieID,Title,Genre,Genre_List
0,1,Toy Story (1995),Animation|Children's|Comedy,"[Animation, Children's, Comedy]"
1,2,Jumanji (1995),Adventure|Children's|Fantasy,"[Adventure, Children's, Fantasy]"
2,3,Grumpier Old Men (1995),Comedy|Romance,"[Comedy, Romance]"
3,4,Waiting to Exhale (1995),Comedy|Drama,"[Comedy, Drama]"
4,5,Father of the Bride Part II (1995),Comedy,[Comedy]
5,6,Heat (1995),Action|Crime|Thriller,"[Action, Crime, Thriller]"
6,7,Sabrina (1995),Comedy|Romance,"[Comedy, Romance]"
7,8,Tom and Huck (1995),Adventure|Children's,"[Adventure, Children's]"
8,9,Sudden Death (1995),Action,[Action]
9,10,GoldenEye (1995),Action|Adventure|Thriller,"[Action, Adventure, Thriller]"


In [16]:
users = pd.read_csv("./ml-1m/users.dat", sep="::", engine="python",
                    header=None, names=['UserID','Gender','Age','Occupation','Zip-code'])

In [17]:
users.head(10)

Unnamed: 0,UserID,Gender,Age,Occupation,Zip-code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455
5,6,F,50,9,55117
6,7,M,35,1,6810
7,8,M,25,12,11413
8,9,M,25,17,61614
9,10,F,35,1,95370


# Train / Test / Validation Split

In [18]:
def leave_one_out_split(df):
    # For each user, select the last interaction as test data.
    df = df.sort_values(['UserID', 'Timestamp'])
    test_list = df.groupby('UserID').tail(1)
    train_df = df.drop(test_list.index)
    return train_df, test_list

In [19]:
train_ratings_full, test_ratings = leave_one_out_split(ratings)
print("Train+Validation samples:", len(train_ratings_full))
print("Test samples:", len(test_ratings))

Train+Validation samples: 994169
Test samples: 6040


In [20]:
def stratified_train_val_split(df, val_frac=0.1, seed=seed):
    train_idx = []
    val_idx = []
    np.random.seed(seed)
    # Group by user and sample indices for validation per user.
    for user, group in df.groupby('UserID'):
        indices = group.index.tolist()
        n_val = int(np.ceil(len(indices) * val_frac))
        val_indices = np.random.choice(indices, size=n_val, replace=False)
        train_indices = list(set(indices) - set(val_indices))
        train_idx.extend(train_indices)
        val_idx.extend(val_indices)
    # Return DataFrames for train and validation splits.
    train_df = df.loc[train_idx].reset_index(drop=True)
    val_df = df.loc[val_idx].reset_index(drop=True)
    return train_df, val_df

In [21]:
train_ratings, val_ratings = stratified_train_val_split(train_ratings_full, val_frac=0.1)

In [22]:
print("Train samples:", len(train_ratings))
print("Validation samples:", len(val_ratings))

Train samples: 892037
Validation samples: 102132


In [23]:
# -------------------------
# Precompute User-Positive Movie Mapping for Negative Sampling
# -------------------------

def build_user_positive_dict(df):
    return df.groupby('UserID')['MovieID'].apply(set).to_dict()

In [24]:
user_positive_train = build_user_positive_dict(train_ratings)

### Negative Sampling of the dataset

In [25]:
# -------------------------
# Custom Dataset with Negative Sampling (only for train and validation)
# -------------------------

class MovieLensDatasetWithNegatives(Dataset):
    def __init__(self, df, user_positive, num_movies, num_negatives=4):
        """
        df: DataFrame with positive interactions.
        user_positive: Dictionary mapping user_id -> set of positive movie_ids.
        num_movies: Total number of movies.
        num_negatives: Number of negative samples to generate per positive instance.
        """
        self.df = df.reset_index(drop=True)
        self.user_positive = user_positive
        self.num_movies = num_movies
        self.num_negatives = num_negatives
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        # Get the positive sample.
        user_id = int(self.df.loc[idx, 'UserID'])
        pos_movie_id = int(self.df.loc[idx, 'MovieID'])
        
        # Start with the positive sample.
        samples = [(user_id, pos_movie_id, 1)]
        
        # Generate negative samples for this user.
        for _ in range(self.num_negatives):
            neg_movie_id = random.randint(0, self.num_movies - 1)
            while neg_movie_id in self.user_positive.get(user_id, set()):
                neg_movie_id = random.randint(0, self.num_movies - 1)
            samples.append((user_id, neg_movie_id, 0))
        
        # Return all samples (positive + negatives) for this positive instance.
        return samples

In [26]:
# A collate function to flatten batches since __getitem__ returns a list of samples.
def collate_fn(batch):
    # batch is a list of lists (each element is a list of (user, movie, label) tuples)
    flat_batch = [sample for sublist in batch for sample in sublist]
    user_ids, movie_ids, labels = zip(*flat_batch)
    return (torch.LongTensor(user_ids),
            torch.LongTensor(movie_ids),
            torch.FloatTensor(labels))

### Parameters and Hyperparameters

In [27]:
num_users = users['UserID'].nunique()
## num_movies = movies_norm['MovieID'].nunique() creates an error becuase ids go up to 3951 but unique ids there's only 3883
max_movie_id = ratings['MovieID'].max()
num_movies = max_movie_id + 1
batch_size = 256
embedding_dim = 32  # Hyperparameter choice

In [28]:
# Create Dataset objects
train_dataset = MovieLensDatasetWithNegatives(train_ratings, user_positive_train, num_movies, num_negatives=4)
val_dataset = MovieLensDatasetWithNegatives(val_ratings, build_user_positive_dict(val_ratings), num_movies, num_negatives=4)

In [29]:
# Create DataLoaders

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

In [30]:
# For testing, we use only the positive interactions (held-out ones).
class MovieLensTestDataset(Dataset):
    def __init__(self, df):
        self.user_ids = torch.LongTensor(df['UserID'].values)
        self.movie_ids = torch.LongTensor(df['MovieID'].values)
        self.labels = torch.FloatTensor(df['Label'].values)
        
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return self.user_ids[idx], self.movie_ids[idx], self.labels[idx]

In [31]:
test_dataset = MovieLensTestDataset(test_ratings)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

### Ranking Metrics

In [33]:
def evaluate_ranking(model, dataset, device, K=10):
    """
    Evaluates HR@K and NDCG@K on a dataset that returns
    candidate sets (1 positive + multiple negatives).
    """
    model.eval()
    hr_sum = 0.0
    ndcg_sum = 0.0
    num_users = len(dataset)  # each __getitem__ is for one user (or one positive sample)
    
    with torch.no_grad():
        for idx in range(num_users):
            candidate_list = dataset[idx]  # e.g., [(user_id, pos_item, 1), (user_id, neg_item1, 0), ...]
            
            user_ids, item_ids, labels = zip(*candidate_list)
            user_ids = torch.LongTensor(user_ids).to(device)
            item_ids = torch.LongTensor(item_ids).to(device)
            labels = torch.FloatTensor(labels).to(device)

            # Predict scores for each candidate
            scores = model(user_ids, item_ids).cpu().numpy()
            labels = labels.cpu().numpy()
            
            # Sort candidates by predicted score (descending)
            sorted_indices = np.argsort(-scores)
            
            # Find the rank of the positive item
            for rank, sorted_idx in enumerate(sorted_indices, start=1):
                if labels[sorted_idx] == 1:
                    # Hit Ratio
                    if rank <= K:
                        hr_sum += 1.0
                        # NDCG
                        ndcg_sum += 1.0 / np.log2(rank + 1)
                    break
    
    hr_avg = hr_sum / num_users
    ndcg_avg = ndcg_sum / num_users
    return hr_avg, ndcg_avg

# Neural Network architecture (GMF)

source: https://arxiv.org/pdf/1708.05031 

In [32]:
class GMF(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim):
        super(GMF, self).__init__()
        # Embedding layers for users and items
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        # Output layer weight (h) for combining element-wise product (Learnable weight vector)
        self.h = nn.Parameter(torch.randn(embedding_dim))
        # Sigmoid activation to map predictions to [0, 1]
        self.sigmoid = nn.Sigmoid()
      
    def forward(self, user_ids, item_ids):
        os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  
        p_u = self.user_embedding(user_ids)  # shape: [batch_size, embedding_dim]
        q_i = self.item_embedding(item_ids)  # shape: [batch_size, embedding_dim]
        interaction = p_u * q_i              # Element-wise product
        # Linear combination using the weight vector h
        score = torch.sum(interaction * self.h, dim=1)  # Weighted sum
        prediction = self.sigmoid(score)     # Map to [0, 1]
        return prediction

In [None]:
model = GMF(num_users, num_movies, embedding_dim)
model = model.to(DEVICE)

In [34]:
print("UserID range:", ratings['UserID'].min(), ratings['UserID'].max())
print("MovieID range:", ratings['MovieID'].min(), ratings['MovieID'].max())


UserID range: 0 6039
MovieID range: 0 3951


In [35]:
print(num_users)
print(num_movies)

6040
3952


### Training only for GMF

In [36]:
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss for binary classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [38]:
# -------------------------
# Training Loop with Validation
# -------------------------
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    for user_ids, movie_ids, labels in train_loader:
        user_ids = user_ids.to(DEVICE)
        movie_ids = movie_ids.to(DEVICE)
        labels = labels.to(DEVICE)
        
        optimizer.zero_grad()
        predictions = model(user_ids, movie_ids)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * user_ids.size(0)
    
    avg_loss = epoch_loss / len(train_dataset)  # based on number of positive samples
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}")
    
    # Validation step
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for user_ids, movie_ids, labels in val_loader:
            user_ids = user_ids.to(DEVICE)
            movie_ids = movie_ids.to(DEVICE)
            labels = labels.to(DEVICE)
            
            predictions = model(user_ids, movie_ids)
            loss = criterion(predictions, labels)
            val_loss += loss.item() * user_ids.size(0)
    avg_val_loss = val_loss / len(val_dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {avg_val_loss:.4f}")

Epoch 1/10, Train Loss: 6.1076
Epoch 1/10, Validation Loss: 3.6098
Epoch 2/10, Train Loss: 3.4985
Epoch 2/10, Validation Loss: 3.4664
Epoch 3/10, Train Loss: 3.4660
Epoch 3/10, Validation Loss: 3.4650
Epoch 4/10, Train Loss: 3.2076
Epoch 4/10, Validation Loss: 2.7234
Epoch 5/10, Train Loss: 2.1992
Epoch 5/10, Validation Loss: 2.1273
Epoch 6/10, Train Loss: 1.8791
Epoch 6/10, Validation Loss: 1.9733
Epoch 7/10, Train Loss: 1.7879
Epoch 7/10, Validation Loss: 1.9204
Epoch 8/10, Train Loss: 1.7478
Epoch 8/10, Validation Loss: 1.8964
Epoch 9/10, Train Loss: 1.7114
Epoch 9/10, Validation Loss: 1.8792
Epoch 10/10, Train Loss: 1.6659
Epoch 10/10, Validation Loss: 1.8632


In [39]:
# -------------------------
# Evaluation on Test Data
# -------------------------
model.eval()
test_loss = 0.0
with torch.no_grad():
    for user_ids, movie_ids, labels in test_loader:
        user_ids = user_ids.to(DEVICE)
        movie_ids = movie_ids.to(DEVICE)
        labels = labels.to(DEVICE)       
        
        predictions = model(user_ids, movie_ids)
        loss = criterion(predictions, labels)
        test_loss += loss.item() * user_ids.size(0)
avg_test_loss = test_loss / len(test_dataset)
print(f"Test Loss: {avg_test_loss:.4f}")

Test Loss: 1.1887


In [40]:
hr, ndcg = evaluate_ranking(model, val_dataset, DEVICE, K=10)
print(f"Validation HR@10: {hr:.4f}, NDCG@10: {ndcg:.4f}")

Validation HR@10: 1.0000, NDCG@10: 0.8241


## Comparison with traditional MF

In [39]:
def train_with_validation(model,
                          train_loader,
                          val_loader,
                          val_dataset,
                          device,
                          epochs=10,
                          lr=1e-2,
                          neg_sampling_K=10):
    """
    Trains `model` for `epochs`, logging:
      - avg BCE loss on training set
      - avg BCE loss on validation loader
      - HR@K and NDCG@K on val_dataset (for ranking)
    Returns a dict with keys:
      'train_loss', 'val_loss', 'val_hr', 'val_ndcg'
    """
    # 1. Prepare
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn   = nn.BCELoss()
    
    history = {
        'train_loss': [],
        'val_loss':   [],
        'val_hr':     [],
        'val_ndcg':   []
    }
    # 2. Epoch loop
    for epoch in range(1, epochs+1):
        model.train()
        epoch_loss = 0.0
        # 2a. Training
        for user_ids, movie_ids, labels in train_loader:
            user_ids = user_ids.to(DEVICE)
            movie_ids = movie_ids.to(DEVICE)
            labels = labels.to(DEVICE)
            
            optimizer.zero_grad()
            predictions = model(user_ids, movie_ids)
            loss = loss_fn(predictions, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * user_ids.size(0)
    
        avg_train_loss = epoch_loss / len(train_loader.dataset)  # based on number of positive samples
        # ---------- Validation Loss ----------
        # 2b. Validation (ranking)
        model.eval()
        val_loss = 0.0
            
        with torch.no_grad():
            for user_ids, movie_ids, labels in val_loader:
                user_ids = user_ids.to(DEVICE)
                movie_ids = movie_ids.to(DEVICE)
                labels = labels.to(DEVICE)
                
                predictions = model(user_ids, movie_ids)
                loss = loss_fn(predictions, labels)
                val_loss += loss.item() * user_ids.size(0)
        avg_val_loss = val_loss / len(val_dataset)

        # ---------- Ranking Metrics on val_dataset ----------
        hr, ndcg = evaluate_ranking(model, val_dataset, device, K=neg_sampling_K)

        # ---------- Logging ----------
        print(f"Epoch {epoch}/{epochs}  "
              f"Train Loss: {avg_train_loss:.4f}  "
              f"Val Loss: {avg_val_loss:.4f}  "
              f"HR@{K}: {hr:.4f}  "
              f"NDCG@{K}: {ndcg:.4f}")

        history['train_loss'].append(avg_train_loss)
        history['val_loss']  .append(avg_val_loss)
        history['val_hr']    .append(hr)
        history['val_ndcg']  .append(ndcg)
    
    return history

In [35]:
import torch.nn as nn

class MF(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim):
        super(MF, self).__init__()
        # Embeddings exactly like GMF
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        # No learnable h vector here
    
    def forward(self, user_ids, item_ids):
        p_u = self.user_embedding(user_ids)  # [B, K]
        q_i = self.item_embedding(item_ids)  # [B, K]
        # Classic inner-product
        score = (p_u * q_i).sum(dim=1)       # [B]
        # If using implicit feedback + BCE, pass through sigmoid:
        return torch.sigmoid(score)


In [None]:
# Hyperparameters
epochs = 5
lr     = 1e-3
K      = 10

# 1) Compare MF
mf = MF(num_users, num_movies, embedding_dim)
print("\n--- Training MF ---")
hist_mf = train_with_validation(
    mf,
    train_loader,
    val_loader,
    val_dataset,
    DEVICE,
    epochs=epochs,
    lr=lr,
    neg_sampling_K=K
)

# 2) Compare GMF
gmf = GMF(num_users, num_movies, embedding_dim)
print("\n--- Training GMF ---")
hist_gmf = train_with_validation(
    gmf,
    train_loader,
    val_loader,
    val_dataset,
    DEVICE,
    epochs=epochs,
    lr=lr,
    neg_sampling_K=K
)



--- Training MF ---
Epoch 1/5  Train Loss: 9.9897  Val Loss: 8.0585  HR@10: 1.0000  NDCG@10: 0.5886
Epoch 2/5  Train Loss: 6.5507  Val Loss: 5.2933  HR@10: 1.0000  NDCG@10: 0.6084
Epoch 3/5  Train Loss: 3.7962  Val Loss: 2.9333  HR@10: 1.0000  NDCG@10: 0.7274
Epoch 4/5  Train Loss: 2.2783  Val Loss: 2.2476  HR@10: 1.0000  NDCG@10: 0.7825
Epoch 5/5  Train Loss: 1.9013  Val Loss: 2.0368  HR@10: 1.0000  NDCG@10: 0.8029

--- Training GMF ---
Epoch 1/5  Train Loss: 5.6648  Val Loss: 3.4807  HR@10: 1.0000  NDCG@10: 0.5875
Epoch 2/5  Train Loss: 3.4676  Val Loss: 3.4665  HR@10: 1.0000  NDCG@10: 0.5897
Epoch 3/5  Train Loss: 3.4651  Val Loss: 3.4564  HR@10: 1.0000  NDCG@10: 0.5987
Epoch 4/5  Train Loss: 3.0116  Val Loss: 2.5453  HR@10: 1.0000  NDCG@10: 0.7484
Epoch 5/5  Train Loss: 2.1176  Val Loss: 2.0937  HR@10: 1.0000  NDCG@10: 0.7979


## Optimization with optuna

In [41]:
import optuna


def objective(trial):
    # 1) Suggest hyperparameters
    embedding_dim = trial.suggest_categorical("embedding_dim", [16, 32, 64, 128])
    lr            = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    neg_ratio     = trial.suggest_int("neg_ratio", 2, 8)
    
    # 2) Build dataset & loaders
    train_ds = MovieLensDatasetWithNegatives(
        train_ratings,
        user_positive_train,
        num_movies,
        num_negatives=neg_ratio
    )
    train_loader = DataLoader(
        train_ds, batch_size=256, shuffle=True, collate_fn=collate_fn
    )
    # Validation loader for loss
    val_loader = DataLoader(
        MovieLensDatasetWithNegatives(
            val_ratings,                # your per‐user stratified val set
            build_user_positive_dict(val_ratings),
            num_movies,
            num_negatives=neg_ratio     # or a fixed neg sampling for val
        ),
        batch_size=256,
        shuffle=False,
        collate_fn=collate_fn
    )
    
    # 3) Build model, loss, optimizer
    model     = GMF(num_users, num_movies, embedding_dim).to(DEVICE)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    # 4) Training loop
    num_epochs = 5
    for _ in range(num_epochs):
        model.train()
        for u, i, y in train_loader:
            u, i, y = u.to(DEVICE), i.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            pred = model(u, i)
            loss = criterion(pred, y)
            loss.backward()
            optimizer.step()
    
    # 5) Validation loss
    model.eval()
    val_loss, val_count = 0.0, 0
    with torch.no_grad():
        for u, i, y in val_loader:
            u, i, y = u.to(DEVICE), i.to(DEVICE), y.to(DEVICE)
            pred = model(u, i)
            l = criterion(pred, y)
            val_loss  += l.item() * u.size(0)
            val_count += u.size(0)
    avg_val_loss = val_loss / val_count
    
    # 6) Ranking metric (NDCG@10) on the held-out leave-one-out val_dataset
    hr, ndcg = evaluate_ranking(model, val_dataset, DEVICE, K=10)
    
    # 7) Return the two objectives:
    #    1) avg_val_loss to minimize
    #    2) -ndcg       to minimize (i.e. maximize ndcg)
    return avg_val_loss, -ndcg

# Create a multi-objective study: minimize both objectives
study = optuna.create_study(directions=["minimize", "minimize"])
study.optimize(objective, n_trials=30)

# Print Pareto-optimal trials
print("Pareto front (val_loss, -ndcg) and params:")
for t in study.best_trials:
    print(f"  values={t.values}, params={t.params}")


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-04-27 21:50:02,285] A new study created in memory with name: no-name-0d480fd5-da6c-4735-bd33-0b68ec4a808a
[I 2025-04-27 21:52:53,076] Trial 0 finished with values: [0.4126032123796127, -0.822522603190414] and parameters: {'embedding_dim': 64, 'lr': 0.0026232317154338504, 'neg_ratio': 3}.
[I 2025-04-27 21:55:52,651] Trial 1 finished with values: [0.3834005135949311, -0.8137022808201813] and parameters: {'embedding_dim': 16, 'lr': 0.0018567045923492092, 'neg_ratio': 4}.
[I 2025-04-27 21:58:42,992] Trial 2 finished with values: [0.6442792700674439, -0.6509073654097698] and parameters: {'embedding_dim': 128, 'lr': 0.0005588007131489181, 'neg_ratio': 4}.
[I 2025-04-27 22:01:42,102] Trial 3 finished with values: [0.6111817945582613, -0.6869701549432307] and parameters: {'embedding_dim': 16, 'lr': 0.00043130822749060105, 'neg_ratio': 4}.
[I 2025-04-27 22:04:40,743] Trial 4 finished with values: [0.3419016402443684, -0.8198337464499664]

Pareto front (val_loss, -ndcg) and params:
  values=[0.27446849833797055, -0.8507463853175136], params={'embedding_dim': 16, 'lr': 0.004217344538619473, 'neg_ratio': 7}
  values=[0.29054415641742165, -0.8561847563600465], params={'embedding_dim': 16, 'lr': 0.004475592325958436, 'neg_ratio': 6}
  values=[0.26816794184961823, -0.8295817727538515], params={'embedding_dim': 64, 'lr': 0.0024276509208660843, 'neg_ratio': 8}


## Training and Test with the best parameters (from optuna study)

In [43]:
model_best = GMF(num_users, num_movies, embedding_dim=16)
model_best = model_best.to(DEVICE)
criterion_best = nn.BCELoss()
optimizer_best = optim.Adam(model_best.parameters(), lr=0.004217344538619473)

# Rebuild your dataset with the chosen negative sampling ratio
train_dataset_best = MovieLensDatasetWithNegatives(
    train_ratings,
    user_positive_train,
    num_movies,
    num_negatives=7
)
train_loader_best = DataLoader(train_dataset_best, batch_size=256, shuffle=True, collate_fn=collate_fn)
    

In [46]:
# -------------------------
# Training Loop with Validation
# -------------------------
num_epochs = 10

for epoch in range(num_epochs):
    model_best.train()
    epoch_loss = 0.0
    for user_ids, movie_ids, labels in train_loader_best:
        user_ids = user_ids.to(DEVICE)
        movie_ids = movie_ids.to(DEVICE)
        labels = labels.to(DEVICE)
        
        optimizer_best.zero_grad()
        predictions = model_best(user_ids, movie_ids)
        loss = criterion_best(predictions, labels)
        loss.backward()
        optimizer_best.step()
        epoch_loss += loss.item() * user_ids.size(0)
    
    avg_loss = epoch_loss / len(train_dataset)  # based on number of positive samples
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}")
    
    # Validation step
    model_best.eval()
    val_loss = 0.0
    with torch.no_grad():
        for user_ids, movie_ids, labels in val_loader:
            user_ids = user_ids.to(DEVICE)
            movie_ids = movie_ids.to(DEVICE)
            labels = labels.to(DEVICE)
            
            predictions = model_best(user_ids, movie_ids)
            loss = criterion_best(predictions, labels)
            val_loss += loss.item() * user_ids.size(0)
    avg_val_loss = val_loss / len(val_dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {avg_val_loss:.4f}")

Epoch 1/10, Train Loss: 5.6443
Epoch 1/10, Validation Loss: 2.4885
Epoch 2/10, Train Loss: 2.3988
Epoch 2/10, Validation Loss: 1.9487
Epoch 3/10, Train Loss: 2.1848
Epoch 3/10, Validation Loss: 1.9250
Epoch 4/10, Train Loss: 2.0884
Epoch 4/10, Validation Loss: 1.8170
Epoch 5/10, Train Loss: 1.9081
Epoch 5/10, Validation Loss: 1.7429
Epoch 6/10, Train Loss: 1.7835
Epoch 6/10, Validation Loss: 1.6983
Epoch 7/10, Train Loss: 1.7065
Epoch 7/10, Validation Loss: 1.6797
Epoch 8/10, Train Loss: 1.6586
Epoch 8/10, Validation Loss: 1.6682
Epoch 9/10, Train Loss: 1.6287
Epoch 9/10, Validation Loss: 1.6540
Epoch 10/10, Train Loss: 1.6072
Epoch 10/10, Validation Loss: 1.6600
