# Tuning experiments for the Movielens dataset

Feel free to re-run this and change hyperparameters as you see fit.

This notebook should also be a good place to load the data only once and then train multiple times, generate charts, etc.


Mode collapse: dropout regularization, L2 regularization, decaying KL divergence

Regression: MSELoss

In [1]:
import lightning as L
import torch
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

from data_loader import load_data, train_test_split_shuffle, MovielensAllMovieRatingsPerUserDataset, MovielensConcatDataset
from vae.model import VariationalAutoencoder

In [2]:
# Baseline hyperparameters -- tweak these and re-run the cells below.

# Learning rate and weight decay
learning_rate = 0.0001
weight_decay = 0.0005

# Training length and batch size
epochs = 100
batch_size = 1_000

# Layer widths
hidden_dim = 600
latent_dim = 200

# Loss selection and annealing settings
loss_type = "mse"
anneal_cap = 0.1
total_anneal_steps = 20_000


In [3]:
# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [4]:
# Load the ratings once (with rescaling enabled) and preview the first rows.
dataset = '20m'
data, num_users, num_items = load_data(dataset, rescale_data=True)
print(data.head())

# Two successive splits: the first separates test_data from all_train_data,
# the second separates validation_data from train_data.
# NOTE(review): whether each ratio is the held-out or the retained share is
# decided inside train_test_split_shuffle -- confirm against data_loader.
# No random seed is passed here, so the splits may differ between runs.
outer_split_ratio = 0.4
inner_split_ratio = 0.2
all_train_data, test_data = train_test_split_shuffle(data, outer_split_ratio)
train_data, validation_data = train_test_split_shuffle(all_train_data, inner_split_ratio)

   user_id  movie_id  rating   timestamp
0        0         0     0.7  1112486027
1        0         1     0.7  1112484676
2        0         2     0.7  1112484819
3        0         3     0.7  1112484727
4        0         4     0.7  1112484580


In [5]:
# Dataset objects: `train` / `all_train` wrap a single ratings frame, while
# `validation` / `test` are built from two frames (see MovielensConcatDataset
# for how dataset1 and dataset2 are combined).
shared_dims = {"num_users": num_users, "num_items": num_items}
train = MovielensAllMovieRatingsPerUserDataset(data=train_data, **shared_dims)
validation = MovielensConcatDataset(dataset1=train_data, dataset2=validation_data, **shared_dims)
all_train = MovielensAllMovieRatingsPerUserDataset(data=all_train_data, **shared_dims)
test = MovielensConcatDataset(dataset1=all_train_data, dataset2=test_data, **shared_dims)

# One loader per dataset, each collating with its dataset class's sparse_collate.
loader_options = {"batch_size": batch_size, "num_workers": 4}
train_dataloader = DataLoader(
    train,
    collate_fn=MovielensAllMovieRatingsPerUserDataset.sparse_collate,
    **loader_options,
)
validation_dataloader = DataLoader(
    validation,
    collate_fn=MovielensConcatDataset.sparse_collate,
    **loader_options,
)
all_train_dataloader = DataLoader(
    all_train,
    collate_fn=MovielensAllMovieRatingsPerUserDataset.sparse_collate,
    **loader_options,
)
test_dataloader = DataLoader(
    test,
    collate_fn=MovielensConcatDataset.sparse_collate,
    **loader_options,
)

In [6]:
# Sanity check: show the first row of the training dataset's underlying table.
# In the recorded output this row belongs to user_id 0 and holds list-valued
# movie_id / rating / timestamp fields.
train.data.iloc[0]

user_id                                                      0
level_0      [83, 58, 109, 10, 7, 126, 53, 28, 46, 6, 64, 7...
index        [34, 59, 63, 83, 29, 109, 78, 44, 67, 71, 112,...
movie_id     [34, 59, 63, 83, 29, 109, 78, 44, 67, 71, 112,...
rating       [0.7, 0.8, 0.7, 0.7, 0.7, 0.8, 0.7, 0.8, 0.7, ...
timestamp    [1112484815, 1112484913, 1112486032, 111248499...
Name: 0, dtype: object

In [7]:
# Build the VAE from the hyperparameters set in the configuration cell.
# VariationalAutoencoder is imported in the imports cell at the top of the
# notebook, so a fresh "Restart & Run All" shows every dependency up front.
model = VariationalAutoencoder(
    item_dim=num_items,
    # NOTE(review): embedding_dim reuses latent_dim rather than having its own
    # hyperparameter -- confirm this coupling is intentional.
    embedding_dim=latent_dim,
    latent_dim=latent_dim,
    hidden_dim=hidden_dim,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    loss_type=loss_type,
    total_anneal_steps=total_anneal_steps,
    anneal_cap=anneal_cap,
)

In [8]:
# Trade some float32 matmul precision for speed (see the torch documentation
# for what 'medium' permits).
torch.set_float32_matmul_precision('medium')

trainer = L.Trainer(
    # "auto" picks the GPU when one is present and otherwise falls back to the
    # CPU, so the notebook also runs on machines without CUDA.
    accelerator="auto",
    devices="auto",
    # Honour the `epochs` hyperparameter from the configuration cell instead of
    # a separate hard-coded epoch count.
    max_epochs=epochs,
    # Take the logger from the same `lightning` package as the Trainer; mixing
    # `lightning` and `pytorch_lightning` objects is discouraged because their
    # classes are distinct and fail each other's isinstance checks.
    logger=L.pytorch.loggers.TensorBoardLogger(save_dir="logs/"),
)
trainer.fit(model=model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type               | Params
-----------------------------------------------
0 | encoder | VariationalEncoder | 6.1 M 
1 | decoder | VariationalDecoder | 16.2 M
-----------------------------------------------
22.3 M    Trainable params
0         Non-trainable params
22.3 M    Total params
89.057    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Exception ignored in: <function _releaseLock at 0x7d6139a7bee0>
Traceback (most recent call last):
  File "/home/albert/.pyenv/versions/3.9.18/lib/python3.9/logging/__init__.py", line 227, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 


RuntimeError: DataLoader worker (pid(s) 474758) exited unexpectedly

In [None]:
# Evaluate the best checkpoint from the fit above on the test loader built
# earlier. Passing `dataloaders=` explicitly matters: without it the Trainer can
# only fall back to a test_dataloader hook on the model, and the notebook's
# `test_dataloader` would never be used.
trainer.test(dataloaders=test_dataloader, ckpt_path='best')