In [37]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [38]:
import torch
import numpy as np
from pathlib2 import Path
from tqdm import trange

from dataloader import load_data_100k
from model import GLocalNet, KernelNet
from metric import ndcg, rmse_matrix, mae_matrix
from loss import glocal_loss
from utils import set_all_random_seed

## Constants and Parameters

In [30]:
# Directory handed to the data loader.
data_dir = Path('./movie_lens_100k/')

# Directory for the checkpoints written during pretraining and fine-tuning.
# parents=True: the intermediate './checkpoints' directory may not exist yet.
# exist_ok=True: re-running the cell is a no-op instead of an error.
ckpt_dir = Path('./checkpoints/exp1/')
ckpt_dir.mkdir(parents=True, exist_ok=True)

# Directory for the exported weights. It is created up front so that saving
# the final state dict cannot fail on a missing directory.
weights_dir = Path('./weights/')
weights_dir.mkdir(parents=True, exist_ok=True)

In [31]:
# Seed (helper imported from utils)
set_all_random_seed(42)

# ---- Model hyperparameters ----
n_layers = 2   # number of hidden layers
n_hid = 500    # size of hidden layers
n_emb = 4      # AE embedding size
gk_size = 3    # width = height of the kernel used for convolution

# ---- Training hyperparameters ----
# Suffix convention: _p = pretraining, _f / _ft = fine-tuning.
max_epoch_p, max_epoch_f = 500, 1000  # maximum number of epochs
patience_p, patience_f = 10, 10       # consecutive rounds the early-stopping condition must hold before actually stopping
tol_p, tol_f = 1e-4, 1e-6             # minimum difference between consecutive train RMSE values, used for early stopping
lr_p, lr_ft = 1e-3, 1e-3              # learning rates

lambda_L2 = 20.        # regularisation of the number of parameters
lambda_sparse = 0.006  # regularisation of sparsity of the final matrix
dot_scale = 1          # dot product weight for the global kernel

## Training

### Prepare

In [32]:
# Device: use CUDA when torch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Data: *_R are rating matrices, *_M are mask matrices (0 for missing, 1 for existing).
# n_m / n_u are presumably the movie / user counts (verify against load_data_100k).
(
    n_m, n_u,
    train_R, train_M,
    val_R, val_M,
    test_R, test_M,
) = load_data_100k(data_dir)

# Model: the kernel network is built first, then wrapped by the complete model.
# Both are moved to the selected device.
kernel_net = KernelNet(n_u, n_hid, n_emb, n_layers, lambda_sparse, lambda_L2)
kernel_net.to(device)
complete_model = GLocalNet(kernel_net, n_m, gk_size, dot_scale)
complete_model.to(device)

# Optimizers: pretraining updates only the local kernel network's parameters,
# fine-tuning updates every parameter of the complete model.
optimizer_p = torch.optim.AdamW(
    complete_model.local_kernel_net.parameters(),
    lr=lr_p,
)
optimizer_ft = torch.optim.AdamW(
    complete_model.parameters(),
    lr=lr_ft,
)


data matrix loaded
num of users: 943
num of movies: 1682
num of training ratings: 76000
num of validation ratings: 4000
num of test ratings: 20000


### Pretraining (local features)

In [33]:
# Early-stopping / checkpoint bookkeeping for the pretraining loop.
best_val_rmse = float('inf')
last_rmse = float('inf')
count = 0

# Training ratings and their mask as tensors on the selected device.
M = torch.Tensor(train_M).to(device)
X = torch.Tensor(train_R).to(device)

In [35]:
# Pretraining: only the local kernel network is trained (optimizer_p), one step per epoch on X.
with trange(max_epoch_p) as pbar:
    for epoch in pbar:
        # --- Optimisation step ---
        complete_model.local_kernel_net.train()
        optimizer_p.zero_grad()
        pred, reg_loss = complete_model.local_kernel_net(X)
        loss = glocal_loss(pred, reg_loss, M, X)
        loss.backward()
        optimizer_p.step()

        # --- Evaluation: predict in eval mode without gradients, clip to [1, 5] ---
        complete_model.local_kernel_net.eval()
        with torch.no_grad():
            pred = np.clip(complete_model.local_kernel_net(X).cpu().numpy(), 1, 5)

        train_rmse = rmse_matrix(pred, train_M, train_R)
        val_rmse = rmse_matrix(pred, val_M, val_R)
        pbar.set_postfix(epoch=epoch, train_rmse=train_rmse, val_rmse=val_rmse)

        # --- Checkpoint whenever the validation RMSE improves ---
        if val_rmse < best_val_rmse:
            best_val_rmse = val_rmse
            ckpt = dict(
                train_rmse=train_rmse,
                val_rmse=val_rmse,
                state_dict=complete_model.state_dict(),
            )
            torch.save(ckpt, str(ckpt_dir / 'pretrain.pth'))

        # --- Early stopping: count consecutive epochs whose train RMSE moved by less than tol_p ---
        count = count + 1 if abs(last_rmse - train_rmse) < tol_p else 0
        if count == patience_p:
            print('Early stopping at epoch {} with train rmse {:.4f} and val rmse {:.4f}'.format(epoch, train_rmse, val_rmse))
            break
        last_rmse = train_rmse

 89%|████████▉ | 447/500 [00:10<00:01, 43.88it/s, epoch=447, train_rmse=0.919, val_rmse=0.939]

Early stopping at epoch 447 with train rmse 0.9188 and val rmse 0.9394





### Fine-tuning (global features)

In [None]:
# Best validation metrics seen during fine-tuning and the epochs they occurred at.
best_rmse, best_mae, best_ndcg = float("inf"), float("inf"), 0
best_epoch_rmse, best_epoch_mae, best_epoch_ndcg = 0, 0, 0

# Early-stopping state. `count` is reset explicitly so this stage does not rely on
# whatever value an earlier cell left in the kernel.
last_rmse = float("inf")
count = 0

# Restore the pretraining checkpoint. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a different device still loads.
complete_model.load_state_dict(
    torch.load(str(ckpt_dir / 'pretrain.pth'), map_location=device)['state_dict']
)

X = torch.Tensor(train_R).to(device)
M = torch.Tensor(train_M).to(device)

# Output of the local kernel network on X, computed once in eval mode without
# gradients; it is passed to the complete model together with X in the cells below.
with torch.no_grad():
    complete_model.eval()
    X_local = complete_model.local_kernel_net(X)

In [None]:
# Fine-tuning: every parameter of the complete model is trained (optimizer_ft),
# one step per epoch on X together with the precomputed X_local.
with trange(max_epoch_f) as t:
    for epoch in t:
        # Training
        complete_model.train()
        optimizer_ft.zero_grad()
        pred, reg_loss = complete_model(X, X_local)
        loss = glocal_loss(pred, reg_loss, M, X)
        loss.backward()
        optimizer_ft.step()

        # Evaluation: predict in eval mode without gradients, clip to [1, 5]
        complete_model.eval()
        with torch.no_grad():
            pred = complete_model(X, X_local).cpu().numpy()
        pred = np.clip(pred, 1, 5)

        train_rmse = rmse_matrix(pred, train_M, train_R)
        train_mae = mae_matrix(pred, train_M, train_R)
        train_ndcg = ndcg(pred, train_R)
        val_rmse = rmse_matrix(pred, val_M, val_R)
        val_mae = mae_matrix(pred, val_M, val_R)
        val_ndcg = ndcg(pred, val_R)

        t.set_postfix(epoch=epoch,
                    train_mae=train_mae,
                    val_mae=val_mae,
                    train_rmse=train_rmse,
                    val_rmse=val_rmse,
                    train_ndcg=train_ndcg,
                    val_ndcg=val_ndcg)

        # Track the best validation value of each metric. Only the best-RMSE model
        # is written to disk.
        if val_mae < best_mae:
            best_mae = val_mae
            best_epoch_mae = epoch
        if val_rmse < best_rmse:
            best_rmse = val_rmse
            best_epoch_rmse = epoch
            ckpt = {
                'mae': val_mae,
                'rmse': val_rmse,
                'ndcg': val_ndcg,
                'state_dict': complete_model.state_dict()
            }
            torch.save(ckpt, str(ckpt_dir / 'finetune_best_rmse.pth'))
        if val_ndcg > best_ndcg:
            best_ndcg = val_ndcg
            best_epoch_ndcg = epoch

        # Early stopping: count consecutive epochs in which the train RMSE changed by
        # less than tol_f. `last_rmse` holds the previous epoch's *train* RMSE (set at
        # the end of the loop body), so the current value must be the train RMSE too.
        # Comparing it against val_rmse mixed two different quantities, so the
        # condition was effectively never met.
        if abs(last_rmse - train_rmse) < tol_f:
            count += 1
        else:
            count = 0
        if count == patience_f:
            print('Early stopping at epoch {} with train rmse {:.4f} and val rmse {:.4f}'.format(epoch, train_rmse, val_rmse))
            break
        last_rmse = train_rmse

print('Epoch:', best_epoch_rmse, 'Best RMSE:', best_rmse)
print('Epoch:', best_epoch_mae, 'Best MAE:', best_mae)
print('Epoch:', best_epoch_ndcg, 'Best NDCG:', best_ndcg)


100%|██████████| 1000/1000 [05:20<00:00,  3.12it/s, epoch=999, train_mae=0.665, train_ndcg=0.908, train_rmse=0.843, val_mae=0.711, val_ndcg=0.927, val_rmse=0.915]

Epoch: 244 Best RMSE: 0.912213141932683
Epoch: 986 Best MAE: 0.710681
Epoch: 992 Best NDCG: 0.929717053698112





In [None]:
# Evaluate on test set
# Restore the checkpoint saved at the best validation RMSE. map_location remaps the
# stored tensors onto the current device, so a checkpoint written on a different
# device still loads.
complete_model.load_state_dict(
    torch.load(str(ckpt_dir / 'finetune_best_rmse.pth'), map_location=device)['state_dict']
)
complete_model.eval()
with torch.no_grad():
    pred = complete_model(X, X_local).cpu().numpy()
# Clip predictions to [1, 5] before scoring, as in the training loops.
pred = np.clip(pred, 1, 5)
test_rmse = rmse_matrix(pred, test_M, test_R)
test_ndcg = ndcg(pred, test_R)
test_mae = mae_matrix(pred, test_M, test_R)
print('Test RMSE:', test_rmse)
print('Test MAE:', test_mae)
print('Test NDCG:', test_ndcg)

Test RMSE: 0.9215279912065995
Test MAE: 0.7257653
Test NDCG: 0.8950813935668877


## Convert

In [None]:
# Move the model to the CPU first so the saved state dict holds CPU tensors,
# then write it to the weights directory.
complete_model.cpu()
torch.save(
    complete_model.state_dict(),
    str(weights_dir / 'best.pth'),
)