# PyTorch Model

In [1]:
import torch
import torch.nn as nn
import numpy as np
import math

from settings import *
from utils import load_np_arr, save_tensor

In [2]:
# Pick the compute backend in order of preference: CUDA GPU first, then
# Apple-silicon MPS, and finally the CPU as the universal fallback.
if torch.cuda.is_available():
    dev = "cuda"
elif torch.backends.mps.is_available():
    dev = "mps"
else:
    dev = "cpu"

print(f"Using {dev} device")

Using cuda device


## Collaborative filtering cost function

|General <br />  Notation  | Description                                                                | Python (if any) |
|:-------------------------|:---------------------------------------------------------------------------|-----------------|
| $r(i,j)$                 | scalar; 1 if user j rated movie i, 0 otherwise                             |                 |
| $y(i,j)$                 | scalar; rating given by user j to movie i (defined only if $r(i,j) = 1$)   |                 |
| $\mathbf{w}^{(j)}$       | vector; parameters for user j                                              |                 |
| $b^{(j)}$                | scalar; parameter for user j                                               |                 |
| $\mathbf{x}^{(i)}$       | vector; feature ratings for movie i                                        |                 |     
| $n_m$                    | number of movies                                                           | num_movies      |
| $n_u$                    | number of users                                                            | num_users       |
| $n$                      | number of features                                                         | num_features    |
| $\mathbf{X}$             | matrix of vectors $\mathbf{x}^{(i)}$                                       | X               |
| $\mathbf{W}$             | matrix of vectors $\mathbf{w}^{(j)}$                                       | W               |
| $\mathbf{b}$             | vector of bias parameters $b^{(j)}$                                        | b               |
| $\mathbf{R}$             | matrix of elements $r(i,j)$                                                | R               |

The collaborative filtering cost function is given by

$$
J({\mathbf{x}^{(0)},\dots,\mathbf{x}^{(n_m-1)},\mathbf{w}^{(0)},b^{(0)},\dots,\mathbf{w}^{(n_u-1)},b^{(n_u-1)}}) = \left[\frac{1}{2}\sum_{(i,j):r(i,j)=1} (\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right] + \underbrace{\left[\frac{\lambda}{2} \sum^{n_u-1}_{j=0} \sum^{n-1}_{k=0} (\mathbf{w}^{(j)}_k)^2 + \frac{\lambda}{2} \sum^{n_m-1}_{i=0} \sum^{n-1}_{k=0} (\mathbf{x}^{(i)}_k)^2 \right]}_{\text{regularization}}
$$

The first summation in the cost function above runs over all pairs $(i, j)$ for which $r(i,j) = 1$, so it can equivalently be written as a sum over every movie and user, masked by $r(i,j)$:

$$
= \left[ \frac{1}{2}\sum_{j=0}^{n_u-1} \sum_{i=0}^{n_m-1}r(i,j)*(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right] + \text{regularization}
$$

In [3]:
def cofi_cost_func(X: torch.Tensor, W: torch.Tensor, b: torch.Tensor,
                   Y: torch.Tensor, R: torch.Tensor, lambda_: float) -> torch.Tensor:
    """Regularized collaborative-filtering cost.

    Computes ``0.5 * sum(R * (X @ W.T + b - Y) ** 2)`` plus the L2 penalty
    ``(lambda_ / 2) * (sum(W ** 2) + sum(X ** 2))``. The bias ``b`` is not
    regularized.

    Args:
        X: movie feature matrix, one row per movie.
        W: user parameter matrix, one row per user (same number of columns
            as ``X``).
        b: user biases; must broadcast against ``X @ W.T`` (the notebook
            uses shape ``(1, num_users)``).
        Y: ratings, same shape as ``X @ W.T`` (movies x users).
        R: mask with the same shape as ``Y``; each squared error is
            multiplied by the matching entry, so entries equal to 0 drop
            that rating from the data term.
        lambda_: regularization strength.

    Returns:
        A 0-dim tensor holding the total cost; it stays attached to the
        autograd graph so ``.backward()`` can be called on it.
    """
    # Predicted rating for every (movie, user) pair.
    predictions = torch.matmul(X, W.T).add(b)

    # Square first, then mask: identical operation order to the original so
    # results match bit-for-bit.
    masked_sq_err = predictions.subtract(Y).pow(2).multiply(R)
    data_cost = masked_sq_err.sum().multiply(0.5)

    reg_cost = (lambda_ / 2) * (W.pow(2).sum() + X.pow(2).sum())

    return data_cost + reg_cost

## Data preparation

Load and convert the data into tensors

In [4]:
# Read each stored array and place it on the selected device as a tensor in
# one step, so Y and R are only ever bound to tensors.
Y = torch.tensor(load_np_arr(Y_FILE_NAME), device=dev)
R = torch.tensor(load_np_arr(R_FILE_NAME), device=dev)

Data info

In [5]:
# Number of features learned per movie / per user.
num_features = 100
# R is indexed as (movie, user), so its first two dimensions give the counts.
num_movies, num_users = R.size(0), R.size(1)

print('Num. features:', num_features)
print('Num. movies:  ', num_movies)
print('Num. users:   ', num_users)

Num. features: 100
Num. movies:   9724
Num. users:    610


Create and initialize parameters

In [6]:
# Allocate the trainable parameters directly on the target device. The
# tensors are uninitialized here and are filled in by the init calls below.
X = nn.Parameter(torch.empty(num_movies, num_features, device=dev))
W = nn.Parameter(torch.empty(num_users, num_features, device=dev))
b = nn.Parameter(torch.empty(1, num_users, device=dev))


# Initialize parameters in place. The nn.init functions modify the tensor
# they are given, so no device transfer of their return value is needed.
# NOTE: no random seed is set in this cell, so the initial values (and the
# initial loss printed below) differ from run to run.
nn.init.kaiming_uniform_(X, a=math.sqrt(5))
nn.init.kaiming_uniform_(W, a=math.sqrt(5))
# For a 2-D weight the fan-in is its number of columns (num_features here);
# reading it from the shape avoids relying on a private torch helper.
fan_in = W.shape[1]
bound = 1 / math.sqrt(fan_in)
nn.init.uniform_(b, -bound, bound)

print(f'Initial loss: {cofi_cost_func(X, W, b, Y, R, lambda_=1)}')

Initial loss: 40620.94267767584


## Train

Create optimizer

In [7]:
# A single Adam optimizer updates movie features, user weights and user
# biases together.
optimizer = torch.optim.Adam([X, W, b], lr=1e-4)

Training loop

In [8]:
# `epochs` is the exclusive upper bound passed to range(), so the loop below
# performs epochs - 1 optimizer steps, numbered starting at 1.
epochs = 15001
lambda_ = 1

print('Epoch       Loss')

for epoch in range(1, epochs):
    # Drop the gradients accumulated by the previous step before the new
    # backward pass.
    optimizer.zero_grad()

    cost_val = cofi_cost_func(X, W, b, Y, R, lambda_)
    cost_val.backward()
    optimizer.step()

    # Only every 100th epoch is reported. The printed value is the cost
    # computed before this epoch's parameter update.
    if epoch % 100 != 0:
        continue
    print(f'{epoch:5}       {cost_val:.5f}')

Epoch       Loss
    1       40620.94268
  100       39073.83712
  200       37416.08541
  300       35435.24266
  400       33095.44997
  500       30518.86562
  600       27901.94262
  700       25397.83804
  800       23077.78437
  900       20962.85165
 1000       19055.26132
 1100       17350.08761
 1200       15837.86457
 1300       14505.39909
 1400       13336.90183
 1500       12315.35440
 1600       11423.73117
 1700       10645.86042
 1800       9966.91210
 1900       9373.60209
 2000       8854.21604
 2100       8398.53835
 2200       7997.73287
 2300       7644.19621
 2400       7331.41784
 2500       7053.83903
 2600       6806.72237
 2700       6586.03499
 2800       6388.34182
 2900       6210.71515
 3000       6050.65477
 3100       5906.02248
 3200       5774.98569
 3300       5655.96705
 3400       5547.60655
 3500       5448.72386
 3600       5358.29117
 3700       5275.41018
 3800       5199.29001
 3900       5129.23237
 4000       5064.61955
 4100       5004.90329

Save the parameters

In [9]:
# Persist each learned parameter to its configured file and print whatever
# save_tensor returns for it, in the order W, X, b.
for param, file_name in ((W, W_FILE_NAME), (X, X_FILE_NAME), (b, B_FILE_NAME)):
    print(save_tensor(param, file_name))