# PyTorch Model

In [1]:
import torch
import torch.nn as nn
import numpy as np
import math

from settings import *
from utils import load_np_arr, save_tensor

In [2]:
dev = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

print(f"Using {dev} device")

Using cuda device


## Data preparation

Load and convert the data into tensors

In [3]:
Y = load_np_arr(Y_FILE_NAME)
R = load_np_arr(R_FILE_NAME)
mu = load_np_arr(MU_FILE_NAME)

Data info

In [4]:
num_features = 100
num_movies = R.shape[0]
num_users = R.shape[1]

print('Num. features:', num_features)
print('Num. movies:  ', num_movies)
print('Num. users:   ', num_users)

Num. features: 100
Num. movies:   9724
Num. users:    610


Manually rate some movies by their id

In [5]:
my_ratings = np.zeros(num_movies)

my_ratings[314] = 5 # Forrest Gump
my_ratings[461] = 5 # Schindler's List

my_ratings[904] = 5 # 12 Angry Men
my_ratings[7315] = 5 # 12 Angry Men too! we rate them both just in case

my_ratings[3635] = 5 # A Beautiful Mind (2001)
my_ratings[8240] = 5 # Rush (2013)
my_ratings[8448] = 5 # Whiplash (2014)
my_ratings[9363] = 4 # Your Name. (2016)

my_ratings[7675] = 0.5 # The Avengers (2012)
my_ratings[8668] = 0.5 # Avengers: Age of Ultron
my_ratings[8675] = 0.5 # Avengers: Infinity War - Part I
my_ratings[1985] = 0.5 # Superman
my_ratings[1986] = 0.5 # Superman II
my_ratings[1987] = 0.5 # Superman III
my_ratings[1988] = 0.5 # Superman IV: The Quest for Peace

Append `my_ratings` to the `R` amd `Y`

In [6]:
# Push the normalized my_ratings to Y
Y = np.c_[my_ratings - mu.squeeze(), Y]

# Push the binary my_ratings to R
R = np.c_[(my_ratings != 0).astype(int), R]

Increment `num_users` after appending new user

In [7]:
num_users += 1

## Collaborative filtering cost function

|General <br />  Notation  | Description                                                                | Python (if any) |
|:-------------------------|:---------------------------------------------------------------------------|-----------------|
| $r(i,j)$                 | scalar; = 1  if user j rated movie i  = 0  otherwise                       |                 |
| $y(i,j)$                 | scalar; = rating given by user j on movie  i    (if r(i,j) = 1 is defined) |                 |
| $\mathbf{w}^{(j)}$       | vector; parameters for user j                                              |                 |
| $b^{(j)}$                | scalar; parameter for user j                                               |                 |
| $\mathbf{x}^{(i)}$       | vector; feature ratings for movie i                                        |                 |     
| $n_m$                    | number of movies                                                           | num_movies      |
| $n_u$                    | number of users                                                            | num_users       |
| $n$                      | number of features                                                         | num_features    |
| $\mathbf{X}$             | matrix of vectors $\mathbf{x}^{(i)}$                                       | X               |
| $\mathbf{W}$             | matrix of vectors $\mathbf{w}^{(j)}$                                       | W               |
| $\mathbf{b}$             | vector of bias parameters $b^{(j)}$                                        | b               |
| $\mathbf{R}$             | matrix of elements $r(i,j)$                                                | R               |

The collaborative filtering cost function is given by

$$
J({\mathbf{x}^{(0)},\dots,\mathbf{x}^{(n_m-1)},\mathbf{w}^{(0)},b^{(0)},\dots,\mathbf{w}^{(n_u-1)},b^{(n_u-1)}}) = \left[\frac{1}{2}\sum_{(i,j):r(i,j)=1} (\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right] + \underbrace{\left[\frac{\lambda}{2} \sum^{n_u-1}_{j=0} \sum^{n-1}_{k=0} (\mathbf{w}^{(j)}_k)^2 + \frac{\lambda}{2} \sum^{n_m-1}_{i=0} \sum^{n-1}_{k=0} (\mathbf{x}^{(i)}_k)^2 \right]}_{\text{regularization}}
$$

The first summation in (1) is "for all $i$, $j$ where $r(i,j)$ equals $1$" and could be written:

$$
= \left[ \frac{1}{2}\sum_{j=0}^{n_u-1} \sum_{i=0}^{n_m-1}r(i,j)*(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right] + \text{regularization}
$$

In [8]:
def cofi_cost_func(X: torch.Tensor, W: torch.Tensor, b: torch.Tensor,
                   Y: torch.Tensor, R: torch.Tensor, lambda_: torch.float) -> torch.float:

    j = torch.matmul(X, W.T).add(b).sub(Y).mul(R).pow(2).sum().mul(0.5)
    reg_val = (lambda_ / 2) * (X.pow(2).sum() + W.pow(2).sum())
    return j + reg_val

## Model

Convert `R` and `Y` to tensors

In [9]:
Y = torch.tensor(Y, device=dev)
R = torch.tensor(R, device=dev)

Create and initialize parameters

In [10]:
X = nn.Parameter(
    torch.Tensor(num_movies, num_features).to(dev)
)
W = nn.Parameter(
    torch.Tensor(num_users, num_features).to(dev)
)
b = nn.Parameter(
    torch.Tensor(1, num_users).to(dev)
)


# Initialize parameters
nn.init.kaiming_uniform_(X, a=10).to(dev)
nn.init.kaiming_uniform_(W, a=10).to(dev)
fan_in, _ = nn.init._calculate_fan_in_and_fan_out(W)
bound = 1 / math.sqrt(fan_in)
nn.init.uniform_(b, -bound, bound).to(dev)

print(f'Initial loss: {cofi_cost_func(X, W, b, Y, R, lambda_=1)}')

Initial loss: 38968.55409818531


Create optimizer

In [11]:
optimizer = torch.optim.Adam(params=[X, W, b], lr=1e-4)

Training loop

In [12]:
%%time

epochs = 10001
lambda_ = 1

print('Epoch    Loss')

for epoch in range(1, epochs):
    cost_val = cofi_cost_func(X, W, b, Y, R, lambda_)

    optimizer.zero_grad()
    cost_val.backward()

    optimizer.step()

    if epoch % 1000 == 0:
        print(f'{epoch:5}    {cost_val:.5f}')

Epoch    Loss
 1000    19545.25860
 2000    9534.07339
 3000    6375.19280
 4000    5215.38281
 5000    4694.91474
 6000    4415.83765
 7000    4248.21838
 8000    4141.17938
 9000    4069.84808
10000    4020.70026
CPU times: user 2min 11s, sys: 1min 17s, total: 3min 29s
Wall time: 3min 29s


Save the parameters

In [13]:
print(save_tensor(W, W_FILE_NAME))
print(save_tensor(X, X_FILE_NAME))
print(save_tensor(b, B_FILE_NAME))

Saved tensor in ./cache/W.pt
Saved tensor in ./cache/X.pt
Saved tensor in ./cache/b.pt


## Inference

Convert parameters to numpy array

In [14]:
X = X.cpu().detach().numpy()
W = W.cpu().detach().numpy()
b = b.cpu().detach().numpy()

Load movies.csv to extract the movie titles for inference

In [15]:
import pandas as pd
movies_df = pd.read_csv(MOVIES_CSV_FILE, header=0,  delimiter=',', quotechar='"')

Perform inference by calculating the predicted ratings

In [16]:
pred = np.matmul(X, W.T) + b + mu

Sort the ratings

In [17]:
# Extract the predicted ratings for my_ratings
sorted_pred = pred[:, 0]

# Sort the predicted ratings in ascending order
sorted_pred = sorted_pred.argsort()

# Create a reversed order view of sorted_pred to get the top-rated movies
top_movies = sorted_pred[::-1]

In [18]:
num_suggestions = 15

i = 0
while num_suggestions > 0 and i < len(top_movies):
    cur_id = top_movies[i]

    if my_ratings[cur_id] == 0:  # This movie has not been rated yet
        movie = movies_df.loc[movies_df['movieId'] == cur_id, 'title']
        num_suggestions -= 1

        print(movie.values[0])

    i += 1

Shawshank Redemption, The (1994)
Godfather, The (1972)
Casablanca (1942)
Saving Private Ryan (1998)
Braveheart (1995)
Three Billboards Outside Ebbing, Missouri (2017)
Dr. Goldfoot and the Bikini Machine (1965)
61* (2001)
Thin Line Between Love and Hate, A (1996)
When Worlds Collide (1951)
Bossa Nova (2000)
Dragons: Gift of the Night Fury (2011)
English Vinglish (2012)
Life Is Beautiful (La Vita è bella) (1997)
Kung Fu Panda: Secrets of the Masters (2011)
