# PyTorch Model

In [5]:
import torch
import torch.nn as nn
import numpy as np
import math

from settings import *
from utils import load_np_arr

In [76]:
dev = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

print(f"Using {dev} device")

Using cuda device


## Collaborative filtering model

|General <br />  Notation  | Description                                                                | Python (if any) |
|:-------------------------|:---------------------------------------------------------------------------|-----------------|
| $r(i,j)$                 | scalar; = 1  if user j rated movie i  = 0  otherwise                       |                 |
| $y(i,j)$                 | scalar; = rating given by user j on movie  i    (if r(i,j) = 1 is defined) |                 |
| $\mathbf{w}^{(j)}$       | vector; parameters for user j                                              |                 |
| $b^{(j)}$                | scalar; parameter for user j                                               |                 |
| $\mathbf{x}^{(i)}$       | vector; feature ratings for movie i                                        |                 |     
| $n_m$                    | number of movies                                                           | num_movies      |
| $n_u$                    | number of users                                                            | num_users       |
| $n$                      | number of features                                                         | num_features    |
| $\mathbf{X}$             | matrix of vectors $\mathbf{x}^{(i)}$                                       | X               |
| $\mathbf{W}$             | matrix of vectors $\mathbf{w}^{(j)}$                                       | W               |
| $\mathbf{b}$             | vector of bias parameters $b^{(j)}$                                        | b               |
| $\mathbf{R}$             | matrix of elements $r(i,j)$                                                | R               |

The collaborative filtering cost function is given by
$$J({\mathbf{x}^{(0)},...,\mathbf{x}^{(n_m-1)},\mathbf{w}^{(0)},b^{(0)},...,\mathbf{w}^{(n_u-1)},b^{(n_u-1)}})= \left[ \frac{1}{2}\sum_{(i,j):r(i,j)=1}(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+ \underbrace{\left[
\frac{\lambda}{2}
\sum_{j=0}^{n_u-1}\sum_{k=0}^{n-1}(\mathbf{w}^{(j)}_k)^2
+ \frac{\lambda}{2}\sum_{i=0}^{n_m-1}\sum_{k=0}^{n-1}(\mathbf{x}_k^{(i)})^2
\right]}_{regularization}
\tag{1}$$
The first summation in (1) is "for all $i$, $j$ where $r(i,j)$ equals $1$" and could be written:

$$
= \left[ \frac{1}{2}\sum_{j=0}^{n_u-1} \sum_{i=0}^{n_m-1}r(i,j)*(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+\text{regularization}
$$

In [103]:
class CoFil(nn.Module):
    def __init__(self, num_movies: int, num_users: int, num_features: int) -> None:
        super().__init__()

        # Add parameters
        self.X = nn.Parameter(
            torch.Tensor(num_movies, num_features).to(dev),
        )
        self.W = nn.Parameter(
            torch.Tensor(num_users, num_features).to(dev),
        )
        self.b = nn.Parameter(
            torch.Tensor(num_movies, 1).to(dev),
        )

        # Initialize parameters
        nn.init.kaiming_uniform_(self.X, a=math.sqrt(5)).to(dev)
        nn.init.kaiming_uniform_(self.W, a=math.sqrt(5)).to(dev)
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.W)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.b, -bound, bound).to(dev)

    def forward(self, R: torch.Tensor):
    # def forward(self, R: torch.Tensor, Y: torch.Tensor, lambda_: float):
        z = torch.matmul(self.X, self.W.T)
        z = z.add(self.b)
        return z.multiply(R)
        # j = torch.matmul(self.X, self.W.T)
        # j = j.add(self.b)
        # j = j.subtract(Y)
        # j = j.pow(2)
        # j = j.multiply(R)
        # j = j.multiply(0.5)
        # return j.sum((self.W.pow(2).sum() + self.X.pow(2).sum()).multiply(lambda_ / 2))


In [86]:
def CoFilLoss(y_pred: torch.Tensor,
              state_dict=dict[str, torch.Tensor],
              lambda_: float = 1):
    j = y_pred.subtract(y_pred)
    j = j.pow(2)
    j = j.multiply(0.5)
    j = j.sum()

    W = state_dict['W']
    X = state_dict['X']
    return j + (W.pow(2).sum() + X.pow(2).sum()).multiply(lambda_ / 2)


## Data preparation

Load and convert the data into tensors

In [79]:
Y = load_np_arr(Y_FILE_NAME)
R = load_np_arr(R_FILE_NAME)

Y = torch.tensor(Y, device=dev)
R = torch.tensor(R, device=dev)

Loaded data info

In [80]:
print('Num. features:', 10, '   i.e. num. of ratings')
print('Num. movies:  ', R.shape[0])
print('Num. users:   ', R.shape[1])

Num. features: 10    i.e. num. of ratings
Num. movies:   9724
Num. users:    610


Split data into train and sets

In [81]:
from sklearn.model_selection import train_test_split

R_train, R_test, Y_train, Y_test = train_test_split(R, Y, test_size=0.2, random_state=42)

print('R & Y shape:             ', R.shape)
print('R_train & Y_train shape: ', R_train.shape)
print('R_test & Y_test shape:   ', R_test.shape)

R & Y shape:              torch.Size([9724, 610])
R_train & Y_train shape:  torch.Size([7779, 610])
R_test & Y_test shape:    torch.Size([1945, 610])


## Train

In [104]:
model = CoFil(*R_train.shape, num_features=10).to(dev)
optimizer = torch.optim.Adam(params=model.parameters(), lr=5e-4)

In [109]:
epochs = 10

for epoch in range(epochs):
    model.train()

    y_pred = model(R_train)

    loss = CoFilLoss(y_pred, model.state_dict())

    optimizer.zero_grad()

    # loss.backward()

    optimizer.step()


    print(loss)

tensor(1398.1997, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1398.1997, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1398.1997, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1398.1997, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1398.1997, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1398.1997, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1398.1997, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1398.1997, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1398.1997, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1398.1997, device='cuda:0', grad_fn=<AddBackward0>)
