In [1]:
import torch
import torch.nn as nn

from tqdm.notebook import tqdm

from typing import List

## Gradient Descent

In [2]:
def fit(
    ratings: torch.FloatTensor,
    k: int,
    lr: float,
    iters: int,
    mu: float = None,
) -> List[torch.FloatTensor]:
    # Initialize params
    n_users, n_movies = ratings.shape
    expected = ratings.cuda()
    
    P = torch.rand(n_users, k, requires_grad=True, device="cuda")
    Q = torch.rand(n_movies, k, requires_grad=True, device="cuda")
    bu = torch.rand(n_users, 1, requires_grad=True, device="cuda")
    bi = torch.rand(n_movies, 1, requires_grad=True, device="cuda")
    
    if mu is None:
        mu = (expected.sum()/(expected!=0).sum())

    criterion = nn.MSELoss()
    
    # Fit
    ones_user = torch.ones(n_users, 1, requires_grad=False, device="cuda")
    ones_item = torch.ones(n_movies, 1, requires_grad=False, device="cuda")

    min_loss = torch.inf
    params = []
    for i in tqdm(range(iters)):
        Bu = torch.concat((bu, ones_user), dim=1)
        Bi = torch.concat((bi, ones_item), dim=1)

        pred = mu + Bu@(Bi.T) + P@(Q.T)

        loss = criterion(pred, expected)
        
        if min_loss > loss.detach():
            min_loss = float(loss.detach())
            params = [P.detach().cpu(), Q.detach().cpu(), bu.detach().cpu(), bi.detach().cpu()]
#             print(f"iter {i+1}: {min_loss}")

        loss.backward()

        with torch.no_grad():
            P -= lr*P.grad
            Q -= lr*Q.grad
            bu -= lr*bu.grad
            bi -= lr*bi.grad
            
    print(min_loss)
            
    return params

In [3]:
def reconstruct(
    P: torch.FloatTensor,
    Q: torch.FloatTensor,
    bu: torch.FloatTensor,
    bi: torch.FloatTensor,
    mu: float,
) -> torch.FloatTensor:
    P = P.cuda()
    Q = Q.cuda()
    bu = bu.cuda()
    bi = bi.cuda()
    
    Bu = torch.concat((bu, torch.ones(len(bu), 1, device="cuda")), dim=1)
    Bi = torch.concat((bi, torch.ones(len(bi), 1, device="cuda")), dim=1)
    
    mat = mu + Bu@Bi.T + P@Q.T
    
    return torch.clip(mat, 1, 5).cpu()

In [4]:
## Test
mat = torch.tensor([
    [5, 2, 4, 3, 2, 3],
    [4, 3, 5, 4, 3, 2],
    [1, 5, 3, 4, 4, 5],
    [1, 0, 2, 3, 4, 2],
], dtype=torch.float32)

out_P, out_Q, out_bu, out_bi = fit(ratings=mat, k=3, lr=0.08, iters=1000)

out_mat = reconstruct(out_P, out_Q, out_bu, out_bi, (mat.sum()/(mat!=0).sum()))

print(out_mat)
print()
print(mat)

  0%|          | 0/1000 [00:00<?, ?it/s]

0.8942086100578308
tensor([[4.8325, 3.5595, 4.4735, 3.3083, 3.4615, 2.3246],
        [4.7551, 3.3419, 4.9695, 3.0768, 1.9885, 1.6346],
        [2.0566, 3.6687, 2.6347, 3.6749, 5.0000, 5.0000],
        [1.0000, 1.0000, 1.0000, 1.7221, 4.5860, 1.6006]])

tensor([[5., 2., 4., 3., 2., 3.],
        [4., 3., 5., 4., 3., 2.],
        [1., 5., 3., 4., 4., 5.],
        [1., 0., 2., 3., 4., 2.]])


# Real Data

In [5]:
import numpy as np
from numpy.typing import NDArray
import pandas as pd
from numba import njit, prange

In [6]:
train_csv = pd.read_csv("../../data/train_data.csv")
test_csv = pd.read_csv("../../data/test_data.csv")

train_csv["user_id"] = train_csv["user_id"].apply(lambda x: x - 1)
train_csv["movie_id"] = train_csv["movie_id"].apply(lambda x: x - 1)

test_csv["user_id"] = test_csv["user_id"].apply(lambda x: x - 1)
test_csv["movie_id"] = test_csv["movie_id"].apply(lambda x: x - 1)

# Split into train and validation
train_data = train_csv.drop(["timestamp"], axis=1).sample(frac=0.8)
validation_data = train_csv.drop(train_data.index).drop(["timestamp"], axis=1)

assert train_data.shape[0] + validation_data.shape[0] == train_csv.shape[0]## Split into train and validation

In [7]:
@njit
def build_matrix_jit(data: NDArray[np.uint8], n_users: int, n_items: int):
    """
    data: Array of size (n_entries, 3) with columns representing:
        col 0: user_id
        col 1: item_id
         col 2: rating
    """
    ratings_mat = np.zeros((n_users, n_items), dtype=np.float32)

    for row in data:
        u, i, r = row
        ratings_mat[u, i] = r
    
    return ratings_mat

users = 3974
movies = 3564

ratings_mat = build_matrix_jit(train_data.values, users, movies)## Compute predictions
ratings_mat = ratings_mat

In [8]:
@njit(nogil=True, parallel=True)
def predict_batch(X: NDArray[int], reconstructed_matrix: NDArray[np.float64]):
    """
    X: Array of size (n_entries, 3) with columns representing:
        col 0: user_id
        col 1: item_id
    """
    predictions = np.zeros(len(X), dtype=np.float64)
    
    for i in prange(len(X)):
        user_id, movie_id = X[i]
        predictions[i] = reconstructed_matrix[user_id-1, movie_id-1]
        
    return np.clip(predictions, 1, 5)

In [9]:
@njit(parallel=True)
def calc_error(preds, expected):
    num = 0
    for i in prange(len(preds)):
        num += (preds[i] - expected[i])**2

    rmse = np.sqrt(num/len(preds))
    return rmse

# Fit

In [10]:
mu_train = (ratings_mat.sum()/(ratings_mat!=0).sum())

data_arr = validation_data.drop(["rating"], axis=1).values
expected_arr = validation_data["rating"].values

In [11]:
means_users = ratings_mat.sum(axis=1)/((ratings_mat!=0).sum(axis=1) + 1e-15)
means_items = ratings_mat.sum(axis=0)/((ratings_mat!=0).sum(axis=0) + 1e-15)

# Fill zeros with the mean of means of users and items
for u in tqdm(range(ratings_mat.shape[0])):
    for i in range(ratings_mat.shape[1]):
        if ratings_mat[u, i] == 0:
            ratings_mat[u, i] = (means_users[u] + means_items[i])/2
            
print(ratings_mat)

  0%|          | 0/3974 [00:00<?, ?it/s]

[[3.9266164 3.4706297 3.385348  ... 3.7571428 4.107143  3.7967033]
 [3.8194737 3.3634868 3.2782052 ... 3.65      4.        3.6895604]
 [3.9444737 3.4884868 3.4032052 ... 3.775     4.125     3.8145604]
 ...
 [4.1294737 3.673487  3.588205  ... 3.96      4.31      3.9995604]
 [3.9980452 3.5420582 3.4567766 ... 3.8285713 4.178571  3.8681319]
 [3.9583626 3.5023758 3.417094  ... 3.788889  4.138889  3.8284492]]


In [12]:
ratings_mat_torch = torch.from_numpy(ratings_mat.astype(np.float32))

In [13]:
min_err = np.inf
best_k = 2
for k in tqdm(range(2, 50)):
    fitted_params = fit(ratings=ratings_mat_torch, k=k, lr=0.5, iters=1000, mu=mu_train)
    recontructed_mat = reconstruct(*fitted_params, mu_train).numpy()
    
    predicted = predict_batch(data_arr, recontructed_mat)
    err = calc_error(predicted, expected_arr)
    
    if min_err > err:
        min_err = err
        best_k = k

  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

0.05239686369895935


  0%|          | 0/1000 [00:00<?, ?it/s]

0.04032544791698456


  0%|          | 0/1000 [00:00<?, ?it/s]

0.038827743381261826


  0%|          | 0/1000 [00:00<?, ?it/s]

0.06139560416340828


  0%|          | 0/1000 [00:00<?, ?it/s]

0.075432188808918


  0%|          | 0/1000 [00:00<?, ?it/s]

0.09848018735647202


  0%|          | 0/1000 [00:00<?, ?it/s]

0.1616826355457306


  0%|          | 0/1000 [00:00<?, ?it/s]

0.12556520104408264


  0%|          | 0/1000 [00:00<?, ?it/s]

0.24873341619968414


  0%|          | 0/1000 [00:00<?, ?it/s]

0.24543356895446777


  0%|          | 0/1000 [00:00<?, ?it/s]

0.1391724944114685


  0%|          | 0/1000 [00:00<?, ?it/s]

0.3285300135612488


  0%|          | 0/1000 [00:00<?, ?it/s]

0.2724137008190155


  0%|          | 0/1000 [00:00<?, ?it/s]

0.34786683320999146


  0%|          | 0/1000 [00:00<?, ?it/s]

0.33258315920829773


  0%|          | 0/1000 [00:00<?, ?it/s]

0.29863977432250977


  0%|          | 0/1000 [00:00<?, ?it/s]

0.2148847132921219


  0%|          | 0/1000 [00:00<?, ?it/s]

0.31754976511001587


  0%|          | 0/1000 [00:00<?, ?it/s]

0.12747211754322052


  0%|          | 0/1000 [00:00<?, ?it/s]

0.38699039816856384


  0%|          | 0/1000 [00:00<?, ?it/s]

0.3905424475669861


  0%|          | 0/1000 [00:00<?, ?it/s]

0.3928970396518707


  0%|          | 0/1000 [00:00<?, ?it/s]

0.264500230550766


  0%|          | 0/1000 [00:00<?, ?it/s]

0.3699822425842285


  0%|          | 0/1000 [00:00<?, ?it/s]

0.41749152541160583


  0%|          | 0/1000 [00:00<?, ?it/s]

0.40431109070777893


  0%|          | 0/1000 [00:00<?, ?it/s]

0.4276365339756012


  0%|          | 0/1000 [00:00<?, ?it/s]

0.42686715722084045


  0%|          | 0/1000 [00:00<?, ?it/s]

0.4489938020706177


  0%|          | 0/1000 [00:00<?, ?it/s]

0.29894623160362244


  0%|          | 0/1000 [00:00<?, ?it/s]

0.13120953738689423


  0%|          | 0/1000 [00:00<?, ?it/s]

0.27384355664253235


  0%|          | 0/1000 [00:00<?, ?it/s]

0.4784429669380188


  0%|          | 0/1000 [00:00<?, ?it/s]

0.4948994517326355


  0%|          | 0/1000 [00:00<?, ?it/s]

0.3199393153190613


  0%|          | 0/1000 [00:00<?, ?it/s]

0.5034310817718506


  0%|          | 0/1000 [00:00<?, ?it/s]

0.18108943104743958


  0%|          | 0/1000 [00:00<?, ?it/s]

0.33161240816116333


  0%|          | 0/1000 [00:00<?, ?it/s]

0.2194565236568451


  0%|          | 0/1000 [00:00<?, ?it/s]

0.07791364192962646


  0%|          | 0/1000 [00:00<?, ?it/s]

0.5158179998397827


  0%|          | 0/1000 [00:00<?, ?it/s]

0.5408105850219727


  0%|          | 0/1000 [00:00<?, ?it/s]

0.4336852431297302


  0%|          | 0/1000 [00:00<?, ?it/s]

0.3836505115032196


  0%|          | 0/1000 [00:00<?, ?it/s]

0.615239679813385


  0%|          | 0/1000 [00:00<?, ?it/s]

0.47563666105270386


  0%|          | 0/1000 [00:00<?, ?it/s]

0.5755442976951599


  0%|          | 0/1000 [00:00<?, ?it/s]

0.26993611454963684


In [14]:
print(best_k, min_err)

40 1.1275291768195534


## Predict

In [16]:
total_ratings = build_matrix_jit(train_csv.drop(["timestamp"], axis=1).values, users, movies)
mu_total = (total_ratings.sum()/(total_ratings!=0).sum())

In [17]:
means_users_total = total_ratings.sum(axis=1)/((total_ratings!=0).sum(axis=1) + 1e-15)
means_items_total = total_ratings.sum(axis=0)/((total_ratings!=0).sum(axis=0) + 1e-15)

# Fill zeros with the mean of means of users and items
for u in tqdm(range(total_ratings.shape[0])):
    for i in range(total_ratings.shape[1]):
        if total_ratings[u, i] == 0:
            total_ratings[u, i] = (means_users_total[u] + means_items_total[i])/2
            
print(total_ratings)

  0%|          | 0/3974 [00:00<?, ?it/s]

[[3.9558017 3.4920425 3.4046154 ... 3.7846153 4.1346154 3.8170478]
 [3.7854722 3.3217127 3.2342858 ... 3.6142857 3.9642856 3.6467183]
 [3.9802773 3.5165179 3.429091  ... 3.8090909 4.159091  3.8415234]
 ...
 [4.1278872 3.664128  3.576701  ... 3.956701  4.306701  3.9891334]
 [4.0711865 3.6074271 3.52      ... 3.9       4.25      3.9324324]
 [3.9997578 3.5359986 3.4485714 ... 3.8285713 4.178571  3.8610039]]


In [18]:
total_ratings_torch = torch.from_numpy(total_ratings.astype(np.float32))

fitted_params = fit(ratings=ratings_mat_torch, k=best_k, lr=0.5, iters=10000, mu=mu_total)
recontructed_mat = reconstruct(*fitted_params, mu_total).numpy()

  0%|          | 0/10000 [00:00<?, ?it/s]

0.1604769080877304


In [19]:
test_predictions = predict_batch(
    test_csv.drop(["id", "timestamp"], axis=1).values,
    recontructed_mat,
)

In [20]:
out_df = pd.DataFrame.from_dict(
    {
        "id": list(test_csv["id"]),
        "rating": test_predictions
    }
)

out_df.head()

Unnamed: 0,id,rating
0,0,3.578981
1,1,3.840212
2,2,4.010411
3,3,3.869724
4,4,3.711957


In [21]:
out_df.to_csv("out_svd_gd_3.csv", index=False)