In [1]:
import torch
import torch.nn as nn

from tqdm.notebook import tqdm

from typing import List

import numpy as np

## Gradient Descent

In [2]:
def reconstruct(
    P: torch.FloatTensor,
    Q: torch.FloatTensor,
    bu: torch.FloatTensor,
    bi: torch.FloatTensor,
    mu: float,
) -> torch.FloatTensor:
    P = P.cuda()
    Q = Q.cuda()
    bu = bu.cuda()
    bi = bi.cuda()
    
    Bu = torch.concat((bu, torch.ones(len(bu), 1, device="cuda")), dim=1)
    Bi = torch.concat((bi, torch.ones(len(bi), 1, device="cuda")), dim=1)
    
    mat = mu + Bu@Bi.T + P@Q.T
    
    return torch.clip(mat, 1, 5).cpu()

In [23]:
def fit(
    uir_mat: torch.IntTensor, # User Item rating mat
    k: int,
    lr: float,
    λ: float,
    iters: int,
    n_users: int,
    n_movies: int,
    mu: float = None,
    uir_val: torch.IntTensor = None
) -> List[torch.FloatTensor]:
    train_losses = np.zeros(iters)
    val_losses = np.zeros(iters)
    
    # Initialize params
    uir_mat = uir_mat.cuda()
    expected = uir_mat[:, 2].float()
    n_interactions = expected.shape[0]
    
    if uir_val is not None:
        uir_val = uir_val.cuda()
        expected_val = uir_val[:, 2].float()
        n_interactions_val = expected_val.shape[0]
    
    P = torch.rand(n_users, k, requires_grad=True, device="cuda")
    Q = torch.rand(n_movies, k, requires_grad=True, device="cuda")
    bu = torch.rand(n_users, 1, requires_grad=True, device="cuda")
    bi = torch.rand(n_movies, 1, requires_grad=True, device="cuda")
    
    if mu is None:
        mu = (expected.sum()/(expected!=0).sum())

#     criterion = nn.MSELoss()
    
    # Fit
    ones_user = torch.ones(n_users, 1, requires_grad=False, device="cuda")
    ones_item = torch.ones(n_movies, 1, requires_grad=False, device="cuda")

    min_loss = torch.inf
    params = []
    
    val_loss = torch.inf
    for i in tqdm(range(iters)):
        Bu = torch.concat((bu, ones_user), dim=1)
        Bi = torch.concat((bi, ones_item), dim=1)

        pred_mat = mu + Bu@(Bi.T) + P@(Q.T)

        # Calculate gradient only respect to know ratings
        pred = pred_mat[uir_mat[:, 0], uir_mat[:, 1]]

#         loss = criterion(pred, expected)
        # Regularized rmse
#         loss = 1/(2*n_interactions) * torch.sum((pred - expected)**2)
        loss = 1/(2*n_interactions) * torch.sum((pred - expected)**2) + λ*(torch.sum(P**2) + torch.sum(Q**2))
        train_losses[i] = float(loss.detach())
        
        if min_loss > loss.detach():
            min_loss = float(loss.detach())
            params = [P.detach().cpu(), Q.detach().cpu(), bu.detach().cpu(), bi.detach().cpu()]
#             print(f"iter {i+1}: {min_loss}")

        loss.backward()

        with torch.no_grad():
            # Validation error
            if uir_val is not None:
                pred_val = pred_mat[uir_val[:, 0], uir_val[:, 1]]
                const = 1/(2*n_interactions_val)
                val_loss = const * torch.sum((pred_val - expected_val)**2) + λ/2*(torch.sum(P**2) + torch.sum(Q**2))
                val_losses[i] = float(val_loss)
            
            P -= lr*P.grad
            Q -= lr*Q.grad
            bu -= lr*bu.grad
            bi -= lr*bi.grad
            
        P.grad.zero_()
        Q.grad.zero_()
        bu.grad.zero_()
        bi.grad.zero_()
            
    print("train:", min_loss)
    print("val:", float(val_loss))
    return params, train_losses, val_losses

In [24]:
## Test
mat = torch.tensor([
    [5, 2, 4, 3, 2, 3],
    [4, 3, 5, 4, 3, 2],
    [1, 5, 3, 4, 4, 5],
    [1, 0, 2, 3, 4, 2],
], dtype=torch.float32)
uir_test_mat = torch.zeros(mat.shape[0]*mat.shape[1] -1, 3, dtype=torch.long)
k = 0
for i in range(mat.shape[0]):
    for j in range(mat.shape[1]):
        if mat[i, j] != 0:
            uir_test_mat[k] = torch.tensor([i, j, mat[i, j]])
            k += 1

(out_P, out_Q, out_bu, out_bi), _, _ = fit(
    uir_mat=uir_test_mat, 
    k=3, 
    lr=0.08, 
    λ=0.01, 
    iters=70, 
    n_users=4, 
    n_movies=6
)

  0%|          | 0/70 [00:00<?, ?it/s]

train: 0.7445540428161621
val: inf


In [5]:
out_mat = reconstruct(out_P, out_Q, out_bu, out_bi, (mat.sum()/(mat!=0).sum()))

print(out_mat)
print()
print(mat)

tensor([[4.1269, 4.0524, 4.0122, 4.1652, 3.8172, 4.0693],
        [3.9715, 4.1880, 4.2214, 4.1969, 3.8973, 4.1179],
        [3.9735, 4.2799, 4.1403, 4.0378, 4.4103, 4.1109],
        [3.2400, 4.0822, 3.3688, 3.4950, 3.6672, 3.4347]])

tensor([[5., 2., 4., 3., 2., 3.],
        [4., 3., 5., 4., 3., 2.],
        [1., 5., 3., 4., 4., 5.],
        [1., 0., 2., 3., 4., 2.]])


# Real Data

In [6]:
import numpy as np
from numpy.typing import NDArray
import pandas as pd
from numba import njit, prange

In [7]:
users = 3974
movies = 3564

train_csv = pd.read_csv("../../data/train_data.csv")
test_csv = pd.read_csv("../../data/test_data.csv")

train_csv["user_id"] = train_csv["user_id"].apply(lambda x: x - 1)
train_csv["movie_id"] = train_csv["movie_id"].apply(lambda x: x - 1)

test_csv["user_id"] = test_csv["user_id"].apply(lambda x: x - 1)
test_csv["movie_id"] = test_csv["movie_id"].apply(lambda x: x - 1)

# Split into train and validation
train_data = train_csv.drop(["timestamp"], axis=1).sample(frac=0.8)
validation_data = train_csv.drop(train_data.index).drop(["timestamp"], axis=1)

assert train_data.shape[0] + validation_data.shape[0] == train_csv.shape[0]## Split into train and validation

# Fit

In [8]:
import matplotlib.pyplot as plt

In [9]:
uir_train = train_data.values
mu_train = np.mean(uir_train[:, 2])
uir_train_tensor = torch.from_numpy(uir_train)

uir_val = validation_data.values
n_val = uir_val.shape[0]
uir_val_tensor = torch.from_numpy(uir_val)

In [25]:
min_err = np.inf
best_k = 2
# for k in tqdm(range(2, 4)):
for k in tqdm(range(98, 101)):
    fitted_params, tr_loss, v_loss = fit(
        uir_mat=uir_train_tensor,
        k=50,
        lr=0.8, 
        λ=0.01, 
        iters=1000, 
        n_users=users, 
        n_movies=movies, 
        mu=mu_train,
        uir_val=uir_val_tensor,
    )
    recontructed_mat = reconstruct(*fitted_params, mu_train).numpy()
    
    predicted = recontructed_mat[uir_val[:, 0], uir_val[:, 1]]
    
#     predicted = predicted.round()
    err = 1/(2*n_val) * np.sum((predicted - uir_val[:, 2])**2)
    
#     plt.plot(tr_loss[-300:])
#     plt.plot(v_loss[-300:])
#     plt.show()
    
    if min_err > err:
        min_err = err
        best_k = k

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

train: 1.1619596481323242
val: 1.1594502925872803


  0%|          | 0/1000 [00:00<?, ?it/s]

train: 1.1649868488311768
val: 1.162549614906311


  0%|          | 0/1000 [00:00<?, ?it/s]

train: 1.1605204343795776
val: 1.1594702005386353


In [20]:
predicted[:50]

array([4.573386 , 4.28248  , 4.6106195, 4.8903956, 4.808412 , 4.968319 ,
       5.       , 4.587598 , 4.934968 , 4.6399937, 4.7482495, 4.7073774,
       4.7198052, 4.7356224, 4.755826 , 4.5733213, 4.613162 , 4.493431 ,
       4.4028664, 4.5966983, 4.60465  , 4.5945363, 4.4754615, 4.7571745,
       4.8311214, 4.567875 , 4.706207 , 4.861495 , 4.760673 , 4.8656297,
       4.8634467, 4.7906184, 4.840976 , 4.6481967, 4.698329 , 4.82633  ,
       4.900764 , 5.       , 4.8186274, 4.8785615, 4.685965 , 4.668737 ,
       5.       , 4.8909073, 4.541462 , 4.3416266, 4.703979 , 4.8090878,
       5.       , 4.7518473], dtype=float32)

In [None]:
print(best_k, min_err)

## Predict

In [None]:
uir_full = train_csv.drop(["timestamp"], axis=1).values
uir_full = torch.from_numpy(uir_full)
mu_full = torch.mean(uir_full[:, 2].float())

fitted_params = fit(
    uir_mat=uir_full, k=best_k, lr=0.8, λ=0.01, iters=10000, n_users=users, n_movies=movies, mu=mu_full)
recontructed_mat = reconstruct(*fitted_params, mu_full).numpy()

In [None]:
# test_predictions = predict_batch(
#     test_csv.drop(["id", "timestamp"], axis=1).values,
#     recontructed_mat,
# )
uir_test = test_csv.drop(["id", "timestamp"], axis=1).values
test_predictions = recontructed_mat[uir_test[:, 0], uir_test[:, 1]]
print(test_predictions[:10])

In [None]:
out_df = pd.DataFrame.from_dict(
    {
        "id": list(test_csv["id"]),
        "rating": test_predictions
    }
)

out_df.head()

In [None]:
out_df.to_csv("out_funk_svd_1.csv", index=False)