In [1]:
import torch
import torch.nn as nn

from tqdm.notebook import tqdm

from typing import List

## Gradient Descent

In [2]:
def reconstruct(
    P: torch.FloatTensor,
    Q: torch.FloatTensor,
    bu: torch.FloatTensor,
    bi: torch.FloatTensor,
    mu: float,
) -> torch.FloatTensor:
    P = P.cuda()
    Q = Q.cuda()
    bu = bu.cuda()
    bi = bi.cuda()
    
    Bu = torch.concat((bu, torch.ones(len(bu), 1, device="cuda")), dim=1)
    Bi = torch.concat((bi, torch.ones(len(bi), 1, device="cuda")), dim=1)
    
    mat = mu + Bu@Bi.T + P@Q.T
    
    return torch.clip(mat, 1, 5).cpu()

In [3]:
def fit(
    uir_mat: torch.IntTensor, # User Item rating mat
    k: int,
    lr: float,
    λ: float,
    iters: int,
    n_users: int,
    n_movies: int,
    mu: float = None,
) -> List[torch.FloatTensor]:
    # Initialize params
    uir_mat = uir_mat.cuda()
    expected = uir_mat[:, 2].float()
    n_interactions = expected.shape[0]
    
    P = torch.randn(n_users, k, requires_grad=True, device="cuda")
    Q = torch.randn(n_movies, k, requires_grad=True, device="cuda")
    bu = torch.zeros(n_users, 1, requires_grad=True, device="cuda")
    bi = torch.zeros(n_movies, 1, requires_grad=True, device="cuda")
    
    if mu is None:
        mu = (expected.sum()/(expected!=0).sum())

#     criterion = nn.MSELoss()
    
    # Fit
    ones_user = torch.ones(n_users, 1, requires_grad=False, device="cuda")
    ones_item = torch.ones(n_movies, 1, requires_grad=False, device="cuda")

    min_loss = torch.inf
    params = []
    for i in tqdm(range(iters)):
        Bu = torch.concat((bu, ones_user), dim=1)
        Bi = torch.concat((bi, ones_item), dim=1)

        pred = mu + Bu@(Bi.T) + P@(Q.T)

        # Calculate gradient only respect to know ratings
        pred = pred[uir_mat[:, 0], uir_mat[:, 1]]

#         loss = criterion(pred, expected)
        # Regularized rmse
#         loss = 1/(2*n_interactions) * torch.sum((pred - expected)**2)
        loss = 1/(2*n_interactions) * torch.sum((pred - expected)**2) + λ/2*(torch.sum(P**2) + torch.sum(Q**2))
        
        
        if min_loss > loss.detach():
            min_loss = float(loss.detach())
            params = [P.detach().cpu(), Q.detach().cpu(), bu.detach().cpu(), bi.detach().cpu()]
#             print(f"iter {i+1}: {min_loss}")

        loss.backward()

        with torch.no_grad():
            P -= lr*P.grad
            Q -= lr*Q.grad
            bu -= lr*bu.grad
            bi -= lr*bi.grad
            
        P.grad.zero_()
        Q.grad.zero_()
        bu.grad.zero_()
        bi.grad.zero_()
            
    print(min_loss)
    return params

In [4]:
## Test
mat = torch.tensor([
    [5, 2, 4, 3, 2, 3],
    [4, 3, 5, 4, 3, 2],
    [1, 5, 3, 4, 4, 5],
    [1, 0, 2, 3, 4, 2],
], dtype=torch.float32)
uir_test_mat = torch.zeros(mat.shape[0]*mat.shape[1] -1, 3, dtype=torch.long)
k = 0
for i in range(mat.shape[0]):
    for j in range(mat.shape[1]):
        if mat[i, j] != 0:
            uir_test_mat[k] = torch.tensor([i, j, mat[i, j]])
            k += 1

out_P, out_Q, out_bu, out_bi = fit(uir_mat=uir_test_mat, k=3, lr=0.08, λ=0.01, iters=70, n_users=4, n_movies=6)

  0%|          | 0/70 [00:00<?, ?it/s]

0.6628731489181519


In [5]:
out_mat = reconstruct(out_P, out_Q, out_bu, out_bi, (mat.sum()/(mat!=0).sum()))

print(out_mat)
print()
print(mat)

tensor([[5.0000, 2.8593, 5.0000, 4.0598, 3.9085, 4.0462],
        [4.7782, 3.1620, 5.0000, 4.1356, 3.9274, 3.9816],
        [3.0854, 5.0000, 2.6303, 4.0531, 4.9147, 4.3993],
        [1.5404, 5.0000, 2.6755, 3.9379, 4.9984, 3.4734]])

tensor([[5., 2., 4., 3., 2., 3.],
        [4., 3., 5., 4., 3., 2.],
        [1., 5., 3., 4., 4., 5.],
        [1., 0., 2., 3., 4., 2.]])


# Real Data

In [6]:
import numpy as np
from numpy.typing import NDArray
import pandas as pd
from numba import njit, prange

In [7]:
users = 3974
movies = 3564

train_csv = pd.read_csv("../../data/train_data.csv")
test_csv = pd.read_csv("../../data/test_data.csv")

train_csv["user_id"] = train_csv["user_id"].apply(lambda x: x - 1)
train_csv["movie_id"] = train_csv["movie_id"].apply(lambda x: x - 1)

test_csv["user_id"] = test_csv["user_id"].apply(lambda x: x - 1)
test_csv["movie_id"] = test_csv["movie_id"].apply(lambda x: x - 1)

# Split into train and validation
train_data = train_csv.drop(["timestamp"], axis=1).sample(frac=0.8)
validation_data = train_csv.drop(train_data.index).drop(["timestamp"], axis=1)

assert train_data.shape[0] + validation_data.shape[0] == train_csv.shape[0]## Split into train and validation

# Fit

In [8]:
uir_train = train_data.values
mu_train = np.mean(uir_train[:, 2])

uir_val = validation_data.values
n_val = uir_val.shape[0]

In [11]:
min_err = np.inf
best_k = 2
for k in tqdm(range(2, 50)):
    fitted_params = fit(
        uir_mat=torch.from_numpy(uir_train),
        k=k,
        lr=0.8, 
        λ=0.01, 
        iters=1000, 
        n_users=users, 
        n_movies=movies, 
        mu=mu_train
    )
    recontructed_mat = reconstruct(*fitted_params, mu_train).numpy()
    
    predicted = recontructed_mat[uir_val[:, 0], uir_val[:, 1]]
    err = np.sqrt(1/n_val * np.sum((predicted - uir_val[:, 2])**2))
    
    if min_err > err:
        min_err = err
        best_k = k

  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164147853851318


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164186000823975


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164226531982422


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164265871047974


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164302825927734


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164342164993286


  0%|          | 0/1000 [00:00<?, ?it/s]

1.116438627243042


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164424419403076


  0%|          | 0/1000 [00:00<?, ?it/s]

1.116446614265442


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164501905441284


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164542436599731


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164580583572388


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164625883102417


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164658069610596


  0%|          | 0/1000 [00:00<?, ?it/s]

1.116470217704773


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164740324020386


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164777278900146


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164823770523071


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164859533309937


  0%|          | 0/1000 [00:00<?, ?it/s]

1.116490364074707


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164937019348145


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1164976358413696


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165012121200562


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165058612823486


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165101528167725


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165134906768799


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165177822113037


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165218353271484


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165250539779663


  0%|          | 0/1000 [00:00<?, ?it/s]

1.116529107093811


  0%|          | 0/1000 [00:00<?, ?it/s]

1.116532802581787


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165376901626587


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165409088134766


  0%|          | 0/1000 [00:00<?, ?it/s]

1.11654531955719


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165492534637451


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165530681610107


  0%|          | 0/1000 [00:00<?, ?it/s]

1.116557002067566


  0%|          | 0/1000 [00:00<?, ?it/s]

1.116560697555542


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165646314620972


  0%|          | 0/1000 [00:00<?, ?it/s]

1.116568922996521


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165729761123657


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165766716003418


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165807247161865


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165846586227417


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165894269943237


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165921688079834


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165966987609863


  0%|          | 0/1000 [00:00<?, ?it/s]

1.1165995597839355


In [12]:
print(best_k, min_err)

35 1.5015037434290406


## Predict

In [17]:
uir_full = train_csv.drop(["timestamp"], axis=1).values
uir_full = torch.from_numpy(uir_full)
mu_full = torch.mean(uir_full[:, 2].float())

fitted_params = fit(
    uir_mat=uir_full, k=best_k, lr=0.005, λ=0.02, iters=100, n_users=users, n_movies=movies, mu=mu_full
)
recontructed_mat = reconstruct(*fitted_params, mu_full).numpy()

  0%|          | 0/100 [00:00<?, ?it/s]

2597.774169921875


In [None]:
# test_predictions = predict_batch(
#     test_csv.drop(["id", "timestamp"], axis=1).values,
#     recontructed_mat,
# )
uir_test = test_csv.drop(["id", "timestamp"], axis=1).values
test_predictions = recontructed_mat[uir_test[:, 0], uir_test[:, 1]]
print(test_predictions[:10])

In [None]:
out_df = pd.DataFrame.from_dict(
    {
        "id": list(test_csv["id"]),
        "rating": test_predictions
    }
)

out_df.head()

In [None]:
out_df.to_csv("out_funk_svd_1.csv", index=False)