In [144]:
import torch
import torch.nn as nn

from tqdm.notebook import tqdm

from typing import List

import pandas as pd

In [145]:
users = 3974
movies = 3564

train_csv = pd.read_csv("../../../data/train_data.csv")
test_csv = pd.read_csv("../../../data/test_data.csv")

train_csv["user_id"] = train_csv["user_id"].apply(lambda x: x - 1)
train_csv["movie_id"] = train_csv["movie_id"].apply(lambda x: x - 1)

test_csv["user_id"] = test_csv["user_id"].apply(lambda x: x - 1)
test_csv["movie_id"] = test_csv["movie_id"].apply(lambda x: x - 1)

# Split into train and validation
train_data = train_csv.drop(["timestamp"], axis=1).sample(frac=0.8)
validation_data = train_csv.drop(train_data.index).drop(["timestamp"], axis=1)

assert train_data.shape[0] + validation_data.shape[0] == train_csv.shape[0]

In [146]:
train_uir = train_data.values
val_uir = validation_data.values

total_uir = train_csv.values[:, :-1]
test_ui = test_csv.values[:, 1:-1]

## Model

In [147]:
def reconstruct(
    P: torch.FloatTensor,
    Q: torch.FloatTensor,
    bu: torch.FloatTensor,
    bi: torch.FloatTensor,
    mu: float,
) -> torch.FloatTensor:
    P = P.cuda()
    Q = Q.cuda()
    bu = bu.cuda()
    bi = bi.cuda()
    
    Bu = torch.concat((bu, torch.ones(len(bu), 1, device="cuda")), dim=1)
    Bi = torch.concat((bi, torch.ones(len(bi), 1, device="cuda")), dim=1)
    
    mat = mu + Bu@Bi.T + P@Q.T
    
    return torch.clip(mat, 1, 5).cpu()

In [196]:
def fit(
    uir_mat: torch.IntTensor, # User Item rating mat
    k: int,
    lr: float,
    λ: float,
    iters: int,
    n_users: int,
    n_movies: int,
    mu: float = None,
) -> List[torch.FloatTensor]:
    # Initialize params
    uir_mat = uir_mat.cuda()
    expected = uir_mat[:, 2].float()
    n_interactions = expected.shape[0]
    
    if mu is None:
#         mu = expected.mean()
        mu = 3.6032
    
    P = torch.randn(n_users, k, requires_grad=True, device="cuda")
    Q = torch.randn(n_movies, k, requires_grad=True, device="cuda")
    with torch.no_grad():
        P *= .1
        Q *= .1
    bu = torch.zeros(n_users, 1, requires_grad=True, device="cuda")
    bi = torch.zeros(n_movies, 1, requires_grad=True, device="cuda")
    
#     criterion = nn.MSELoss()
    
    # Fit
    ones_user = torch.ones(n_users, 1, requires_grad=False, device="cuda")
    ones_item = torch.ones(n_movies, 1, requires_grad=False, device="cuda")

    min_loss = torch.inf
    params = []
    for i in tqdm(range(iters)):
        Bu = torch.concat((bu, ones_user), dim=1)
        Bi = torch.concat((ones_item, bi), dim=1)

        pred = mu + Bu@(Bi.T) + P@(Q.T)
        
        pred = pred[uir_mat[:, 0], uir_mat[:, 1]]
        print(pred)
        print(expected)
        
#         loss = criterion(pred[0], expected[0])
#         loss = 1/(2*n_interactions) * torch.sum((pred - expected)**2) + λ/2*(torch.sum(P**2) + torch.sum(Q**2))
        loss = 1/(2*n_interactions) * torch.sum((pred - expected)**2) + λ/2*(torch.sum(P**2) + torch.sum(Q**2))
        print(float(loss))
        loss.backward()

        with torch.no_grad():
            print(bu.grad.abs().max())
            print(bi.grad.abs().max())
#             print(P.grad.max())
#             print(Q.grad.max())
            P -= lr*P.grad
            Q -= lr*Q.grad
            bu -= lr*bu.grad
            bi -= lr*bi.grad

        P.grad.zero_()
        Q.grad.zero_()
        bu.grad.zero_()
        bi.grad.zero_()
        
    return params

In [199]:
fitted_params = fit(
    uir_mat=torch.from_numpy(train_uir)[:1],
    k=5,
    lr=0.1, 
    λ=0.02, 
    iters=10, 
    n_users=users, 
    n_movies=movies, 
)

  0%|          | 0/10 [00:00<?, ?it/s]

tensor([3.5766], device='cuda:0', grad_fn=<IndexBackward0>)
tensor([5.], device='cuda:0')
4.7945942878723145
tensor(1.4238, device='cuda:0')
tensor(1.4238, device='cuda:0')
tensor([3.8773], device='cuda:0', grad_fn=<IndexBackward0>)
tensor([5.], device='cuda:0')
4.396509170532227
tensor(1.1230, device='cuda:0')
tensor(1.1230, device='cuda:0')
tensor([4.1133], device='cuda:0', grad_fn=<IndexBackward0>)
tensor([5.], device='cuda:0')
4.144279479980469
tensor(0.8867, device='cuda:0')
tensor(0.8867, device='cuda:0')
tensor([4.2994], device='cuda:0', grad_fn=<IndexBackward0>)
tensor([5.], device='cuda:0')
3.981675863265991
tensor(0.7007, device='cuda:0')
tensor(0.7007, device='cuda:0')
tensor([4.4465], device='cuda:0', grad_fn=<IndexBackward0>)
tensor([5.], device='cuda:0')
3.874523878097534
tensor(0.5537, device='cuda:0')
tensor(0.5537, device='cuda:0')
tensor([4.5630], device='cuda:0', grad_fn=<IndexBackward0>)
tensor([5.], device='cuda:0')
3.801999807357788
tensor(0.4370, device='cuda:0')