In [1]:
import torch
import torch.nn as nn

from tqdm.notebook import tqdm

In [2]:
test_mat = torch.tensor([
    [5, 2, 4, 3, 2, 3],
    [4, 3, 5, 4, 3, 2],
    [1, 5, 3, 4, 4, 5],
    [1, 0, 2, 3, 4, 2],
], dtype=torch.float32)

In [3]:
n_users = test_mat.shape[0]
n_movies = test_mat.shape[1]
k = 3

P = torch.rand(n_users, k, requires_grad=True, device="cuda")
Q = torch.rand(n_movies, k, requires_grad=True, device="cuda")
bu = torch.rand(n_users, 1, requires_grad=True, device="cuda")
bi = torch.rand(n_movies, 1, requires_grad=True, device="cuda")

mu = (test_mat.sum()/(test_mat!=0).sum())

criterion = nn.MSELoss()

expected = test_mat.cuda()

In [4]:
lr = 0.008
ones_user = torch.ones(n_users, 1, requires_grad=False, device="cuda")
ones_item = torch.ones(n_movies, 1, requires_grad=False, device="cuda")

min_loss = torch.inf
params = []
for i in tqdm(range(100)):
    Bu = torch.concat((bu, ones_user), dim=1)
    Bi = torch.concat((bi, ones_item), dim=1)
    
    pred = mu + Bu@(Bi.T) + P@(Q.T)
    
    loss = criterion(pred, expected)
    print(float(loss))
    if min_loss > loss.detach():
        min_loss = float(loss.detach())
        params = [P.detach().cpu(), Q.detach().cpu(), bu.detach().cpu(), bi.detach().cpu()]
    
    loss.backward()
    
    with torch.no_grad():
        P -= lr*P.grad
        Q -= lr*Q.grad
        bu -= lr*bu.grad
        bi -= lr*bi.grad

  0%|          | 0/100 [00:00<?, ?it/s]

5.815906524658203
5.760279655456543
5.653106689453125
5.498923301696777
5.304213047027588
5.078001976013184
4.828884124755859
4.567636013031006
4.30161714553833
4.038580894470215
3.7856173515319824
3.547283887863159
3.327199935913086
3.1278090476989746
2.949627637863159
2.7935123443603516
2.6581802368164062
2.542518138885498
2.444793224334717
2.363454818725586
2.2961463928222656
2.240525007247925
2.194373607635498
2.1551785469055176
2.120570659637451
2.0874528884887695
2.0530571937561035
2.014777660369873
1.9694818258285522
1.9147355556488037
1.847806692123413
1.767179250717163
1.672750473022461
1.5654308795928955
1.447908639907837
1.3254187107086182
1.2046213150024414
1.0952690839767456
1.0078500509262085
0.9529324769973755
0.9407093524932861
0.9766510725021362
1.0614789724349976
1.1885228157043457
1.3424434661865234
1.5012110471725464
1.6367125511169434
1.7181706428527832
1.7239031791687012
1.6405357122421265
1.4742152690887451
1.2520241737365723
1.014551043510437
0.8154513835906982


In [5]:
print(min_loss)
print(params)

0.7129956483840942
[tensor([[-1.0555, -2.4306, -1.2819],
        [-0.7446, -1.8783, -1.2457],
        [-3.2433, -1.9906, -0.1469],
        [-4.3032, -3.4648, -2.3825]]), tensor([[ 1.6477, -0.5807, -0.7546],
        [-0.3543, -0.0238,  0.4515],
        [ 0.4147, -0.0974, -0.0741],
        [-0.2958,  0.3857, -0.2541],
        [-0.1695,  0.9232,  0.0155],
        [-0.3522,  0.3861,  1.0840]]), tensor([[-1.9964],
        [-1.5842],
        [-0.8658],
        [-3.9659]]), tensor([[-0.1801],
        [ 0.9815],
        [-0.1303],
        [ 0.5197],
        [ 0.0578],
        [ 0.1122]])]


In [6]:
ones_user_cpu = torch.ones(n_users, 1, requires_grad=False)
ones_item_cpu = torch.ones(n_movies, 1, requires_grad=False)

out_P, out_Q, out_bu, out_bi = params
out_Bu = torch.concat((out_bu, ones_user_cpu), dim=1)
out_Bi = torch.concat((out_bi, ones_item_cpu), dim=1)

out_mat = mu + out_Bu@out_Bi.T + out_P@out_Q.T
out_mat = torch.clip(out_mat, 1, 5)

print(out_mat)
print()
print(test_mat)

tensor([[5.0000, 2.1110, 4.3714, 2.8802, 2.0172, 2.0370],
        [5.0000, 2.4086, 4.3902, 3.2064, 2.4987, 2.2263],
        [1.0000, 4.4978, 3.1898, 3.9965, 2.8772, 4.3347],
        [1.6509, 1.0000, 3.4633, 2.6983, 1.4821, 1.3675]])

tensor([[5., 2., 4., 3., 2., 3.],
        [4., 3., 5., 4., 3., 2.],
        [1., 5., 3., 4., 4., 5.],
        [1., 0., 2., 3., 4., 2.]])
