In [1]:
import numpy as np
from numba import jit
from tqdm.notebook import tqdm

from surprise import SVD
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise import Reader

import pandas as pd

In [2]:
mat = np.array([
    [5, 2, 4, 3, 2, 3],
    [4, 3, 5, 4, 3, 2],
    [1, 5, 3, 4, 4, 5],
    [1, 0, 2, 3, 4, 2],
])

uir_test_mat = np.zeros((mat.shape[0]*mat.shape[1] -1, 3), dtype=int)
k = 0
for i in range(mat.shape[0]):
    for j in range(mat.shape[1]):
        if mat[i, j] != 0:
            uir_test_mat[k] = [i, j, mat[i, j]]
            k += 1

In [3]:
# RS HD page 172 (chrome)

def fit_funk_svd(train_data, n_users, n_items, k=100, α=.005, λ=.02, n_iters=20):
    """
    train_data: array Nx3
    """
    bu = np.zeros(n_users, np.double)
    bi = np.zeros(n_items, np.double)
    
    P = np.random.normal(0, .1, (n_users, k))
    Q = np.random.normal(0, .1, (n_items, k))
    
    μ = np.mean(train_data[:, 2])
    
    for it in tqdm(range(n_iters)):
        for u, i, r in train_data:
            pred = μ + bu[u] + bi[i] + np.dot(P[u], Q[i])
            
            error = r - pred
            
            # Updating
            bu[u] += α * (error - λ*bu[u])
            bi[i] += α * (error - λ*bi[i])
            
            for f in range(k):
                P[u,f], Q[i,f] = P[u,f] + α*(error*Q[i,f] - λ*P[u,f]), Q[i, f] + α*(error*P[u,f] - λ*Q[i,f])
    
    return μ, bu, bi, P, Q

In [4]:
params = fit_funk_svd(uir_test_mat, 4, 6)

  0%|          | 0/20 [00:00<?, ?it/s]

In [5]:
def predict(u, i, params):
    μ, bu, bi, P, Q = params
    
    return μ + bu[u] + bi[i] + np.dot(P[u], Q[i])

In [15]:
res_test = np.zeros_like(mat, dtype=float)

for u in range(mat.shape[0]):
    for i in range(mat.shape[1]):
        res_test[u, i] = predict(u, i, params)
        
print(res_test.round())
print()
print(mat)

[[4. 3. 3. 3. 3. 3.]
 [3. 3. 4. 3. 3. 3.]
 [3. 4. 3. 4. 4. 4.]
 [2. 3. 3. 3. 3. 3.]]

[[5 2 4 3 2 3]
 [4 3 5 4 3 2]
 [1 5 3 4 4 5]
 [1 0 2 3 4 2]]


## Using surprise

In [7]:
usrs_list = list(uir_test_mat[:, 0])
itms_list = list(uir_test_mat[:, 1])
rts_list = list(uir_test_mat[:, 2])

df = pd.DataFrame.from_dict({
    "user_id": usrs_list + [1000],
    "item_id": itms_list + [1000],
    "rating": rts_list + [5],
})
df.head()

Unnamed: 0,user_id,item_id,rating
0,0,0,5
1,0,1,2
2,0,2,4
3,0,3,3
4,0,4,2


In [24]:
dataset = Dataset.load_from_df(df, Reader(rating_scale=(1, 5)))
train, val = train_test_split(dataset, test_size=.001, random_state=32)
val

[(1000, 1000, 5.0)]

In [27]:
model = SVD()
model.fit(train)

preds = model.test(train.all_ratings())
accuracy.rmse(preds)

RMSE: 1.2432


1.2432321358419902

In [28]:
surprise_test = np.zeros_like(mat, dtype=float)

# for p in preds:
#     surprise_test[train.to_raw_uid(p.uid), train.to_raw_iid(p.iid)] = p.est
for u in range(mat.shape[0]):
    for i in range(mat.shape[1]):
        surprise_test[u, i] = model.predict(u, i).est
        
print(surprise_test.round())
print()
print(mat)

[[3. 3. 3. 3. 3. 3.]
 [3. 3. 4. 4. 3. 3.]
 [3. 4. 3. 4. 3. 4.]
 [2. 3. 3. 3. 3. 3.]]

[[5 2 4 3 2 3]
 [4 3 5 4 3 2]
 [1 5 3 4 4 5]
 [1 0 2 3 4 2]]


In [13]:
model.predict(3, 1)

Prediction(uid=3, iid=1, r_ui=None, est=2.996344968969467, details={'was_impossible': False})