## Item collaborative filtering

In [1]:
from typing import List
from numba import jit, prange

In [2]:
import numpy as np
np.set_printoptions(suppress=True)

In [3]:
# Toy rating matrix: one row per user, one column per item.
# A value of -1 marks a missing rating; get_means, sim and pred skip it.
ratings = np.array([
    [4, -1, 3, 4, -1],
    [1, 2, 5, -1, 3],
    [1, -1, -1, 5, -1],
    [-1, 3, 4, 5, 3],
    [2, -1, 5, 4, 5],
])

In [4]:
@jit(nopython=True, parallel=True)
def get_means(ratings: np.ndarray, eps: float = 1e-15) -> np.ndarray:
    """Return the mean of the observed ratings of every item (column).

    Entries equal to -1 are treated as missing and are ignored.

    Args:
        ratings: 2-D rating matrix indexed as ``ratings[user, item]``.
        eps: Small constant added to every count so that an item without
            any rating yields a mean of 0 instead of a division by zero.

    Returns:
        1-D float array of length ``ratings.shape[1]`` holding one mean
        per item.
    """
    n_users, n_items = ratings.shape
    sums = np.zeros(n_items)
    counts = np.zeros(n_items)

    # Each parallel iteration only touches its own slot ``i`` of the two
    # accumulators, so the iterations are independent of each other.
    for i in prange(n_items):
        for u in range(n_users):
            if ratings[u, i] != -1:
                sums[i] += ratings[u, i]
                counts[i] += 1

    return sums / (counts + eps)

# Sanity check: per-item means of the toy matrix, rounded to two decimals.
assert np.allclose(np.round(get_means(ratings), 2), [2, 2.5, 4.25, 4.5, 3.67])

In [5]:
@jit(nopython=True)
def sim(i: int, j: int, ratings: np.array, means: np.array, eps=1e-15) -> float:
    """Mean-centred cosine similarity between items ``i`` and ``j``.

    Only users that rated both items (neither entry is -1) contribute.
    Every rating is centred on its item's entry in ``means`` before the
    products are accumulated. ``eps`` is added to the denominator, so the
    result is 0 when no user rated both items.
    """
    dot = 0.0
    sq_norm_i = 0.0
    sq_norm_j = 0.0

    for user in range(ratings.shape[0]):
        rating_i = ratings[user, i]
        rating_j = ratings[user, j]

        # Skip users that are missing a rating for either item.
        if rating_i == -1 or rating_j == -1:
            continue

        dev_i = rating_i - means[i]
        dev_j = rating_j - means[j]

        dot += dev_i * dev_j
        sq_norm_i += dev_i**2
        sq_norm_j += dev_j**2

    return dot / (np.sqrt(sq_norm_i) * np.sqrt(sq_norm_j) + eps)

# Sanity check: similarity of item 4 to items 0..3 on the toy matrix.
for i, expected in enumerate([0.45, 0.0, 0.37, -0.95]):
    assert abs(
        round(sim(4, i, ratings, get_means(ratings)), 2) - expected
    ) < 1e-5

In [13]:
@jit(nopython=True)
def pred(u: int, i: int, k: int, ratings: np.ndarray, means: np.ndarray, eps=1e-15) -> int:
    """Predict the rating of user ``u`` for item ``i``.

    The prediction is the similarity-weighted average of the user's own
    ratings over the ``k`` rated items most similar to ``i``, rounded to
    the nearest integer.

    Args:
        u: Row index of the user in ``ratings``.
        i: Column index of the item to predict.
        k: Maximum number of most-similar rated items to use.
        ratings: 2-D rating matrix indexed as ``ratings[user, item]``;
            -1 marks a missing rating.
        means: Per-item mean ratings, forwarded to ``sim``.
        eps: Small constant added to the denominator so that an empty
            neighbourhood yields 0 instead of a division by zero.

    Returns:
        The predicted rating as an integer.
    """
    # Pair every item the user has rated with its similarity to item i.
    neighbours = []
    for j in range(ratings.shape[1]):
        if ratings[u, j] != -1:
            neighbours.append((sim(i, j, ratings, means), ratings[u, j]))

    numerator, denominator = 0.0, 0.0
    for similarity_ij, rating_uj in sorted(neighbours, reverse=True)[:k]:
        numerator += similarity_ij * rating_uj
        # Normalise by the absolute weights: summing signed similarities
        # lets positive and negative neighbours cancel, which shrinks the
        # denominator towards zero and blows the prediction up.
        denominator += abs(similarity_ij)

    return int(round(numerator / (denominator + eps)))
    
# Sanity check: the expected prediction for user 0 / item 4 is round(3.55).
assert pred(0, 4, 2, ratings, get_means(ratings)) == round(3.55)