# Collaborative Filtering Basics

## 1. Item-based Collaborative Filtering

In [1]:
import numpy as np

In [2]:
# Input rating matrix: one row per user, one column per item.
# X is a NaN placeholder marking the ratings that are unknown.
X = np.nan
R = np.array(
    [
        [5, 4, X, 1, 2, 1],  # user 0
        [4, X, 3, 1, 1, 2],  # user 1
        [X, 5, 5, X, 3, 3],  # user 2
        [2, X, 1, 4, 5, 4],  # user 3
        [2, 2, 2, X, 4, X],  # user 4
        [1, 2, 1, X, 5, 4],  # user 5
        # columns are items 0..5
    ],
    dtype=float,
)

In [3]:
# Define common variables and helper functions
# n is the number of rows of R (users), m is the number of columns of R (items)
n, m = R.shape

def known(r):
    """Tell whether the scalar rating r is an observed value (i.e. not NaN)."""
    if np.isnan(r):
        return False
    return True

def known_item_ratings(R, i):
    """Collect, in row order, the non-NaN entries of column i of R as a list."""
    observed = []
    for rating in R[:, i]:
        if known(rating):
            observed.append(rating)
    return observed

def item_common_ratings(R, i, j):
    """Return the rating pairs of the users who rated both item i and item j.

    Args:
        R: 2-D rating matrix (rows are users, columns are items); NaN marks
            an unknown rating.
        i: column index of the first item.
        j: column index of the second item.

    Returns:
        Array of shape (k, 2) with one row per user who rated both items, in
        row order of R. Column 0 holds the ratings of item i, column 1 the
        ratings of item j. When no user rated both items the result has
        shape (0, 2), so column indexing stays valid for callers.
    """
    ratings_i = R[:, i]
    ratings_j = R[:, j]
    # A user contributes only if neither of the two ratings is NaN.
    rated_both = ~(np.isnan(ratings_i) | np.isnan(ratings_j))
    return np.column_stack((ratings_i[rated_both], ratings_j[rated_both]))

def mean_r(R, i):
    """Average of the known ratings returned by known_item_ratings(R, i)."""
    observed = known_item_ratings(R, i)
    return np.mean(observed)

In [4]:
# Calculate the matrix of item similarities
def similarity(R, i, j):
    """Correlation-style similarity between items i and j.

    Ratings are centered on mean_r(R, i) and mean_r(R, j) respectively, and
    only the users returned by item_common_ratings(R, i, j) contribute to
    the numerator and to the two norms.

    Args:
        R: 2-D rating matrix with NaN for unknown ratings.
        i: index of the first item.
        j: index of the second item.

    Returns:
        The normalized dot product of the two centered rating vectors, or
        0.0 when there are no common ratings or when either centered vector
        has zero norm (the correlation is undefined in that case and would
        otherwise come out as NaN).
    """
    U_ij = item_common_ratings(R, i, j)
    # Guard before any column indexing: an empty result may be 1-D.
    if U_ij.size == 0:
        return 0.0

    dev_i = U_ij[:, 0] - mean_r(R, i)
    dev_j = U_ij[:, 1] - mean_r(R, j)

    # The denominator does not depend on the summation index, so compute it once.
    denom = np.linalg.norm(dev_i) * np.linalg.norm(dev_j)
    if denom == 0:
        return 0.0
    return np.dot(dev_i, dev_j) / denom

# Entry [row, col] holds similarity(R, col, row); every ordered item pair is evaluated.
item_similarity = np.array([
    [similarity(R, col, row) for col in range(m)]
    for row in range(m)
])

In [5]:
# Predict ratings based on the item similarities
k = 2 # neighborhood size

def predict_rating(R, u, i):
    """Predict user u's rating of item i from the user's ratings of similar items.

    The prediction is the similarity-weighted average of R[u, j] over the
    (at most) k items j that are most similar to i according to the global
    item_similarity matrix, that are different from i, and that user u has
    rated. Weights are the signed similarities; the normalizer is the sum
    of their absolute values.

    Args:
        R: 2-D rating matrix with NaN for unknown ratings.
        u: row index of the user.
        i: column index of the item to predict.

    Returns:
        The predicted rating, or NaN when no usable neighbor exists (no
        other item rated by u, or all neighbor similarities are zero).

    Side effects:
        Prints the selected neighbors.
    """
    # neighbors sorted by similarity, most similar first
    all_neighbors = np.argsort(item_similarity[i])[::-1]

    # Keep items rated by u, excluding i itself and any item whose similarity
    # is NaN (argsort puts NaN last, so the reversal would rank it first).
    # Cast to int so the printed list does not depend on NumPy's scalar repr.
    neighbors = [
        int(j) for j in all_neighbors
        if j != i and known(R[u, j]) and not np.isnan(item_similarity[i, j])
    ][:k]
    print("user %s, item %s <- item neighbors %s" % (u, i, neighbors))

    score = 0
    norm = 0
    for j in neighbors:
        # NOTE(review): a negatively correlated neighbor subtracts its raw
        # rating here, which can pull the prediction below the rating scale.
        score = score + item_similarity[i,j]*R[u,j]
        norm = norm + abs(item_similarity[i,j])

    # Without any weight there is nothing to average; avoid dividing by zero.
    if norm == 0:
        return np.nan
    return score/norm

# Keep every known rating and substitute a prediction for each unknown one.
ratings = np.array([
    [
        predict_rating(R, u, i) if not known(R[u, i]) else R[u, i]
        for i in range(m)
    ]
    for u in range(n)
])

print("\nComplete rating matrix:")
# Show two decimals when printing arrays from here on.
np.set_printoptions(precision=2)
print(ratings)

user 0, item 2 <- item neighbors [1, 0]
user 1, item 1 <- item neighbors [2, 0]
user 2, item 0 <- item neighbors [2, 1]
user 2, item 3 <- item neighbors [4, 5]
user 3, item 1 <- item neighbors [2, 0]
user 4, item 3 <- item neighbors [4, 0]
user 4, item 5 <- item neighbors [4, 1]
user 5, item 3 <- item neighbors [4, 5]

Complete rating matrix:
[[5.   4.   4.5  1.   2.   1.  ]
 [4.   3.49 3.   1.   1.   2.  ]
 [5.   5.   5.   3.   3.   3.  ]
 [2.   1.49 1.   4.   5.   4.  ]
 [2.   2.   2.   1.23 4.   1.81]
 [1.   2.   1.   4.51 5.   4.  ]]


## 2. Nearest Neighbor User-based Collaborative Filtering

In [6]:
# Input rating matrix (same values as the matrix in section 1):
# rows are users, columns are items, X (NaN) marks an unknown rating.
X = np.nan
R = np.array(
    [
        [5, 4, X, 1, 2, 1],  # user 0
        [4, X, 3, 1, 1, 2],  # user 1
        [X, 5, 5, X, 3, 3],  # user 2
        [2, X, 1, 4, 5, 4],  # user 3
        [2, 2, 2, X, 4, X],  # user 4
        [1, 2, 1, X, 5, 4],  # user 5
        # columns are items 0..5
    ],
    dtype=float,
)

In [7]:
# Define common variables and helper functions
# n is the number of rows of R (users), m is the number of columns of R (items)
n, m = R.shape

def known(r):
    """Return True when the scalar r holds an actual rating, False when it is NaN."""
    return False if np.isnan(r) else True

def known_user_ratings(R, u):
    """List, in column order, the non-NaN entries of row u of R."""
    return list(filter(known, R[u, :]))

def user_common_ratings(R, u, v):
    """Return the rating pairs of the items rated by both user u and user v.

    Args:
        R: 2-D rating matrix (rows are users, columns are items); NaN marks
            an unknown rating.
        u: row index of the first user.
        v: row index of the second user.

    Returns:
        Array of shape (2, k) with one column per item rated by both users,
        in column order of R. Row 0 holds user u's ratings, row 1 user v's.
        When the users share no rated item the result has shape (2, 0), so
        callers can still read shape[1] and index rows.
    """
    ratings_u = R[u, :]
    ratings_v = R[v, :]
    # An item contributes only if neither of the two ratings is NaN.
    rated_both = ~(np.isnan(ratings_u) | np.isnan(ratings_v))
    return np.vstack((ratings_u[rated_both], ratings_v[rated_both]))

def mean_r(R, u):
    """Average of the known ratings returned by known_user_ratings(R, u)."""
    observed = known_user_ratings(R, u)
    return np.mean(observed)

In [8]:
# Calculate the matrix of user similarities
def similarity(R, u, v):
    """Correlation-style similarity between users u and v.

    Ratings are centered on mean_r(R, u) and mean_r(R, v) respectively, and
    only the items returned by user_common_ratings(R, u, v) contribute to
    the numerator and to the two norms.

    Args:
        R: 2-D rating matrix with NaN for unknown ratings.
        u: index of the first user.
        v: index of the second user.

    Returns:
        The normalized dot product of the two centered rating vectors, or
        0.0 when the users share no rated item or when either centered
        vector has zero norm (the correlation is undefined in that case).
    """
    I_uv = user_common_ratings(R, u, v)
    # Guard before reading shape[1] or indexing rows: an empty result may be
    # 1-D, in which case those accesses raise IndexError.
    if I_uv.size == 0:
        return 0.0

    dev_u = I_uv[0, :] - mean_r(R, u)
    dev_v = I_uv[1, :] - mean_r(R, v)

    # The denominator does not depend on the summation index, so compute it once.
    denom = np.linalg.norm(dev_u) * np.linalg.norm(dev_v)
    if denom == 0:
        return 0.0
    return np.dot(dev_u, dev_v) / denom

# Entry [row, col] holds similarity(R, col, row); every ordered user pair is evaluated.
user_similarity = np.array([
    [similarity(R, col, row) for col in range(n)]
    for row in range(n)
])

In [9]:
# Display the user-user similarity matrix built in the previous cell
user_similarity

array([[ 1.  ,  0.87,  0.94, -0.8 , -0.59, -0.79],
       [ 0.87,  1.  ,  0.87, -0.84, -0.81, -0.88],
       [ 0.94,  0.87,  1.  , -0.94, -0.87, -0.92],
       [-0.8 , -0.84, -0.94,  1.  ,  0.86,  0.95],
       [-0.59, -0.81, -0.87,  0.86,  1.  ,  0.95],
       [-0.79, -0.88, -0.92,  0.95,  0.95,  1.  ]])

In [10]:
# Predict ratings based on the user similarities
k = 2 # neighborhood size

def predict_rating(R, u, i):
    """Predict user u's rating of item i from the ratings of similar users.

    The prediction is user u's mean rating plus the similarity-weighted
    average of the neighbors' mean-centered ratings of item i. Neighbors are
    the (at most) k users most similar to u according to the global
    user_similarity matrix, different from u, who have rated item i.

    Args:
        R: 2-D rating matrix with NaN for unknown ratings.
        u: row index of the user.
        i: column index of the item to predict.

    Returns:
        The predicted rating. Falls back to user u's mean rating when no
        usable neighbor exists (nobody else rated i, or all neighbor
        similarities are zero). The value is not clipped to the rating scale.

    Side effects:
        Prints the selected neighbors.
    """
    # neighbors sorted by similarity, most similar first
    all_neighbors = np.argsort(user_similarity[u])[::-1]

    # Keep users who rated i, excluding u itself and any user whose similarity
    # is NaN (argsort puts NaN last, so the reversal would rank it first).
    # Cast to int so the printed list does not depend on NumPy's scalar repr.
    neighbors = [
        int(v) for v in all_neighbors
        if v != u and known(R[v, i]) and not np.isnan(user_similarity[u, v])
    ][:k]
    mu_u = mean_r(R, u)
    print("user %s, item %s <- user neighbors %s" % (u, i, neighbors))

    score = 0
    norm = 0
    for v in neighbors:
        mu_v = mean_r(R, v)
        score = score + user_similarity[u,v]*(R[v,i] - mu_v)
        norm = norm + abs(user_similarity[u,v])

    # Without any weight there is no neighbor signal; avoid dividing by zero.
    if norm == 0:
        return mu_u
    return mu_u + score/norm

# Keep every known rating and substitute a prediction for each unknown one.
ratings = np.array([
    [
        predict_rating(R, u, i) if not known(R[u, i]) else R[u, i]
        for i in range(m)
    ]
    for u in range(n)
])

print("\nComplete rating matrix:")
# Show two decimals when printing arrays from here on.
np.set_printoptions(precision=2)
print(ratings)

user 0, item 2 <- user neighbors [2, 1]
user 1, item 1 <- user neighbors [0, 2]
user 2, item 0 <- user neighbors [0, 1]
user 2, item 3 <- user neighbors [0, 1]
user 3, item 1 <- user neighbors [5, 4]
user 4, item 3 <- user neighbors [3, 0]
user 4, item 5 <- user neighbors [5, 3]
user 5, item 3 <- user neighbors [3, 0]

Complete rating matrix:
[[5.   4.   3.5  1.   2.   1.  ]
 [4.   3.4  3.   1.   1.   2.  ]
 [6.11 5.   5.   2.59 3.   3.  ]
 [2.   2.65 1.   4.   5.   4.  ]
 [2.   2.   2.   3.63 4.   3.61]
 [1.   2.   1.   3.76 5.   4.  ]]
