# Nearest Neighbour item-based collaborative filtering

This is a toy implementation, intended only to illustrate how item-based collaborative filtering (CF) works using a small numerical example.

### Use Case
We have a fixed number of users and items, and an interaction score is known for only a few user-item pairs. Our goal is to estimate the scores for the remaining user-item pairs.

### Prototype: Approach and Data
We use a standard nearest-neighbour item-based collaborative filtering algorithm, applied to a custom-defined rating matrix created for this demonstration. No external datasets are used.

In [1]:
import numpy as np 

# Marker for a user-item pair whose score has not been observed.
X = np.nan

# Rating matrix: one row per user, one column per item.
R = np.array(
    [
        [5, 4, X, 1, 2, 1],
        [4, X, 3, 1, 1, 2],
        [X, 5, 5, X, 3, 3],
        [2, X, 1, 4, 5, 4],
        [2, 2, 2, X, 4, X],
        [1, 2, 1, X, 5, 4],
    ],
    dtype=float,
)

In [2]:
# Define common variables and helper functions

# n = number of rows (users), m = number of columns (items) of the rating matrix
n, m = R.shape

def known(r):
    """Tell whether a single rating value is present (i.e. is not NaN)."""
    is_missing = np.isnan(r)
    return not is_missing

def known_item_ratings(R,i):
    """Return, as a list, the ratings in column i of R that are not missing."""
    item_column = R[:, i]
    return list(filter(known, item_column))

def item_common_ratings(R, i, j):
    """Collect the rating pairs of the users who rated both item i and item j.

    Args:
        R: 2-D rating matrix in which missing ratings are NaN.
        i: column index of the first item.
        j: column index of the second item.

    Returns:
        Array of shape (k, 2): one row per user with both ratings known,
        column 0 holding the rating of item i and column 1 that of item j.
        When no user rated both items the result has shape (0, 2), so
        column slicing such as ``result[:, 0]`` stays valid.
    """
    pairs = np.column_stack((R[:, i], R[:, j]))
    # A row is usable only if neither of its two ratings is missing.
    both_known = ~np.isnan(pairs).any(axis=1)
    return pairs[both_known]

def mean_r(R,i):
    """Return the mean of the known (non-missing) ratings of item i."""
    observed = known_item_ratings(R, i)
    return np.mean(observed)

In [3]:
# Calculating the matrix of item similarities
def similarity(R, i, j):
    """Compute the similarity between items i and j from their co-rated users.

    Each item's ratings are centred on that item's mean over all of its
    known ratings (mean_r); the similarity is the dot product of the two
    centred rating vectors, restricted to users who rated both items,
    divided by the product of their Euclidean norms.

    Args:
        R: 2-D rating matrix in which missing ratings are NaN.
        i: column index of the first item.
        j: column index of the second item.

    Returns:
        The similarity score, or 0.0 when it is undefined: no user rated
        both items, or one of the centred vectors has zero norm.
    """
    U_ij = item_common_ratings(R, i, j)
    # Without common raters there is nothing to compare.
    if U_ij.shape[0] == 0:
        return 0.0

    dev_i = U_ij[:, 0] - mean_r(R, i)
    dev_j = U_ij[:, 1] - mean_r(R, j)

    # The denominator does not depend on the user, so compute it once.
    denom = np.linalg.norm(dev_i) * np.linalg.norm(dev_j)
    # Avoid a 0/0 NaN: a NaN similarity would be ranked first by the
    # descending argsort used when neighbours are selected.
    if denom == 0:
        return 0.0
    return np.dot(dev_i, dev_j) / denom

# Item-item similarity matrix: entry [row, col] holds similarity(R, col, row).
item_similarity = np.array(
    [[similarity(R, col, row) for col in range(m)] for row in range(m)]
)


In [4]:
# Predict ratings based on the item similarities
# Maximum number of similar items used for each prediction (read by predict_rating)
k = 2 # neighbourhood size

def predict_rating(R, u, i):
    """Predict user u's rating of item i from the user's ratings of similar items.

    Reads the module-level ``item_similarity`` matrix and neighbourhood
    size ``k``. Prints the selected neighbours as a side effect.

    Args:
        R: 2-D rating matrix in which missing ratings are NaN.
        u: row index of the user.
        i: column index of the item to predict.

    Returns:
        The similarity-weighted average of the user's ratings of the
        selected neighbour items, normalised by the sum of the absolute
        similarities. Returns ``np.nan`` when no prediction can be made:
        the user rated no other item, or the absolute similarities of the
        selected neighbours sum to zero.
    """
    # Candidate items ordered from most to least similar to item i.
    all_neighbours = np.argsort(item_similarity[i])[::-1]

    # Drop item i itself and items without a rating from u, then keep the top k.
    neighbors = [j for j in all_neighbours if j != i and known(R[u, j])][:k]
    print("User %s, item %s <- item neighbors %s" % (u, i, neighbors))

    score = sum(item_similarity[i, j] * R[u, j] for j in neighbors)
    norm = sum(abs(item_similarity[i, j]) for j in neighbors)

    # No neighbours (or only zero-similarity ones): the weighted average is
    # undefined, so report the rating as still unknown instead of dividing by 0.
    if norm == 0:
        return np.nan
    return score / norm

def _complete_ratings(R):
    """Build the full rating matrix: known entries are kept, missing ones predicted."""
    rows = []
    for u in range(n):
        row = []
        for i in range(m):
            row.append(R[u, i] if known(R[u, i]) else predict_rating(R, u, i))
        rows.append(row)
    return np.array(rows)


ratings = _complete_ratings(R)

print("\nComplete rating matrix:")
np.set_printoptions(precision=2)
print(ratings)

User 0, item 2 <- item neighbors [1, 0]
User 1, item 1 <- item neighbors [2, 0]
User 2, item 0 <- item neighbors [2, 1]
User 2, item 3 <- item neighbors [4, 5]
User 3, item 1 <- item neighbors [2, 0]
User 4, item 3 <- item neighbors [4, 0]
User 4, item 5 <- item neighbors [4, 1]
User 5, item 3 <- item neighbors [4, 5]

Complete rating matrix:
[[5.   4.   4.5  1.   2.   1.  ]
 [4.   3.49 3.   1.   1.   2.  ]
 [5.   5.   5.   3.   3.   3.  ]
 [2.   1.49 1.   4.   5.   4.  ]
 [2.   2.   2.   1.23 4.   1.81]
 [1.   2.   1.   4.51 5.   4.  ]]
