In [None]:
# Python user-user collaborative filtering recommender system
# TODO: consider building the user-game rating matrix without a pivot table
# TODO: experiment with different similarity functions, numbers of neighbors, ...
# TODO: predicted ratings could be rounded (e.g. to the nearest valid rating) to check whether accuracy improves

In [1]:
import numpy as np
import pandas as pd

# Read the raw ratings; the file must provide 'userID', 'gameID' and 'rating' columns.
df = pd.read_csv('inputs/boardgame-elite-users.csv')

# User x game matrix: one row per userID, one column per gameID, cells hold the rating.
# User/game pairs with no rating are filled with 0; the code below treats 0 as "not rated".
rating_ptable = (
    df.pivot_table(values='rating', index='userID', columns='gameID')
    .fillna(0)
)

In [2]:
from sklearn.preprocessing import normalize

# Rescale each user's rating row (axis=1) to unit length, intended to correct for biased
# scale use (scale is 1-10, but a user may only rate between, say, 3 and 8).
# NOTE(review): unit-length scaling does not mean-centre a user's ratings, and cosine
# similarity is unaffected by per-row scaling -- confirm this achieves the intended correction.
normalized = normalize(rating_ptable, axis=1)

In [3]:
# Get User Similarity Matrix
# Also try Pearson coefficient, city-block, etc...
from sklearn.metrics.pairwise import cosine_similarity

# Square user-by-user matrix: entry [i, j] is the cosine similarity between
# rows i and j of `normalized` (Y=None compares the rows of X with each other).
similarity_matrix = cosine_similarity(X=normalized, Y=None)

In [4]:
# For every row of the similarity matrix, the column positions ordered from most to
# least similar: argsort yields ascending order, so each row is then reversed.
nbor_indx = np.fliplr(np.argsort(similarity_matrix, axis=1))

# Helper function called by "get_k_nearest_neighbors"
def get_users_that_rated(gameid):
    """Return the list of userIDs whose rating for ``gameid`` is greater than 0.

    Reads the module-level ``rating_ptable``; a rating of 0 (or a missing value)
    means the user has not rated the game. Raises ``KeyError`` if ``gameid`` is
    not a column of ``rating_ptable``.
    """
    game_ratings = rating_ptable[gameid]
    has_rated = game_ratings > 0
    return game_ratings[has_rated].index.tolist()
    
# Helper function called by "predict_rating"
def get_k_nearest_neighbors(userid, gameid, k):
    """Return up to ``k`` userIDs most similar to ``userid`` that have rated ``gameid``.

    Relies on the module-level ``rating_ptable`` (user x game ratings) and
    ``nbor_indx`` (per-user row positions sorted by descending similarity).

    Parameters
    ----------
    userid : label in ``rating_ptable.index`` of the target user.
    gameid : column label in ``rating_ptable`` of the game of interest.
    k : maximum number of neighbors to return; must be greater than 1.

    Returns
    -------
    list of userIDs in descending order of similarity to ``userid``. The target
    user is never included, whether or not they rated the game. The list is
    shorter than ``k`` when fewer than ``k`` other users rated the game.

    Raises
    ------
    AssertionError if ``k`` is not greater than 1.
    KeyError if ``userid`` or ``gameid`` is not present in ``rating_ptable``.
    """
    assert k > 1, "k must be greater than 1"
    # A set gives O(1) membership tests while scanning the ranked users.
    potential_nbors = set(get_users_that_rated(gameid))
    userid_indx = rating_ptable.index.get_loc(userid)

    # nbor_indx stores row positions of rating_ptable, so translate them back to
    # userIDs through rating_ptable's index (not the raw ratings frame's index).
    ranked_userids = rating_ptable.index[nbor_indx[userid_indx]].tolist()

    # Exclude the target user explicitly instead of assuming they are ranked first:
    # they only appear among the raters if they rated the game themselves.
    sim_userIDs = [
        candidate
        for candidate in ranked_userids
        if candidate != userid and candidate in potential_nbors
    ]
    return sim_userIDs[:k]
    
# Example: the ten users most similar to user 66705 among those who rated game 13
print(get_k_nearest_neighbors(userid=66705, gameid=13, k=10))

[187094, 128117, 55092, 13340, 19677, 37860, 12305, 96817, 119312, 27853]


In [5]:
# depends on rating_ptable and similarity_matrix existing
def predict_rating(uid, gameid, k=10):
    """Predict the rating ``uid`` would give ``gameid``.

    The prediction is the similarity-weighted average of the ratings given to
    ``gameid`` by the neighbors returned from ``get_k_nearest_neighbors``.

    Parameters
    ----------
    uid : label in ``rating_ptable.index`` of the target user.
    gameid : column label in ``rating_ptable`` of the game to predict.
    k : number of neighbors to request (default 10).

    Returns
    -------
    The predicted rating, or NaN when no neighbor is available or the
    neighbors' similarities sum to zero (no basis for a prediction).
    """
    sim_uids = get_k_nearest_neighbors(uid, gameid, k)

    # Similarities between the target user and every other user; looked up once
    # because it does not change between neighbors.
    user_similarities = similarity_matrix[rating_ptable.index.get_loc(uid)]

    accm_score = 0.0
    accm_weight = 0.0
    for nbor_id in sim_uids:
        weight = user_similarities[rating_ptable.index.get_loc(nbor_id)]
        accm_score += rating_ptable.at[nbor_id, gameid] * weight
        accm_weight += weight

    # Avoid dividing by zero when there is nothing to average.
    if accm_weight == 0:
        return float("nan")
    return accm_score / accm_weight

# Example: predicted rating of game 187645 for user 3080, using the default k
predict_rating(uid=3080, gameid=187645)

7.613137767277404