In [None]:
# Python User-User Collaborative Filtering Recommender System

# The dataset has 'userID', 'gameID' and 'rating' columns: each row is one user's rating (1-10) of one game.
# The rating of a game that a user has not rated yet is predicted by:
#   1. finding that user's k nearest neighbors (by cosine similarity) among the users who rated the game,
#   2. averaging those neighbors' ratings of the game, weighted by each neighbor's cosine similarity to the user.

In [1]:
import numpy as np
import pandas as pd

# Raw ratings; the pivot below reads its 'userID', 'gameID' and 'rating' columns.
df = pd.read_csv('inputs/boardgame-elite-users.csv')

# User x game matrix: one row per userID, one column per gameID.
# pivot_table aggregates duplicate (user, game) pairs with its default (mean).
# Pairs with no rating become 0, which get_users_that_rated treats as "not rated".
rating_ptable = (
    df.pivot_table(index='userID', columns='gameID', values='rating')
    .fillna(0)
)

In [2]:
from sklearn.preprocessing import normalize

# Scale each user's rating row to unit L2 length (normalize's defaults, spelled out).
normalized = normalize(rating_ptable, norm='l2', axis=1)

In [3]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between user rows: sim[i, j] compares the users at
# positions i and j of rating_ptable.index (row order is carried over unchanged).
sim = cosine_similarity(normalized)

In [14]:
# Row i lists the positions of all users ordered from most to least similar to user i
# (ascending argsort along each row, then reversed).
nbor_indx = np.argsort(sim, axis=1)[:, ::-1]

# HELPER FUNCTIONS
def get_users_that_rated(gameid):
    """Return the user IDs whose stored rating for ``gameid`` is greater than 0.

    ``gameid`` must be a column label of ``rating_ptable``; the result is a
    plain list of labels taken from ``rating_ptable.index``.
    """
    game_ratings = rating_ptable[gameid]
    has_rating = game_ratings > 0
    return game_ratings[has_rating].index.tolist()
    
def get_k_nearest_neighbors(userid, gameid, k):
    """Return up to ``k`` users most similar to ``userid`` who rated ``gameid``.

    Users are scanned in the order stored in ``nbor_indx`` (most similar
    first) and kept only if ``get_users_that_rated(gameid)`` lists them.
    ``userid`` itself is never part of the result.

    Parameters
    ----------
    userid : label
        A label of ``rating_ptable.index``.
    gameid : label
        A column label of ``rating_ptable``.
    k : int
        Maximum number of neighbors to return; must be greater than 1.

    Returns
    -------
    list
        At most ``k`` user IDs, most similar first. Shorter (possibly empty)
        when fewer than ``k`` other users rated the game.

    Raises
    ------
    AssertionError
        If ``k`` is not greater than 1.
    KeyError
        If ``userid`` or ``gameid`` is not present in ``rating_ptable``.
    """
    assert k > 1, "k must be greater than 1"

    # A set makes the membership test below O(1) instead of a list scan per user.
    pot_nbors = set(get_users_that_rated(gameid))
    userid_indx = rating_ptable.index.get_loc(userid)

    sim_userIDs = []
    for x in nbor_indx[userid_indx]:
        # Exclude the target user explicitly. Dropping "the first hit" is only
        # right when the user rated the game AND sorts first; for an unrated
        # game it would throw away the single most similar neighbor.
        if x == userid_indx:
            continue
        candidate = rating_ptable.index[x]
        if candidate in pot_nbors:
            sim_userIDs.append(candidate)
            if len(sim_userIDs) == k:
                break
    return sim_userIDs

In [15]:
# Takes userID, gameID, optional k
# Returns predicted rating for gameID
def predict_rating(uid, gameid, k=10):
    """Predict the rating user ``uid`` would give to ``gameid``.

    The prediction is the average of the ratings given to ``gameid`` by the
    users returned from ``get_k_nearest_neighbors(uid, gameid, k)``, each
    weighted by that neighbor's entry in ``sim`` relative to ``uid``.

    Parameters
    ----------
    uid : label
        A label of ``rating_ptable.index``.
    gameid : label
        A column label of ``rating_ptable``.
    k : int, optional
        Number of neighbors to request (default 10).

    Returns
    -------
    float
        The similarity-weighted mean rating, or NaN when there is no neighbor
        with non-zero total weight to base a prediction on.
    """
    sim_uids = get_k_nearest_neighbors(uid, gameid, k)
    # Position of the target user in ``sim``; identical for every neighbor,
    # so it is looked up once rather than inside the loop.
    sim_indx_x = rating_ptable.index.get_loc(uid)

    accm_score = 0.0
    accm_weight = 0.0
    for nbor_id in sim_uids:
        sim_indx_y = rating_ptable.index.get_loc(nbor_id)
        weight = sim[sim_indx_x, sim_indx_y]
        # .at reads the single cell directly instead of materialising the row.
        accm_score += rating_ptable.at[nbor_id, gameid] * weight
        accm_weight += weight

    # No neighbors (or only zero-similarity ones): there is nothing to average,
    # so report "no prediction" instead of dividing by zero.
    if accm_weight == 0:
        return float('nan')
    return accm_score / accm_weight

# Example: predicted rating of game 97903 for user 5480, using the default k=10.
predict_rating(5480, 97903)

6.5095784352606305