In [1]:
import pandas as pd
import numpy as np
import requests
import regex as re
import time
import os
import gc
import json
import copy
from statistics import mean
from statistics import median

# ignore warnings (gets rid of Pandas copy warnings)
import warnings
warnings.filterwarnings('ignore')
pd.options.display.max_columns = None

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 30)


from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from scipy import spatial

# scoring and algorithm selection packages
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score 
from sklearn.inspection import permutation_importance

# visualization packages
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from surprise import KNNWithMeans, SVD, Dataset, Reader, dump, accuracy, NMF, BaselineOnly
from surprise.model_selection.validation import cross_validate
from surprise.model_selection import KFold
from surprise.prediction_algorithms.matrix_factorization import SVD, SVDpp, NMF
from surprise.prediction_algorithms.slope_one import SlopeOne
from surprise.prediction_algorithms.co_clustering import CoClustering
from surprise.prediction_algorithms.random_pred import NormalPredictor
from surprise.prediction_algorithms.baseline_only import BaselineOnly
from surprise.model_selection.search import GridSearchCV

## Load Files

In [2]:
# Opening JSON file
#with open('data_cleaned/item_similarities_real_full.json') as json_file:
#    item_similarities = json.load(json_file)

In [2]:
# Opening JSON file
with open('real_ratings/real_user_ratings_scaled.json') as json_file:
    user_ratings = json.load(json_file)

In [3]:
# Opening JSON file
with open('data_cleaned/item_similarities_real_full_75.json') as json_file:
    item_similarities = json.load(json_file)

In [4]:
# Opening JSON file
#with open('synthetic_ratings/users_synthetic_1000_2.json') as json_file:
#    user_ratings = json.load(json_file)

In [5]:
# Opening JSON file
with open('data_cleaned/user_means.json') as json_file:
    user_means = json.load(json_file)

In [6]:
# Opening JSON file
with open('data_cleaned/item_means.json') as json_file:
    item_means = json.load(json_file)

In [7]:
# Opening JSON file
with open('data_cleaned/game_id_lookup.json') as json_file:
    game_id_lookup = json.load(json_file)

In [8]:
game_ids = list(item_similarities.keys())

## Predict items one user - Real Data

In [9]:
user='Threnody'
user_mean = user_means[user]

In [18]:
user_ratings['Threnody']

{'25190': -2,
 '13': -11,
 '3076': -19,
 '822': -4,
 '68448': -2,
 '2651': -18,
 '30549': 4,
 '31260': -9,
 '14996': -5,
 '266192': -11,
 '110327': -7,
 '9209': -4,
 '70323': -1,
 '36218': 4,
 '28143': -7,
 '34635': -5,
 '178900': 4,
 '237182': -11,
 '162886': -14,
 '167791': 6,
 '98778': -1,
 '320': -23,
 '201808': -8,
 '41114': -2,
 '230802': 2,
 '124361': -6,
 '18602': -7,
 '157354': -2,
 '169786': 8,
 '148228': 6,
 '65244': 3,
 '1406': -14,
 '157969': -1,
 '170216': 0,
 '146508': -10,
 '37111': 3,
 '129622': 7,
 '224517': -7,
 '50': -2,
 '10547': 0,
 '478': 5,
 '11': 0,
 '40834': 3,
 '3955': -5,
 '118': -14,
 '199792': -1,
 '291457': -8,
 '27162': -1,
 '50381': 2,
 '2453': 2,
 '192291': 1,
 '3': -14,
 '233867': -1,
 '218603': -1,
 '43443': -6,
 '172818': -8,
 '1294': -7,
 '124708': -2,
 '182028': 6,
 '244521': 1,
 '239188': -1,
 '182874': -3,
 '37046': 3,
 '148949': 5,
 '102652': -2,
 '131357': 10,
 '219513': -3,
 '199561': 5,
 '2921': -22,
 '9216': -15,
 '155426': 5,
 '236457': 2,

### One Item Test

In [37]:
game = '178900'

# produces dict of similarities for game_ids that user has rated
intersect_items = {key:value for (key, value) in item_similarities[game].items() if key in user_ratings[user].keys()}
intersect_items

{}

In [38]:
game_id_lookup[game]

'Codenames'

In [39]:
item_similarities[game]

{'39431': 0.7718916535377502,
 '277424': 0.777122437953949,
 '3598': 0.7836260795593262,
 '10269': 0.7794942855834961,
 '36777': 0.8093695044517517,
 '135840': 0.7707837820053101,
 '154098': 0.857902467250824,
 '23870': 0.756094753742218,
 '1274': 0.7701854109764099,
 '41198': 0.7979899048805237,
 '330145': 0.9320590496063232}

In [40]:
intersect_items[game]

KeyError: '178900'

In [41]:
user_ratings[user][game]

4

In [42]:
item_means[game]

7.598622200044356

In [43]:
weighted = [intersect_items[game]*(user_ratings[user][game]/10) for game in intersect_items.keys()]
weighted

[]

In [44]:
numer = sum(weighted)
numer

0

In [45]:
denom = sum(intersect_items.values())
denom

0

In [46]:
numer/denom

ZeroDivisionError: division by zero

In [34]:
scaled_prediction = numer/denom + user_mean
scaled_prediction

6.686616578405754

In [35]:
sum([(user_ratings[user][game]/10) for game in intersect_items.keys()])

-3.4

In [71]:
game = '256170'

intersect_items = {key:value for (key, value) in item_similarities[game].items() if key in user_ratings[user].keys()}
intersect_items

{'258036': 0.7731715440750122,
 '239188': 0.8248884677886963,
 '163967': 0.8215402960777283,
 '220877': 0.9615119099617004,
 '15364': 0.888222873210907,
 '118063': 0.9161118865013123}

In [72]:
weighted = [(intersect_items[game]*(user_ratings[user][game]/10)) for game in intersect_items.keys()]
weighted

[-0.23195146322250365,
 -0.08248884677886964,
 0.5750782072544097,
 -1.1538142919540404,
 -0.08882228732109071,
 -2.1986685276031492]

In [73]:
sum(weighted)

-3.180667209625244

In [74]:
sum(intersect_items.values())

5.1854469776153564

In [75]:
sum(weighted)/sum(intersect_items.values())

-0.6133834215942451

In [76]:
mean(weighted)

-0.5301112016042073

In [77]:
item_means[game]

7.068965517241379

In [78]:
game = '256170'

start = time.time()

intersect_items = {key:value for (key, value) in item_similarities[game].items() if key in user_ratings[user].keys()}
weighted = [(intersect_items[game]*(user_ratings[user][game]/10)) for game in intersect_items.keys()]
#prediction = mean(weighted) + user_mean
prediction = sum(weighted)/sum(intersect_items.values()) + item_means[game]


end = time.time()
print(end-start)
prediction

0.0010006427764892578


6.455582095647134

### Make Predictions

In [59]:
start = time.time()

predictions = {}
predictions[user] = {}

user_mean = user_means[user]

for game in game_ids:

    intersect_items = {key:value for (key, value) in item_similarities[game].items() if key in user_ratings[user].keys()}
    
    if len(intersect_items)==0:
        continue
    
    else:
        intersect_items = {key:value for (key, value) in item_similarities[game].items() if key in user_ratings[user].keys()}
        weighted = [intersect_items[game]*(user_ratings[user][game]/10) for game in intersect_items.keys()]
        prediction = sum(weighted)/sum(intersect_items.values()) + item_means[game]
        
        if prediction > user_mean:
            predictions[user][game] = round(prediction, 1)

end = time.time()
print(end-start)

2.826305389404297


In [60]:
relevant = [key for key in user_ratings[user].keys() if user_ratings[user][key]>user_mean]
len(relevant)

29

In [61]:
relevant_and_recommended = [key for key in predictions[user].keys() if key in user_ratings[user].keys() and user_ratings[user][key]>user_mean and predictions[user][key]>user_mean]
len(relevant_and_recommended)

4

In [62]:
len(relevant_and_recommended)/len(relevant)

0.13793103448275862

In [63]:
overlap_items = [key for key in predictions[user].keys() if key in user_ratings[user].keys()]

In [64]:
preds = [value for (key, value) in predictions[user].items() if key in overlap_items]
preds[:10]

[7.9, 8.1, 9.8, 7.9, 7.9, 7.8, 8.1, 7.7, 7.5, 7.5]

In [79]:
actuals = [(value/10+item_means[key]) for (key, value) in user_ratings[user].items() if key in overlap_items]
actuals[:10]

[7.970513628684536,
 7.977541309519929,
 5.955604557148107,
 8.030818678882815,
 7.987317437191287,
 6.9682999863890025,
 6.479960513326752,
 8.037829614604462,
 5.975320708613316,
 7.038164332714695]

In [80]:
mean_absolute_error(preds, actuals)

1.1452840460749285

In [81]:
new_predictions = {key:value for (key, value) in predictions[user].items() if key not in user_ratings[user].keys()}

In [82]:
df = pd.DataFrame.from_dict(new_predictions, orient='index').sort_values(0, ascending=False)
df['Game'] = df.index.map(game_id_lookup)
df.head(30)

Unnamed: 0,0,Game
257180,10.8,Kingdom Builder: Family Box
22359,10.8,Steel Wolves: The German Submarine Campaign Ag...
294652,10.7,By Stealth and Sea
306697,10.7,Smash Up: Marvel
320097,10.6,ECK: A solo trick-taking card game
290500,10.6,Flip & Fish
127493,10.4,Bolt Action
271320,10.4,The Castles of Burgundy
310885,10.4,Belaad: The Land of Swords and Quills
295785,10.2,Euthia: Torment of Resurrection


## Predict items one user - Synth 250 Data

In [None]:
# Opening JSON file
with open('data_cleaned/item_similarities_synth250_over50only.json') as json_file:
    item_similarities = json.load(json_file)

In [None]:
user='Threnody'

### Make Predictions

In [None]:
start = time.time()

predictions = {}
predictions[user] = {}

for game in game_ids:

    intersect_items = {key:value for (key, value) in item_similarities[game].items() if key in user_ratings[user].keys()}
    
    if len(intersect_items)==0:
        continue
    
    else:
        intersect_items = {key:value for (key, value) in item_similarities[game].items() if key in user_ratings[user].keys()}
        weighted = [intersect_items[game]*user_ratings[user][game]/10 for game in intersect_items.keys()]
        scaled_prediction = sum(weighted)/sum(intersect_items.values())
        
        if scaled_prediction > 0:
            predictions[user][game] = round(scaled_prediction, 1)
            
            
relevant = [key for key in user_ratings[user].keys() if user_ratings[user][key]>0]
relevant_and_recommended = [key for key in predictions[user].keys() if key in user_ratings[user].keys() and user_ratings[user][key]>0 and predictions[user][key]>0]
recall = len(relevant_and_recommended)/len(relevant)
overlap_items = [key for key in predictions[user].keys() if key in user_ratings[user].keys()]
preds = [value for (key, value) in predictions[user].items() if key in overlap_items]
actuals = [value/10 for (key, value) in user_ratings[user].items() if key in overlap_items]
mae = mean_absolute_error(preds, actuals)
new_predictions = {key:value+user_means[user] for (key, value) in predictions[user].items() if key not in user_ratings[user].keys()}

end = time.time()
print(end-start)
print(recall)
print(mae)

In [None]:
df = pd.DataFrame.from_dict(new_predictions, orient='index').sort_values(0, ascending=False)
df['Game'] = df.index.map(game_id_lookup)
df.head(50)