In [1]:
import pandas as pd
import numpy as np
import requests
import regex as re
import time
import os
import gc
import json
import copy
from statistics import mean
from statistics import median

# ignore warnings (gets rid of Pandas copy warnings)
import warnings
warnings.filterwarnings('ignore')

# Notebook display limits: never truncate columns, show at most 30 rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = 30


from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from scipy import spatial

# scoring and algorithm selection packages
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score 
from sklearn.inspection import permutation_importance

# visualization packages
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from surprise import KNNWithMeans, SVD, Dataset, Reader, dump, accuracy, NMF, BaselineOnly
from surprise.model_selection.validation import cross_validate
from surprise.model_selection import KFold
from surprise.prediction_algorithms.matrix_factorization import SVD, SVDpp, NMF
from surprise.prediction_algorithms.slope_one import SlopeOne
from surprise.prediction_algorithms.co_clustering import CoClustering
from surprise.prediction_algorithms.random_pred import NormalPredictor
from surprise.prediction_algorithms.baseline_only import BaselineOnly
from surprise.model_selection.search import GridSearchCV

## Load Files

In [None]:
# Item-to-item similarity table, indexed as item_similarities[game_id][other_game_id].
# Later cells (building `game_ids`, the one-item test, the prediction loop) read
# `item_similarities`, so it has to be loaded here for the notebook to run on a
# fresh kernel.
# NOTE(review): this load had been commented out -- confirm this is still the
# intended file (it may have been disabled because of its size).
with open('data_cleaned/item_similarities_real_full.json', encoding='utf-8') as json_file:
    item_similarities = json.load(json_file)

In [None]:
# Ratings indexed as user_ratings[user][game_id] (the "unscaled" ratings file).
# The file is decoded as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open('real_ratings/user_ratings_unscaled.json', encoding='utf-8') as json_file:
    user_ratings = json.load(json_file)

In [None]:
# Ratings indexed as user_ratings[user][game_id], read from the item_similarities/ folder.
# NOTE(review): this rebinds `user_ratings`, replacing whatever an earlier cell
# loaded into that name -- confirm which ratings file is meant to be active.
# The file is decoded as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open('item_similarities/user_ratings_unscaled.json', encoding='utf-8') as json_file:
    user_ratings = json.load(json_file)

In [None]:
# Opening JSON file
#with open('synthetic_ratings/users_synthetic_1000_2.json') as json_file:
#    user_ratings = json.load(json_file)

In [None]:
# Per-user values indexed by user name; `user_means[user]` is read further down
# to obtain `user_mean`.
# Decoded as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open('data_cleaned/user_means.json', encoding='utf-8') as json_file:
    user_means = json.load(json_file)

In [None]:
# Per-game values indexed by game id; subtracted from a user's rating of that
# game when the rating deviations are computed below.
# Decoded as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open('data_cleaned/item_means.json', encoding='utf-8') as json_file:
    item_means = json.load(json_file)

In [None]:
# Lookup indexed by game id; used below to fill the 'Game' column of the results
# table (presumably id -> game title -- verify against the file).
# Decoded as UTF-8 explicitly: the default encoding is platform dependent and
# would garble any non-ASCII text on some systems.
with open('data_cleaned/game_id_lookup.json', encoding='utf-8') as json_file:
    game_id_lookup = json.load(json_file)

In [None]:
# Every game id that has a row in the similarity table, in the table's own order.
game_ids = [game_id for game_id in item_similarities]

## Predict items for one user - Real Data

In [None]:
# User the rest of this section predicts for, plus that user's entry in user_means.
user = 'Threnody'
user_mean = user_means[user]

In [None]:
# Show the selected user's ratings. Indexing with `user` (rather than repeating
# the literal name) keeps this cell in step with the user chosen above.
user_ratings[user]

### One Item Test

In [None]:
# `game` has to be bound before it is used as a key; on a fresh kernel nothing
# above this cell assigns it. '256170' is the target game of this one-item test.
game = '256170'
item_similarities[game]

In [None]:
game = '256170'

# Similarities between `game` and only those games the user has rated,
# as {other_game_id: similarity}.
intersect_items = {
    other_id: similarity
    for other_id, similarity in item_similarities[game].items()
    if other_id in user_ratings[user]
}
intersect_items

In [None]:
game_id_lookup[game]

In [None]:
game = '41114'

In [None]:
game_id_lookup[game]

In [None]:
intersect_items[game]

In [None]:
user_ratings[user][game]

In [None]:
item_means[game]

In [None]:
user_ratings[user][game]-item_means[game]

In [None]:
intersect_items[game]*(user_ratings[user][game]-item_means[game])

In [None]:
game = '116'

In [None]:
game_id_lookup[game]

In [None]:
intersect_items[game]

In [None]:
user_ratings[user][game]

In [None]:
item_means[game]

In [None]:
user_ratings[user][game]-item_means[game]

In [None]:
intersect_items[game]*(user_ratings[user][game]-item_means[game])

In [None]:
game = '256170'

In [None]:
weighted = [intersect_items[game]*(user_ratings[user][game]-item_means[game]) for game in intersect_items.keys()]
weighted

In [None]:
numer = sum(weighted)
numer

In [None]:
denom = sum(intersect_items.values())
denom

In [None]:
numer/denom

In [None]:
scaled_prediction = numer/denom + user_mean
scaled_prediction

In [None]:
sum([(user_ratings[user][game]-item_means[game]) for game in intersect_items.keys()])

In [None]:
game = '256170'

start = time.time()

# Similarities restricted to games the user has rated.
intersect_items = {
    other_id: similarity
    for other_id, similarity in item_similarities[game].items()
    if other_id in user_ratings[user]
}
# Similarity-weighted deviations of the user's ratings from each game's item_means entry.
weighted = [
    similarity * (user_ratings[user][other_id] - item_means[other_id])
    for other_id, similarity in intersect_items.items()
]
# Weighted-average deviation plus the user's mean.
# (Alternative kept for reference: mean(weighted) + user_mean)
prediction = sum(weighted) / sum(intersect_items.values()) + user_mean


end = time.time()
print(end - start)
prediction

### Make Predictions

In [None]:
# Predict a rating for every game in `game_ids` for the selected user and keep
# only predictions above the user's own mean. Results land in
# predictions[user][game_id], rounded to one decimal.
start = time.time()

predictions = {}
predictions[user] = {}

user_mean = user_means[user]
# The user's ratings do not change inside the loop, so look them up once.
user_game_ratings = user_ratings[user]

for game in game_ids:

    # Similarities between `game` and the games this user has rated.
    intersect_items = {
        other_id: similarity
        for other_id, similarity in item_similarities[game].items()
        if other_id in user_game_ratings
    }

    # Nothing in common with the user's rated games: no basis for a prediction.
    if not intersect_items:
        continue

    # similarity * (user's rating - that game's item_means entry), one term per co-rated game.
    weighted = [
        similarity * (user_game_ratings[other_id] - item_means[other_id])
        for other_id, similarity in intersect_items.items()
    ]
    # Plain average of the weighted deviations, shifted by the user's mean.
    # (Alternative kept for reference: sum(weighted)/sum(intersect_items.values()) + user_mean)
    prediction = mean(weighted) + user_mean

    if prediction > user_mean:
        predictions[user][game] = round(prediction, 1)

end = time.time()
print(end - start)

In [None]:
relevant = [key for key in user_ratings[user].keys() if user_ratings[user][key]>user_mean]
len(relevant)

In [None]:
relevant_and_recommended = [key for key in predictions[user].keys() if key in user_ratings[user].keys() and user_ratings[user][key]>user_mean and predictions[user][key]>user_mean]
len(relevant_and_recommended)

In [None]:
len(relevant_and_recommended)/len(relevant)

In [None]:
overlap_items = [key for key in predictions[user].keys() if key in user_ratings[user].keys()]

In [None]:
preds = [value for (key, value) in predictions[user].items() if key in overlap_items]
preds[:10]

In [None]:
# Actual ratings for the overlap games, taken in overlap_items order so that
# actuals[i] and preds[i] refer to the same game. Filtering
# user_ratings[user].items() instead would follow the ratings dict's own key
# order, which need not match the order of the predictions, and the error
# metric would then compare ratings of different games.
actuals = [user_ratings[user][game_id] for game_id in overlap_items]
actuals[:10]

In [None]:
# Mean absolute error between actual and predicted ratings (true values first;
# the metric is symmetric, so the result does not depend on argument order).
mean_absolute_error(actuals, preds)

In [None]:
new_predictions = {key:value for (key, value) in predictions[user].items() if key not in user_ratings[user].keys()}

In [None]:
df = pd.DataFrame.from_dict(new_predictions, orient='index').sort_values(0, ascending=False)
df['Game'] = df.index.map(game_id_lookup)
df.head(30)

## Predict items for one user - Synth 250 Data

In [None]:
# Replace the similarity table with the "synth250 / over50only" one for this section.
# Decoded as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open('data_cleaned/item_similarities_synth250_over50only.json', encoding='utf-8') as json_file:
    item_similarities = json.load(json_file)

In [None]:
# User to predict for in this section.
user = 'Threnody'

### Make Predictions

In [None]:
# Predict scaled ratings for the selected user from the currently loaded
# similarity table, then report timing, recall and MAE against the ratings the
# user already has. Unrated-game predictions are collected in `new_predictions`.
start = time.time()

predictions = {}
predictions[user] = {}

# The user's ratings do not change inside the loop, so look them up once.
user_game_ratings = user_ratings[user]

# Iterate the similarity table that is loaded now. `game_ids` was built from the
# previously loaded table and may contain ids this table does not have.
for game, similarities in item_similarities.items():

    # Similarities between `game` and the games this user has rated.
    intersect_items = {
        other_id: similarity
        for other_id, similarity in similarities.items()
        if other_id in user_game_ratings
    }

    # Nothing in common with the user's rated games: no basis for a prediction.
    if not intersect_items:
        continue

    similarity_total = sum(intersect_items.values())
    # A zero weight total makes the weighted average undefined; skip instead of
    # raising ZeroDivisionError.
    if similarity_total == 0:
        continue

    # Similarity-weighted ratings, with each rating divided by 10.
    weighted = [
        similarity * user_game_ratings[other_id] / 10
        for other_id, similarity in intersect_items.items()
    ]
    scaled_prediction = sum(weighted) / similarity_total

    if scaled_prediction > 0:
        predictions[user][game] = round(scaled_prediction, 1)


# Games the user rated above 0, and the subset of those that also got a positive prediction.
relevant = [game_id for game_id, rating in user_game_ratings.items() if rating > 0]
relevant_and_recommended = [
    game_id
    for game_id, predicted in predictions[user].items()
    if game_id in user_game_ratings and user_game_ratings[game_id] > 0 and predicted > 0
]
recall = len(relevant_and_recommended) / len(relevant)

# Build both sequences from the same key list so that preds[i] and actuals[i]
# always refer to the same game (the two dicts do not share a key order).
overlap_items = [game_id for game_id in predictions[user] if game_id in user_game_ratings]
preds = [predictions[user][game_id] for game_id in overlap_items]
actuals = [user_game_ratings[game_id] / 10 for game_id in overlap_items]
mae = mean_absolute_error(actuals, preds)

# Predictions for games the user has not rated, shifted by the user's user_means entry.
new_predictions = {
    game_id: predicted + user_means[user]
    for game_id, predicted in predictions[user].items()
    if game_id not in user_game_ratings
}

end = time.time()
print(end - start)
print(recall)
print(mae)

In [None]:
# One row per unrated game (index = game id, column 0 = prediction), highest first.
df = pd.DataFrame.from_dict(new_predictions, orient='index')
df = df.sort_values(by=0, ascending=False)
# Attach the game_id_lookup entry for each game id.
df['Game'] = df.index.map(game_id_lookup)
df.head(50)