In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import polars as pl
import pickle
import seaborn as sns
from sklearn.model_selection import train_test_split
from collections import defaultdict
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import matplotlib.pyplot as plt
import tqdm 
import os
from surprise import Dataset, Reader, accuracy
from surprise.model_selection import cross_validate, KFold, GridSearchCV
from surprise import NormalPredictor, BaselineOnly, KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline, SVD, NMF, SlopeOne, CoClustering    
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

When choosing between two recommendation models, SVD and KNNBasic, and their overall RMSE and Precision@K are similar, but KNNBasic has a better Recall@K we chose KNNBasic over SVD by considering the following:

    Recall is a measure of the fraction of relevant items that were recommended to the user, out of all relevant items. A higher recall means that more relevant items were recommended to the user.


Overall, if recall is more important for your application and you have a high degree of user-item sparsity, KNNBasic may be a better choice than SVD.

In [3]:
with open('../data/data_for_model/data_for_model.pkl', 'rb') as f:
    # Load the contents of the file using pickle.load()
    data_for_model = pickle.load(f)

In [4]:
algo = KNNBasic()

In [47]:
names = ['recalls_list','precs_list','result_all']
names_dict = defaultdict(list)

In [48]:
# [i for i in os.listdir('../data/') if i not in ['.ipynb_checkpoints','input','data_for_model']]
#data_dir is data path
data_dir = '../data/'
folder_list = [i for i in os.listdir(data_dir) if i not in ['.ipynb_checkpoints','input','data_for_model']]
for i in folder_list:
    x = f"{data_dir}{i}"
    for enum_num,enum_name in enumerate(names):
        file_path = f"{x}/{enum_num}.pickle"
        with open(file_path, 'rb') as f:
            data = pickle.load(f)
            names_dict[enum_name].append(data)


#use this to loop

# with open(f'{data_dir}{var}/0.pickle', 'rb') as f:
#     test = pickle.load(f)
# var
    
    
# with open('..data/data_for_model/data_for_model.pkl', 'rb') as f:
#     # Load the contents of the file using pickle.load()
#     data_for_model = pickle.load(f)

In [60]:
averages = {}
for sublist in names_dict['result_all']:
    for algorithm, score in sublist:
        if algorithm not in averages:
            averages[algorithm] = score
        else:
            averages[algorithm].append(score)


In [61]:
averages

{'SVD': [1.0213712081874704,
  [1.0204828673113941],
  [1.0203655120368236],
  [1.0200507047167389],
  [1.0200749272648115],
  [1.0204828673113941],
  [1.0203655120368236],
  [1.0200507047167389],
  [1.0200749272648115]],
 'NonNegative Matrix Factorization': [1.7844759821295415,
  [1.777024081528765],
  [1.7885889749663555],
  [1.7871589997098034],
  [1.785430450313568],
  [1.777024081528765],
  [1.7885889749663555],
  [1.7871589997098034],
  [1.785430450313568]],
 'Slope One': [1.0174268302254594,
  [1.0175506091427093],
  [1.0174059117435126],
  [1.0175889050438958],
  [1.0175749963301826],
  [1.0175506091427093],
  [1.0174059117435126],
  [1.0175889050438958],
  [1.0175749963301826]],
 'Co-clustering': [1.045843777282942,
  [1.0469009270176166],
  [1.045874629570217],
  [1.0448459966189174],
  [1.0448217456155375],
  [1.0469009270176166],
  [1.045874629570217],
  [1.0448459966189174],
  [1.0448217456155375]],
 'Baseline': [1.0205603642904433,
  [1.0204669150813224],
  [1.02057036146

In [66]:
rmse_dict = {}
for algorithm, scores in averages.items():
    average_score = sum(scores) / len(scores)
    rmse_dict[algorithm] =  average_score[0]

In [67]:
rmse_dict

{'SVD': 1.0203688034274454,
 'NonNegative Matrix Factorization': 1.7845423327962804,
 'Slope One': 1.0175186305273403,
 'Co-clustering': 1.04563670832528,
 'Baseline': 1.0205726333795142,
 'Normal_predictor': 1.8536120804335374,
 'KNNBasic': 1.0719185261883608,
 'KNNBaseline': 1.0161866247709253,
 'KNNWithMeans': 1.0323884757127495,
 'KNNWithZScore': 1.0319913300349934}

In [8]:
recalls_dict = {model_name: recall_score for recall_list in 
                names_dict['recalls_list'] 
                for model_name, recall_score in recall_list}


In [15]:
precs_dict = {model_name: recall_score for recall_list in 
                names_dict['precs_list'] 
                for model_name, recall_score in recall_list}

In [71]:
rmse_dict

{'SVD': 1.0203688034274454,
 'NonNegative Matrix Factorization': 1.7845423327962804,
 'Slope One': 1.0175186305273403,
 'Co-clustering': 1.04563670832528,
 'Baseline': 1.0205726333795142,
 'Normal_predictor': 1.8536120804335374,
 'KNNBasic': 1.0719185261883608,
 'KNNBaseline': 1.0161866247709253,
 'KNNWithMeans': 1.0323884757127495,
 'KNNWithZScore': 1.0319913300349934}

In [77]:
dict_list = [recalls_dict,precs_dict,rmse_dict]
dataframe_results = pd.DataFrame(dict_list).T

In [80]:
dataframe_results.rename(columns={0:'recall_at_k',1:'precision_at_k',2:'average_rmse'},inplace=True)

In [87]:
dataframe_results[['precision_at_k','recall_at_k','average_rmse']].sort_values('precision_at_k',ascending=False)

Unnamed: 0,precision_at_k,recall_at_k,average_rmse
KNNBasic,0.698465,0.428652,1.071919
SVD,0.698177,0.390089,1.020369
KNNBaseline,0.688521,0.356677,1.016187
Baseline,0.68613,0.367994,1.020573
KNNWithZScore,0.683345,0.367066,1.031991
Slope One,0.679875,0.360517,1.017519
Co-clustering,0.665922,0.333746,1.045637
KNNWithMeans,0.66181,0.334689,1.032388
Normal_predictor,0.465807,0.314209,1.853612
NonNegative Matrix Factorization,0.087494,0.009557,1.784542


In [108]:
model = KNNBasic()
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(zz[['Username','BGGId', 'Rating']],reader)

In [109]:
train=data.build_full_trainset()
model.fit(train)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7f6a6ef6a690>

In [273]:
import random
user_list = []
game_list = zz[zz['Username'] == 'Evabelle']['BGGId'].to_list()

In [274]:
not_user_list = [i for i in games_csv2['BGGId'].to_list() if i not in game_list]

In [275]:
predict_list = [model.predict(uid='Evabelle', iid=i) for i in not_user_list]
est = [i.est for i in predict_list if i.details['was_impossible'] !=True]
iids = [i.iid for i in predict_list if i.details['was_impossible'] !=True]

test_df = pd.DataFrame({'est': est,
              'BGGId': iids}
            )

In [277]:
df_test = pd.merge(test_df, games_csv2.to_pandas(), on='BGGId')

In [323]:
df_test[['Username','Name','est']]

Unnamed: 0,est,BGGId,Unnamed: 3,Name,Description,YearPublished,GameWeight,AvgRating,BayesAvgRating,StdDev,...,Rank:partygames,Rank:childrensgames,Cat:Thematic,Cat:Strategy,Cat:War,Cat:Family,Cat:CGS,Cat:Abstract,Cat:Party,Cat:Childrens
0,6.956780,29055,7781,Down in Flames: Locked-On,flame jet bring flame system jet age add missi...,2018,1.8333,7.87404,5.61883,1.47290,...,21926,21926,0,0,1,0,0,0,0,0
1,6.753060,46614,9258,Nonaga,nonaga hexagonal playing area compose ring p...,2017,2.0000,6.84571,5.58707,1.31822,...,21926,21926,0,0,0,0,0,1,0,0
2,7.740358,62230,9578,Wild Blue Yonder,gmt release rise luftwaffe flames wwii air c...,2017,2.6364,7.82348,5.75074,1.36084,...,21926,21926,0,0,1,0,0,0,0,0
3,8.718947,85147,10261,Wings of the Motherland,fight wing game system volume go russian mod...,2019,4.2500,9.29208,5.61394,1.29979,...,21926,21926,0,0,1,0,0,0,0,0
4,7.407144,94375,10585,The Great Game: Rival Empires in Central Asia ...,company websitethe great game recreate th cent...,2018,2.0833,7.13100,5.58236,1.71678,...,21926,21926,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3074,8.348060,345584,21914,Mindbug,mindbug summon hybrid creature send battle opp...,2021,1.9231,8.27692,5.72520,1.43554,...,21926,21926,0,0,0,0,0,0,0,0
3075,7.696315,346482,21916,Voll verplant,voll verplant player create subway network fil...,2021,1.6667,7.49111,5.59115,1.01737,...,21926,21926,0,0,0,0,0,0,0,0
3076,7.702637,346501,21917,Mille Fiori,mille fiori role glass manufacturer trader wan...,2021,2.2857,7.37574,5.56872,1.43467,...,21926,21926,0,0,0,0,0,0,0,0
3077,7.856689,346703,21918,7 Wonders: Architects,wonder architect player race leader ancien...,2021,1.4286,7.22351,6.08654,1.44546,...,21926,21926,0,0,0,1,0,0,0,0
