In [1]:
from src.recommenders import ImprovedRecommender, PopBasedRecommender
from sklearn.decomposition import TruncatedSVD, PCA, IncrementalPCA, KernelPCA, SparsePCA
import numpy as np
import pickle

In [2]:
# Input locations.
items_path = "./data/games.pkl"  # passed as first positional argument to ImprovedRecommender
reviews_path = "./data/reviews.parquet"  # passed as `reviews_path=` to ImprovedRecommender
# Prefix only: "<split>_train.parquet", "<split>_test.parquet" and
# "<split>_val.parquet" are appended when the recommenders are built.
data_path = "./data/interactions_splits_"

# Directory prefix for pickled evaluation results; a file stem is appended to it.
output_path = "./evaluation/"

In [3]:
# Cut-offs k at which every recommender is evaluated (metrics are reported as "<metric>@k").
k_values = [5, 10, 20]
# Number of data splits to loop over; split indices run from 0 to am_splits - 1.
am_splits = 5

In [4]:
def save_pickle(save_path: str, data) -> None:
    """Pickle ``data`` into the file ``save_path + ".pickle"``.

    Args:
        save_path: Destination path without extension; ".pickle" is appended.
        data: Object to serialize; it must be picklable.

    The file is opened in binary write mode (an existing file is overwritten)
    and written with ``pickle.HIGHEST_PROTOCOL``.
    """
    target = save_path + ".pickle"
    with open(target, "wb") as out_file:
        pickle.dump(data, out_file, protocol=pickle.HIGHEST_PROTOCOL)


In [5]:
def invert_dict(dicts: list) -> dict:
    """Regroup a list of dicts into a dict of arrays, one array per key.

    For every key that occurs in at least one input dict, the result holds a
    float NumPy array of length ``len(dicts)`` whose ``i``-th entry is the
    value stored under that key in ``dicts[i]``.

    Args:
        dicts: List of dicts whose values can be stored in a float array.

    Returns:
        Dict mapping each key to its array, in order of first appearance.
        Positions belonging to dicts that lack the key keep the initial 0.0.
        An empty input list yields an empty dict.
    """
    amount = len(dicts)
    inverted = {}
    for position, mapping in enumerate(dicts):
        for key, value in mapping.items():
            # Allocate the zero array only the first time a key is seen,
            # instead of building a throwaway array on every lookup.
            if key not in inverted:
                inverted[key] = np.zeros(amount)
            inverted[key][position] = value
    return inverted

In [19]:
# Evaluate PopBasedRecommender on every split and collect the metrics.
split_evaluations = []

for split in range(am_splits):
    rec = PopBasedRecommender(
        train_path=f"{data_path}{split}_train.parquet",
        test_path=f"{data_path}{split}_test.parquet",
        val_path=f"{data_path}{split}_val.parquet",
    )
    # Derive the list length from the largest cut-off instead of hard-coding
    # 20, so it cannot fall out of sync when k_values is edited.
    # Assumes `amount` is the number of recommendations generated per user
    # (confirm in src.recommenders).
    rec.generate_recommendations(amount=max(k_values))
    # Metric keys carry the "@k" suffix, so merging the per-k dicts into one
    # dict per split does not overwrite anything.
    evaluation = {}
    for k in k_values:
        # val=False presumably selects the test data rather than the
        # validation data (confirm in src.recommenders).
        evaluation.update(rec.evaluate(val=False, k=k))
    split_evaluations.append(evaluation)

# Regroup from one dict per split to one array per metric, then persist the
# result as "<output_path>pop_based_old.pickle" and display it.
evaluations = invert_dict(split_evaluations)
save_pickle(output_path + "pop_based_old", evaluations)
evaluations

{'HR@5': array([0.50643163, 0.50444656, 0.50382722, 0.50357313, 0.50558996]),
 'nDCG@5': array([0.18972128, 0.18966486, 0.19010971, 0.18954848, 0.19111418]),
 'recall@5': array([0.15457402, 0.15466747, 0.15479303, 0.1544612 , 0.1565749 ]),
 'ideal_recall@5': array([0.78360788, 0.78360788, 0.78360788, 0.78360788, 0.78360788]),
 'nRecall@5': array([0.20047324, 0.20037425, 0.20040178, 0.19983299, 0.20214679]),
 'HR@10': array([0.63258695, 0.63071304, 0.63049071, 0.63109417, 0.63385739]),
 'nDCG@10': array([0.19870568, 0.19873141, 0.19913717, 0.19852995, 0.20022896]),
 'recall@10': array([0.21966529, 0.21943092, 0.22015371, 0.21925797, 0.22141995]),
 'ideal_recall@10': array([0.91868049, 0.91868049, 0.91868049, 0.91868049, 0.91868049]),
 'nRecall@10': array([0.23557168, 0.23525505, 0.23582393, 0.23489248, 0.23715273]),
 'HR@20': array([0.74734   , 0.74595839, 0.74622836, 0.74662538, 0.74997618]),
 'nDCG@20': array([0.22531723, 0.22525447, 0.22569324, 0.22517075, 0.22728064]),
 'recall@20':

In [7]:
# Evaluate ImprovedRecommender with playtime weighting enabled on every split.
split_evaluations = []
# Columns handed to the recommender through `columns=`.
use_data = ["specs", "genres", "tags", "early_access", "publisher"]
weighting_scheme = {"playtime": True, "sentiment": "mixed", "reviews": True}

for split in range(am_splits):
    rec = ImprovedRecommender(
        items_path,
        train_path=f"{data_path}{split}_train.parquet",
        test_path=f"{data_path}{split}_test.parquet",
        val_path=f"{data_path}{split}_val.parquet",
        reviews_path=reviews_path,
        sparse=True,
        tfidf="smooth",
        normalize=True,
        dim_red=None,
        columns=use_data,
        weighting_scheme=weighting_scheme,
    )
    # Derive the list length from the largest cut-off instead of hard-coding
    # 20, so it cannot fall out of sync when k_values is edited.
    # Assumes `amount` is the number of recommendations generated per user
    # (confirm in src.recommenders).
    rec.generate_recommendations(amount=max(k_values), silence=True)
    # Metric keys carry the "@k" suffix, so merging the per-k dicts into one
    # dict per split does not overwrite anything.
    evaluation = {}
    for k in k_values:
        evaluation.update(rec.evaluate(val=False, k=k))
    split_evaluations.append(evaluation)

# Regroup from one dict per split to one array per metric, then persist the
# result as "<output_path>improved_old.pickle" and display it.
evaluations = invert_dict(split_evaluations)
save_pickle(output_path + "improved_old", evaluations)
evaluations

{'HR@5': array([0.24295696, 0.24618072, 0.24451326, 0.24498968, 0.24591075]),
 'nDCG@5': array([0.07642187, 0.07635879, 0.07571041, 0.07651719, 0.07639398]),
 'recall@5': array([0.05850485, 0.05832972, 0.05797425, 0.05875014, 0.05801129]),
 'ideal_recall@5': array([0.78360788, 0.78360788, 0.78360788, 0.78360788, 0.78360788]),
 'nRecall@5': array([0.08016939, 0.08050077, 0.07987798, 0.08055926, 0.08008973]),
 'HR@10': array([0.35794823, 0.35979038, 0.35945688, 0.36072733, 0.3598539 ]),
 'nDCG@10': array([0.08423492, 0.08362828, 0.08382418, 0.08438959, 0.08378751]),
 'recall@10': array([0.09527999, 0.09429938, 0.09546468, 0.09594863, 0.09453092]),
 'ideal_recall@10': array([0.91868049, 0.91868049, 0.91868049, 0.91868049, 0.91868049]),
 'nRecall@10': array([0.10369078, 0.1026387 , 0.10387667, 0.10434023, 0.10292392]),
 'HR@20': array([0.48931237, 0.49016992, 0.48867715, 0.48997936, 0.4891218 ]),
 'nDCG@20': array([0.10043774, 0.10009629, 0.10006259, 0.10071702, 0.10012304]),
 'recall@20':

In [6]:
# Evaluate ImprovedRecommender with playtime weighting disabled on every split.
split_evaluations = []
# Columns handed to the recommender through `columns=`.
use_data = ["specs", "genres", "tags", "early_access", "publisher"]
weighting_scheme = {"playtime": False, "sentiment": "mixed", "reviews": True}

for split in range(am_splits):
    rec = ImprovedRecommender(
        items_path,
        train_path=f"{data_path}{split}_train.parquet",
        test_path=f"{data_path}{split}_test.parquet",
        val_path=f"{data_path}{split}_val.parquet",
        reviews_path=reviews_path,
        sparse=True,
        tfidf="smooth",
        normalize=True,
        dim_red=None,
        columns=use_data,
        weighting_scheme=weighting_scheme,
    )
    # Derive the list length from the largest cut-off instead of hard-coding
    # 20, so it cannot fall out of sync when k_values is edited.
    # Assumes `amount` is the number of recommendations generated per user
    # (confirm in src.recommenders).
    rec.generate_recommendations(amount=max(k_values), silence=True)
    # Metric keys carry the "@k" suffix, so merging the per-k dicts into one
    # dict per split does not overwrite anything.
    evaluation = {}
    for k in k_values:
        evaluation.update(rec.evaluate(val=False, k=k))
    split_evaluations.append(evaluation)

# Regroup from one dict per split to one array per metric, then persist the
# result as "<output_path>improved_noplay_old.pickle" and display it.
evaluations = invert_dict(split_evaluations)
save_pickle(output_path + "improved_noplay_old", evaluations)
evaluations

{'HR@5': array([0.32709227, 0.32472606, 0.32707639, 0.32713991, 0.3247737 ]),
 'nDCG@5': array([0.10327671, 0.10219285, 0.1030295 , 0.10308206, 0.10242568]),
 'recall@5': array([0.08724234, 0.08643466, 0.08698744, 0.08757959, 0.08593902]),
 'ideal_recall@5': array([0.78360788, 0.78360788, 0.78360788, 0.78360788, 0.78360788]),
 'nRecall@5': array([0.11322932, 0.11204198, 0.11294214, 0.11326002, 0.11158806]),
 'HR@10': array([0.46055264, 0.4578053 , 0.460505  , 0.46025091, 0.45952041]),
 'nDCG@10': array([0.11589327, 0.11516996, 0.11581641, 0.11579307, 0.11549489]),
 'recall@10': array([0.1377144 , 0.13726897, 0.13766463, 0.13807317, 0.13757186]),
 'ideal_recall@10': array([0.91868049, 0.91868049, 0.91868049, 0.91868049, 0.91868049]),
 'nRecall@10': array([0.1466803 , 0.14620667, 0.14657176, 0.14686447, 0.14640347]),
 'HR@20': array([0.58759727, 0.58537399, 0.58788312, 0.58893124, 0.58821661]),
 'nDCG@20': array([0.13570283, 0.13498625, 0.13587094, 0.13594861, 0.13547466]),
 'recall@20':

In [7]:
# Single run on split 0 with every weighting option switched off,
# evaluated at k=10 only.
use_data = ["specs", "genres", "tags", "early_access", "publisher"]
weighting_scheme = {"playtime": False, "sentiment": False, "reviews": False}

rec = ImprovedRecommender(
    items_path,
    train_path=f"{data_path}0_train.parquet",
    test_path=f"{data_path}0_test.parquet",
    val_path=f"{data_path}0_val.parquet",
    reviews_path=reviews_path,
    sparse=True,
    tfidf="smooth",
    normalize=True,
    dim_red=None,
    columns=use_data,
    weighting_scheme=weighting_scheme,
)
rec.generate_recommendations(amount=10, silence=True)
# The metrics dict is kept in `evaluate` and shown as the cell output.
evaluate = rec.evaluate(val=False, k=10)
evaluate



{'HR@10': 0.4482293155470859,
 'nDCG@10': 0.11380874154233078,
 'recall@10': 0.13496189808065726,
 'ideal_recall@10': 0.9186804883480776,
 'nRecall@10': 0.14345050301100803}