In [1]:
from src.recommenders import ImprovedRecommender
from sklearn.decomposition import TruncatedSVD
import pandas as pd
import numpy as np
from os import cpu_count
from multiprocessing import Pool
from itertools import combinations

In [2]:
# --- Filesystem locations (relative to the notebook's working directory) ---

# Input files handed to the recommender constructor.
items_path = "./data/games.pkl"
reviews_path = "./data/reviews.parquet"
# Prefix of the interaction split files; the "_train.parquet", "_test.parquet"
# and "_val.parquet" suffixes are appended where the recommender is built.
data_path = "./data/interactions_splits_0"

# Folder name for evaluation artefacts (not referenced by the cells shown here).
qual_eval_folder = "./evaluation"

Let's test with different feedback weighting schemes!

In [4]:
# Full grid of feedback weighting schemes: 2 playtime options x 4 sentiment
# options x 2 reviews options = 16 dicts, with 'playtime' varying slowest and
# 'reviews' varying fastest.
weighting_schemes = [
    dict(playtime=use_playtime, sentiment=sentiment_mode, reviews=use_reviews)
    for use_playtime in (True, False)
    for sentiment_mode in ('rating', 'n_reviews', 'mixed', False)
    for use_reviews in (True, False)
]
def test_weighting_scheme(rec, weighting_scheme):
    rec.set_weighting_scheme(weighting_scheme)
    rec.generate_recommendations(read_max=5000)
    return rec.evaluate(k=10)

# Metrics collected per weighting scheme, in the same order as `weighting_schemes`.
results = []

# One recommender is built up front and shared by every scheme; each call to
# test_weighting_scheme only swaps the weighting scheme on this instance.
rec = ImprovedRecommender(
    items_path,
    train_path=f"{data_path}_train.parquet",
    test_path=f"{data_path}_test.parquet",
    val_path=f"{data_path}_val.parquet",
    reviews_path=reviews_path,
    sparse=True,
    tfidf='smooth',
    normalize=True,
)

for weighting_scheme in weighting_schemes:
    results.append(test_weighting_scheme(rec, weighting_scheme))
    # Print each scheme next to its metrics as soon as they are available.
    print(weighting_scheme, results[-1])

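# Disabled parallel variant, kept for reference. NOTE: it iterates over a list
# named `recommenders` (one recommender per scheme), which is not defined in the
# cells shown here, so it cannot be re-enabled as-is.
# with Pool(min(cpu_count(), len(weighting_schemes))) as pool:
#     results = [pool.apply_async(test_weighting_scheme, args=(rec, weighting_scheme)) for rec, weighting_scheme in zip(recommenders, weighting_schemes)]
#     output = [p.get() for p in results]
# print(output)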

5000it [01:26, 58.09it/s] 
5000it [00:53, 93.16it/s] 


{'playtime': True, 'sentiment': 'rating', 'reviews': True} {'HR@10': 0.4228, 'nDCG@10': 0.06525763278935312, 'recall@10': 0.025397263021765485, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.0587}


5000it [00:13, 361.46it/s]
5000it [00:13, 361.76it/s]


{'playtime': True, 'sentiment': 'rating', 'reviews': False} {'HR@10': 0.4246, 'nDCG@10': 0.06547651359661667, 'recall@10': 0.025474629313172032, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.05892000000000001}


5000it [00:14, 347.82it/s]
5000it [00:13, 379.53it/s]


{'playtime': True, 'sentiment': 'n_reviews', 'reviews': True} {'HR@10': 0.4146, 'nDCG@10': 0.06433291614347755, 'recall@10': 0.024974584370984838, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.05772000000000001}


5000it [00:13, 367.87it/s]
5000it [00:13, 381.19it/s]


{'playtime': True, 'sentiment': 'n_reviews', 'reviews': False} {'HR@10': 0.4148, 'nDCG@10': 0.06441256600200916, 'recall@10': 0.024913667555266528, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.0578}


5000it [00:15, 318.34it/s]
5000it [00:12, 386.12it/s]


{'playtime': True, 'sentiment': 'mixed', 'reviews': True} {'HR@10': 0.413, 'nDCG@10': 0.06434687302818139, 'recall@10': 0.02484323566015497, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.057479999999999996}


5000it [00:14, 336.49it/s]
5000it [00:13, 383.01it/s]


{'playtime': True, 'sentiment': 'mixed', 'reviews': False} {'HR@10': 0.4132, 'nDCG@10': 0.06448905343819163, 'recall@10': 0.024869328961992117, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.05765999999999999}


5000it [00:11, 425.17it/s]
5000it [00:13, 373.71it/s]


{'playtime': True, 'sentiment': False, 'reviews': True} {'HR@10': 0.4228, 'nDCG@10': 0.06501409436726276, 'recall@10': 0.02547881659901774, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.0587}


5000it [00:11, 439.99it/s]
5000it [00:13, 371.45it/s]


{'playtime': True, 'sentiment': False, 'reviews': False} {'HR@10': 0.4224, 'nDCG@10': 0.06492164888148048, 'recall@10': 0.025425396248439064, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.058620000000000005}


5000it [00:13, 368.19it/s]
5000it [00:12, 398.76it/s]


{'playtime': False, 'sentiment': 'rating', 'reviews': True} {'HR@10': 0.4594, 'nDCG@10': 0.06752108300078562, 'recall@10': 0.02905673362489305, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.06522}


5000it [00:12, 388.93it/s]
5000it [00:13, 359.58it/s]


{'playtime': False, 'sentiment': 'rating', 'reviews': False} {'HR@10': 0.459, 'nDCG@10': 0.06741732313561816, 'recall@10': 0.029033907222324944, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.06516}


5000it [00:13, 367.01it/s]
5000it [00:12, 403.24it/s]


{'playtime': False, 'sentiment': 'n_reviews', 'reviews': True} {'HR@10': 0.4476, 'nDCG@10': 0.06641093911803564, 'recall@10': 0.028283457764622336, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.06412000000000001}


5000it [00:13, 383.27it/s]
5000it [00:12, 396.98it/s]


{'playtime': False, 'sentiment': 'n_reviews', 'reviews': False} {'HR@10': 0.4474, 'nDCG@10': 0.0663779666088508, 'recall@10': 0.028280045829780814, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.06402}


5000it [00:14, 333.66it/s]
5000it [00:12, 398.33it/s]


{'playtime': False, 'sentiment': 'mixed', 'reviews': True} {'HR@10': 0.4396, 'nDCG@10': 0.064989499818802, 'recall@10': 0.027634718104137767, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.062340000000000007}


5000it [00:14, 349.76it/s]
5000it [00:12, 402.03it/s]


{'playtime': False, 'sentiment': 'mixed', 'reviews': False} {'HR@10': 0.4386, 'nDCG@10': 0.06486798234747371, 'recall@10': 0.02760598326408653, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.0622}


5000it [00:11, 453.91it/s]
5000it [00:14, 355.00it/s]


{'playtime': False, 'sentiment': False, 'reviews': True} {'HR@10': 0.4526, 'nDCG@10': 0.067024408265796, 'recall@10': 0.02869035591269122, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.06472}


5000it [00:05, 921.54it/s] 
5000it [00:12, 404.65it/s]


{'playtime': False, 'sentiment': False, 'reviews': False} {'HR@10': 0.4526, 'nDCG@10': 0.06702896196379031, 'recall@10': 0.028736864193779417, 'ideal_recall@10': 0.4699721699630126, 'nRecall@10': 0.06477999999999999}


Let's test with different item representations!

In [3]:
# Optional item columns whose combinations are evaluated as item representations.
information = [
    'specs',
    'genres',
    'early_access',
    'publisher',
    'developer',
]
def test_representation(columns):
    rec = ImprovedRecommender(items_path, train_path=f"{data_path}_train.parquet", test_path=f"{data_path}_test.parquet", val_path=f"{data_path}_val.parquet", reviews_path=reviews_path, sparse=True, tfidf='smooth', normalize=True, columns=columns)
    rec.generate_recommendations(read_max=5000)
    return rec.evaluate(k=10)

# Metrics collected per representation, in the same order as `representations`.
results = []

# Every subset of `information` with 1 .. len(information) - 1 members, ordered
# by subset size and then in combinations() order, each extended with 'tags'.
# NOTE(review): range() stops before len(information), so the full
# five-column set (and a 'tags'-only run) is never evaluated; confirm this is
# intended.
representations = [
    list(subset) + ['tags']
    for size in range(1, len(information))
    for subset in combinations(information, size)
]

for columns in representations:
    results.append(test_representation(columns))
    # Print each column set next to its metrics as soon as they are available.
    print(columns, results[-1])

# Disabled parallel variant, kept for reference (adapted to this cell's
# function and inputs; the earlier copy still referred to the weighting-scheme
# experiment).
# with Pool(min(cpu_count(), len(representations))) as pool:
#     results = [pool.apply_async(test_representation, args=(columns,)) for columns in representations]
#     output = [p.get() for p in results]
# print(output)

5000it [00:08, 613.82it/s]
5000it [00:14, 333.63it/s]


{'HR@10': 0.5554, 'nDCG@10': 0.09560604045645142, 'recall@10': 0.029763732909381786, 'ideal_recall@10': 0.3672989058057593, 'nRecall@10': 0.0841}


5000it [00:08, 611.87it/s]
2523it [00:07, 326.59it/s]


KeyboardInterrupt: 