In [1]:
from src.recommenders import ImprovedRecommender
from sklearn.decomposition import TruncatedSVD, PCA, IncrementalPCA, KernelPCA, SparsePCA
import pandas as pd
import numpy as np
from os import cpu_count
from multiprocessing import Pool
from itertools import combinations
import pickle

In [2]:
# --- Input data -------------------------------------------------------------
# Item catalogue, handed to ImprovedRecommender as its first argument.
items_path = "./data/games.pkl"
# Prefix shared by all interaction splits: the split index and a
# "_train.parquet" / "_test.parquet" / "_val.parquet" suffix are appended to it.
data_path = "./data/interactions_splits_"
# Review data, handed to ImprovedRecommender as `reviews_path`.
reviews_path = "./data/reviews.parquet"

# --- Output -----------------------------------------------------------------
# Directory under which the evaluation pickles are written.
qual_eval_folder = "./evaluation"

In [3]:
def average_results(results):
    """Average a sequence of metric dictionaries key by key.

    Args:
        results: Sequence of dicts (one per split) mapping a metric name to a
            number. The keys of the first dict decide which metrics are
            averaged; every other dict must contain those keys as well.

    Returns:
        A dict mapping each metric name to the arithmetic mean of its values
        over all entries of ``results``. An empty ``results`` yields an empty
        dict instead of failing on ``results[0]``.

    Raises:
        KeyError: If a later dict lacks a key that the first dict has.
    """
    # Nothing to average: return an empty summary rather than raising an
    # unhelpful IndexError when looking at the first element.
    if not results:
        return {}
    n_results = len(results)
    return {
        key: sum(result[key] for result in results) / n_results
        for key in results[0]
    }


In [9]:
# Baseline run: evaluate the default recommender configuration on each of the
# three data splits and average the per-split metrics.
split_results = []
for split in [data_path + str(i) for i in range(3)]:
    rec = ImprovedRecommender(
        items_path,
        train_path=f"{split}_train.parquet",
        test_path=f"{split}_test.parquet",
        val_path=f"{split}_val.parquet",
        reviews_path=reviews_path,
        sparse=True,
        tfidf='smooth',
        normalize=True,
    )
    rec.generate_recommendations(read_max=1000)
    # NOTE(review): this is the only cell that passes val=True — presumably it
    # evaluates on the validation split instead of the test split; confirm.
    split_results.append(rec.evaluate(k=10, val=True))

# Average once and reuse the result for both the pickle and the printout.
averaged_results = average_results(split_results)

# Write below the configured evaluation folder instead of repeating its path.
with open(f"{qual_eval_folder}/results.pickle", 'wb') as handle:
    pickle.dump(averaged_results, handle, protocol=pickle.HIGHEST_PROTOCOL)
print(averaged_results)

1000it [00:01, 619.28it/s]
1000it [00:02, 391.61it/s]
1000it [00:01, 608.11it/s]
1000it [00:02, 394.59it/s]
1000it [00:01, 602.65it/s]
1000it [00:02, 391.95it/s]


{'HR@10': 0.28833333333333333, 'nDCG@10': 0.08141360603426297, 'recall@10': 0.10015726969805705, 'ideal_recall@10': 0.935365173648156, 'nRecall@10': 0.10452896825396825}


Let's test with different item representations!

In [12]:
information = ['specs', 'genres', 'early_access', 'publisher', 'developer']
def test_representation(columns, split):
    rec = ImprovedRecommender(items_path, train_path=f"{split}_train.parquet", test_path=f"{split}_test.parquet", val_path=f"{split}_val.parquet", reviews_path=reviews_path, sparse=True, tfidf='smooth', normalize=True, columns=columns, weighting_scheme={})
    rec.generate_recommendations(read_max=5000)
    return rec.evaluate(k=10)

# Sweep over item representations: every non-empty subset of `information`
# (smallest subsets first, in itertools.combinations order), each extended
# with the 'tags' column.
results = []
representations = [
    list(subset) + ['tags']
    for r in range(1, len(information) + 1)
    for subset in combinations(information, r)
]
for columns in representations:
    # Evaluate the representation on each of the three splits, then average.
    split_results = []
    for split in [data_path + str(i) for i in range(3)]:
        split_results.append(test_representation(columns, split))
    results.append(average_results(split_results))
    print(columns, results[-1])

# `representations` and `results` are index-aligned; they are stored in two
# separate pickles below the configured evaluation folder.
with open(f"{qual_eval_folder}/representations.pickle", 'wb') as handle:
    pickle.dump(representations, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open(f"{qual_eval_folder}/representations_results.pickle", 'wb') as handle:
    pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

5000it [00:00, 8446.30it/s]
5000it [00:10, 472.60it/s]
5000it [00:00, 8531.39it/s]
5000it [00:10, 474.53it/s]
5000it [00:00, 7721.89it/s]
5000it [00:10, 467.30it/s]


['specs', 'tags'] {'HR@10': 0.1902, 'nDCG@10': 0.051539207908820474, 'recall@10': 0.06717211479669648, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.06939227513227514}


5000it [00:00, 8516.16it/s]
5000it [00:09, 518.66it/s]
5000it [00:00, 8410.25it/s]
5000it [00:09, 521.11it/s]
5000it [00:00, 8665.30it/s]
5000it [00:09, 524.44it/s]


['genres', 'tags'] {'HR@10': 0.19013333333333335, 'nDCG@10': 0.04837447216211648, 'recall@10': 0.06477183772303335, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.06733669312169312}


5000it [00:00, 8675.92it/s]
5000it [00:09, 512.81it/s]
5000it [00:00, 8757.40it/s]
5000it [00:09, 507.66it/s]
5000it [00:00, 8127.73it/s]
5000it [00:10, 495.57it/s]


['early_access', 'tags'] {'HR@10': 0.19053333333333333, 'nDCG@10': 0.04850597681774197, 'recall@10': 0.06500904155128008, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.06756915343915343}


5000it [00:01, 3238.20it/s]
5000it [00:13, 374.62it/s]
5000it [00:01, 3148.72it/s]
5000it [00:12, 393.92it/s]
5000it [00:01, 3215.19it/s]
5000it [00:12, 393.09it/s]


['publisher', 'tags'] {'HR@10': 0.2614666666666667, 'nDCG@10': 0.07419843603846196, 'recall@10': 0.0944219852957892, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09824222222222223}


5000it [00:02, 2470.06it/s]
5000it [00:12, 400.92it/s]
5000it [00:02, 2469.48it/s]
5000it [00:12, 390.10it/s]
5000it [00:02, 2412.35it/s]
5000it [00:12, 389.72it/s]


['developer', 'tags'] {'HR@10': 0.2484666666666667, 'nDCG@10': 0.06991308061813012, 'recall@10': 0.08976327941508815, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.0932865873015873}


5000it [00:00, 8246.44it/s]
5000it [00:10, 472.12it/s]
5000it [00:00, 8235.60it/s]
5000it [00:10, 475.89it/s]
5000it [00:00, 7942.81it/s]
5000it [00:11, 441.32it/s]


['specs', 'genres', 'tags'] {'HR@10': 0.19013333333333335, 'nDCG@10': 0.051512375330560335, 'recall@10': 0.06709598426997455, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.06932291005291005}


5000it [00:00, 8264.45it/s]
5000it [00:10, 472.08it/s]
5000it [00:00, 8441.87it/s]
5000it [00:10, 471.72it/s]
5000it [00:00, 8385.41it/s]
5000it [00:10, 478.96it/s]


['specs', 'early_access', 'tags'] {'HR@10': 0.1902, 'nDCG@10': 0.051539207908820474, 'recall@10': 0.06717211479669648, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.06939227513227514}


5000it [00:01, 3126.66it/s]
5000it [00:13, 372.61it/s]
5000it [00:01, 3204.06it/s]
5000it [00:13, 377.57it/s]
5000it [00:01, 3172.88it/s]
5000it [00:13, 362.35it/s]


['specs', 'publisher', 'tags'] {'HR@10': 0.26313333333333333, 'nDCG@10': 0.07651871451299042, 'recall@10': 0.09571626259231343, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09928719576719576}


5000it [00:02, 2450.83it/s]
5000it [00:13, 361.73it/s]
5000it [00:02, 2364.78it/s]
5000it [00:13, 366.94it/s]
5000it [00:02, 2424.01it/s]
5000it [00:14, 355.39it/s]


['specs', 'developer', 'tags'] {'HR@10': 0.25279999999999997, 'nDCG@10': 0.07369298590099534, 'recall@10': 0.09162933459030874, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09512396825396825}


5000it [00:00, 8582.96it/s]
5000it [00:09, 515.76it/s]
5000it [00:00, 8402.83it/s]
5000it [00:09, 515.35it/s]
5000it [00:00, 8625.39it/s]
5000it [00:09, 502.72it/s]


['genres', 'early_access', 'tags'] {'HR@10': 0.19013333333333335, 'nDCG@10': 0.04837447216211648, 'recall@10': 0.06477183772303335, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.06733669312169312}


5000it [00:01, 3235.87it/s]
5000it [00:12, 391.71it/s]
5000it [00:01, 3053.43it/s]
5000it [00:12, 397.01it/s]
5000it [00:01, 3092.27it/s]
5000it [00:12, 386.27it/s]


['genres', 'publisher', 'tags'] {'HR@10': 0.26120000000000004, 'nDCG@10': 0.07417781192556498, 'recall@10': 0.094320090913855, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09813222222222222}


5000it [00:02, 2472.80it/s]
5000it [00:12, 397.85it/s]
5000it [00:02, 2443.52it/s]
5000it [00:12, 393.88it/s]
5000it [00:02, 2407.66it/s]
5000it [00:12, 387.72it/s]


['genres', 'developer', 'tags'] {'HR@10': 0.24793333333333334, 'nDCG@10': 0.06985427355294115, 'recall@10': 0.08965999632030504, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09318949735449734}


5000it [00:01, 3159.24it/s]
5000it [00:12, 392.94it/s]
5000it [00:01, 3188.85it/s]
5000it [00:12, 397.62it/s]
5000it [00:01, 3137.64it/s]
5000it [00:12, 392.00it/s]


['early_access', 'publisher', 'tags'] {'HR@10': 0.2614666666666667, 'nDCG@10': 0.07419843603846196, 'recall@10': 0.0944219852957892, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09824222222222223}


5000it [00:02, 2443.28it/s]
5000it [00:12, 386.36it/s]
5000it [00:02, 2465.74it/s]
5000it [00:12, 389.74it/s]
5000it [00:02, 2324.17it/s]
5000it [00:13, 384.59it/s]


['early_access', 'developer', 'tags'] {'HR@10': 0.2484666666666667, 'nDCG@10': 0.06991308061813012, 'recall@10': 0.08976327941508815, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.0932865873015873}


5000it [00:02, 2134.18it/s]
5000it [00:13, 372.11it/s]
5000it [00:02, 2140.26it/s]
5000it [00:13, 383.15it/s]
5000it [00:02, 2165.01it/s]
5000it [00:13, 374.47it/s]


['publisher', 'developer', 'tags'] {'HR@10': 0.2594666666666667, 'nDCG@10': 0.07601606741859933, 'recall@10': 0.09694517402355, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.10029952380952382}


5000it [00:00, 8221.29it/s]
5000it [00:10, 476.53it/s]
5000it [00:00, 8314.57it/s]
5000it [00:10, 467.58it/s]
5000it [00:00, 8223.52it/s]
5000it [00:10, 467.84it/s]


['specs', 'genres', 'early_access', 'tags'] {'HR@10': 0.19013333333333335, 'nDCG@10': 0.051512375330560335, 'recall@10': 0.06709598426997455, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.06932291005291005}


5000it [00:01, 3147.01it/s]
5000it [00:13, 362.60it/s]
5000it [00:01, 3078.46it/s]
5000it [00:13, 365.61it/s]
5000it [00:01, 3055.79it/s]
5000it [00:13, 358.59it/s]


['specs', 'genres', 'publisher', 'tags'] {'HR@10': 0.2630666666666666, 'nDCG@10': 0.07650533060930705, 'recall@10': 0.09571198603553689, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09927878306878307}


5000it [00:02, 2336.47it/s]
5000it [00:14, 355.91it/s]
5000it [00:02, 2252.43it/s]
5000it [00:13, 358.10it/s]
5000it [00:02, 2454.53it/s]
5000it [00:13, 364.31it/s]


['specs', 'genres', 'developer', 'tags'] {'HR@10': 0.25286666666666663, 'nDCG@10': 0.07365126912390114, 'recall@10': 0.09158171345928645, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09508650793650793}


5000it [00:01, 3112.44it/s]
5000it [00:13, 364.38it/s]
5000it [00:01, 3147.18it/s]
5000it [00:13, 370.43it/s]
5000it [00:01, 3140.68it/s]
5000it [00:13, 360.59it/s]


['specs', 'early_access', 'publisher', 'tags'] {'HR@10': 0.26313333333333333, 'nDCG@10': 0.07651871451299042, 'recall@10': 0.09571626259231343, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09928719576719576}


5000it [00:02, 2400.17it/s]
5000it [00:14, 355.94it/s]
5000it [00:02, 2357.10it/s]
5000it [00:13, 358.32it/s]
5000it [00:02, 2381.58it/s]
5000it [00:14, 357.09it/s]


['specs', 'early_access', 'developer', 'tags'] {'HR@10': 0.25279999999999997, 'nDCG@10': 0.07369298590099534, 'recall@10': 0.09162933459030874, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09512396825396825}


5000it [00:02, 2164.20it/s]
5000it [00:14, 344.91it/s]
5000it [00:02, 2158.60it/s]
5000it [00:14, 341.06it/s]
5000it [00:02, 2179.22it/s]
5000it [00:14, 356.25it/s]


['specs', 'publisher', 'developer', 'tags'] {'HR@10': 0.26366666666666666, 'nDCG@10': 0.0792388269982653, 'recall@10': 0.09852182418310478, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.1018154232804233}


5000it [00:01, 3151.97it/s]
5000it [00:12, 394.82it/s]
5000it [00:01, 3171.09it/s]
5000it [00:12, 392.66it/s]
5000it [00:01, 3140.57it/s]
5000it [00:12, 394.65it/s]


['genres', 'early_access', 'publisher', 'tags'] {'HR@10': 0.26120000000000004, 'nDCG@10': 0.07417781192556498, 'recall@10': 0.094320090913855, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09813222222222222}


5000it [00:02, 2401.02it/s]
5000it [00:12, 391.11it/s]
5000it [00:02, 2439.21it/s]
5000it [00:12, 387.98it/s]
5000it [00:02, 2350.88it/s]
5000it [00:13, 383.48it/s]


['genres', 'early_access', 'developer', 'tags'] {'HR@10': 0.24793333333333334, 'nDCG@10': 0.06985427355294115, 'recall@10': 0.08965999632030504, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09318949735449734}


5000it [00:02, 2179.56it/s]
5000it [00:13, 378.84it/s]
5000it [00:02, 2215.09it/s]
5000it [00:12, 392.17it/s]
5000it [00:02, 2215.62it/s]
5000it [00:13, 374.54it/s]


['genres', 'publisher', 'developer', 'tags'] {'HR@10': 0.259, 'nDCG@10': 0.07588120370769746, 'recall@10': 0.09660555468863653, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09995698412698413}


5000it [00:02, 2155.40it/s]
5000it [00:13, 371.63it/s]
5000it [00:02, 2192.12it/s]
5000it [00:13, 384.09it/s]
5000it [00:02, 2190.60it/s]
5000it [00:13, 384.37it/s]


['early_access', 'publisher', 'developer', 'tags'] {'HR@10': 0.2594666666666667, 'nDCG@10': 0.07601606741859933, 'recall@10': 0.09694517402355, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.10029952380952382}


5000it [00:01, 3124.37it/s]
5000it [00:13, 374.15it/s]
5000it [00:01, 3122.54it/s]
5000it [00:13, 374.98it/s]
5000it [00:01, 3118.81it/s]
5000it [00:13, 365.13it/s]


['specs', 'genres', 'early_access', 'publisher', 'tags'] {'HR@10': 0.2630666666666666, 'nDCG@10': 0.07650533060930705, 'recall@10': 0.09571198603553689, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09927878306878307}


5000it [00:02, 2272.72it/s]
5000it [00:13, 357.65it/s]
5000it [00:02, 2352.38it/s]
5000it [00:13, 360.18it/s]
5000it [00:02, 2412.98it/s]
5000it [00:13, 363.06it/s]


['specs', 'genres', 'early_access', 'developer', 'tags'] {'HR@10': 0.25286666666666663, 'nDCG@10': 0.07365126912390114, 'recall@10': 0.09158171345928645, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09508650793650793}


5000it [00:02, 2100.06it/s]
5000it [00:14, 351.00it/s]
5000it [00:02, 2147.95it/s]
5000it [00:14, 344.43it/s]
5000it [00:02, 2121.04it/s]
5000it [00:14, 340.63it/s]


['specs', 'genres', 'publisher', 'developer', 'tags'] {'HR@10': 0.26286666666666664, 'nDCG@10': 0.0790052078761835, 'recall@10': 0.09815592627909164, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.10144746031746033}


5000it [00:02, 2154.34it/s]
5000it [00:14, 350.59it/s]
5000it [00:02, 2116.92it/s]
5000it [00:14, 346.81it/s]
5000it [00:02, 2114.54it/s]
5000it [00:14, 343.76it/s]


['specs', 'early_access', 'publisher', 'developer', 'tags'] {'HR@10': 0.26366666666666666, 'nDCG@10': 0.0792388269982653, 'recall@10': 0.09852182418310478, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.1018154232804233}


5000it [00:02, 2157.93it/s]
5000it [00:13, 381.56it/s]
5000it [00:02, 2210.30it/s]
5000it [00:12, 390.10it/s]
5000it [00:02, 2169.08it/s]
5000it [00:13, 382.99it/s]


['genres', 'early_access', 'publisher', 'developer', 'tags'] {'HR@10': 0.259, 'nDCG@10': 0.07588120370769746, 'recall@10': 0.09660555468863653, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.09995698412698413}


5000it [00:02, 2121.00it/s]
5000it [00:14, 349.93it/s]
5000it [00:02, 2077.72it/s]
5000it [00:14, 349.85it/s]
5000it [00:02, 2104.36it/s]
5000it [00:14, 344.27it/s]


['specs', 'genres', 'early_access', 'publisher', 'developer', 'tags'] {'HR@10': 0.26286666666666664, 'nDCG@10': 0.0790052078761835, 'recall@10': 0.09815592627909164, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.10144746031746033}


Let's test different weighting schemes! :-)

In [4]:
weighting_schemes = [{'playtime': playtime, 'sentiment': sentiment, 'reviews': reviews} for playtime in [True, False] for sentiment in ['rating', 'n_reviews', 'mixed', False] for reviews in [True, False]]
def test_weighting_scheme(rec, weighting_scheme):
    rec.set_weighting_scheme(weighting_scheme)
    rec.generate_recommendations(read_max=5000, silence=True)
    return rec.evaluate(k=10)

# Sweep over all weighting schemes. One recommender per split is built up
# front and reused; test_weighting_scheme swaps the scheme on it in place.
results = []
recs = [
    ImprovedRecommender(
        items_path,
        train_path=f"{split}_train.parquet",
        test_path=f"{split}_test.parquet",
        val_path=f"{split}_val.parquet",
        reviews_path=reviews_path,
        sparse=True,
        tfidf='smooth',
        normalize=True,
    )
    for split in [data_path + str(i) for i in range(3)]
]
for weighting_scheme in weighting_schemes:
    # Evaluate the scheme on every split, then average the metrics.
    split_results = []
    for rec in recs:
        split_results.append(test_weighting_scheme(rec, weighting_scheme))
    results.append(average_results(split_results))
    print(weighting_scheme, results[-1])

# `weighting_schemes` and `results` are index-aligned; they are stored in two
# separate pickles below the configured evaluation folder.
with open(f"{qual_eval_folder}/weighting_schemes.pickle", 'wb') as handle:
    pickle.dump(weighting_schemes, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open(f"{qual_eval_folder}/weighting_schemes_results.pickle", 'wb') as handle:
    pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

{'playtime': True, 'sentiment': 'rating', 'reviews': True} {'HR@10': 0.17933333333333334, 'nDCG@10': 0.0475884260251058, 'recall@10': 0.058465005786153414, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.06174928571428571}
{'playtime': True, 'sentiment': 'rating', 'reviews': False} {'HR@10': 0.17926666666666669, 'nDCG@10': 0.04753410167307553, 'recall@10': 0.05851466147328083, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.061776984126984125}
{'playtime': True, 'sentiment': 'n_reviews', 'reviews': True} {'HR@10': 0.1774, 'nDCG@10': 0.04757885825547136, 'recall@10': 0.05916118510745074, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.06237457671957672}
{'playtime': True, 'sentiment': 'n_reviews', 'reviews': False} {'HR@10': 0.1769333333333333, 'nDCG@10': 0.04765522202738313, 'recall@10': 0.059140959475475895, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.0623126455026455}
{'playtime': True, 'sentiment': 'mixed', 'reviews': True} {'HR@10': 0.175933333333333



{'playtime': False, 'sentiment': False, 'reviews': True} {'HR@10': 0.264, 'nDCG@10': 0.07926497169109241, 'recall@10': 0.09857171213828686, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.10187431216931216}




{'playtime': False, 'sentiment': False, 'reviews': False} {'HR@10': 0.26366666666666666, 'nDCG@10': 0.0792388269982653, 'recall@10': 0.09852182418310478, 'ideal_recall@10': 0.9429034701783022, 'nRecall@10': 0.1018154232804233}


Finally, let's check out dimensionality reduction

In [4]:
# Sweep over dimensionality-reduction estimators, each reducing to 300
# components. `labels` holds the human-readable name of the estimator at the
# same index in `dim_reds`.
dim_reds = [
    TruncatedSVD(n_components=300, random_state=500),
    PCA(n_components=300, random_state=500, whiten=False),
    IncrementalPCA(n_components=300, whiten=False),
    KernelPCA(n_components=300),
]
labels = [
    "TruncatedSVD(n_components=300)",
    "PCA(n_components=300)",
    "IncrementalPCA(n_components=300)",
    "KernelPCA(n_components=300)",
]
# Guard against the two parallel lists drifting apart when one is edited.
assert len(labels) == len(dim_reds), "labels and dim_reds must be index-aligned"

results = []
for label, dim_red in zip(labels, dim_reds):
    split_results = []
    for split in [data_path + str(i) for i in range(3)]:
        # NOTE(review): the same estimator instance is handed to the
        # recommender for all three splits — presumably it is refit each
        # time; confirm inside ImprovedRecommender.
        rec = ImprovedRecommender(
            items_path,
            train_path=f"{split}_train.parquet",
            test_path=f"{split}_test.parquet",
            val_path=f"{split}_val.parquet",
            reviews_path=reviews_path,
            sparse=False,
            tfidf='smooth',
            normalize=True,
            dim_red=dim_red,
        )
        rec.generate_recommendations(read_max=5000)
        split_results.append(rec.evaluate(k=10))
    results.append(average_results(split_results))
    # Show the averaged metrics per estimator, as the other sweeps do.
    print(label, results[-1])

# `labels` and `results` are index-aligned; they are stored in two separate
# pickles below the configured evaluation folder.
with open(f"{qual_eval_folder}/dim_reduction.pickle", 'wb') as handle:
    pickle.dump(labels, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open(f"{qual_eval_folder}/dim_reduction_results.pickle", 'wb') as handle:
    pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

5000it [00:00, 40143.87it/s]
5000it [02:00, 41.33it/s]
5000it [00:00, 34598.76it/s]
5000it [02:10, 38.31it/s]
5000it [00:00, 36323.63it/s]
5000it [02:01, 41.21it/s]
5000it [00:00, 32679.66it/s]
5000it [01:52, 44.61it/s]
5000it [00:00, 35696.63it/s]
5000it [02:06, 39.65it/s]
5000it [00:00, 33774.15it/s]
5000it [01:52, 44.53it/s]
5000it [00:00, 35954.40it/s]
5000it [02:01, 41.25it/s]
5000it [00:00, 30298.05it/s]
5000it [02:01, 41.30it/s]
5000it [00:00, 29749.96it/s]
5000it [02:09, 38.74it/s]
5000it [00:00, 26316.14it/s]
5000it [02:09, 38.50it/s]
5000it [00:00, 25895.40it/s]
5000it [02:05, 39.77it/s]
5000it [00:00, 23364.64it/s]
5000it [02:18, 36.20it/s]


In [None]:
# Single-split trial of SparsePCA as the dimensionality reduction.
# random_state matches the seed used for TruncatedSVD / PCA so that the run is
# repeatable.
dim_red = SparsePCA(n_components=300, n_jobs=8, random_state=500)

# `data_path` is only a prefix ("..._splits_"); the split index has to be
# appended before the "_train/_test/_val" suffix, otherwise the file names
# contain a double underscore and no split number. Use the first split here.
split = data_path + str(0)
rec = ImprovedRecommender(
    items_path,
    train_path=f"{split}_train.parquet",
    test_path=f"{split}_test.parquet",
    val_path=f"{split}_val.parquet",
    reviews_path=reviews_path,
    sparse=False,
    tfidf='smooth',
    normalize=True,
    dim_red=dim_red,
)
rec.generate_recommendations(read_max=1000)
# Last expression of the cell: the evaluation result is shown as cell output.
rec.evaluate(k=10)