In [1]:
from src.recommenders import ImprovedRecommender
from sklearn.decomposition import TruncatedSVD
import pandas as pd
import numpy as np
from os import cpu_count
from multiprocessing import Pool
from itertools import combinations

In [2]:
# Folder for qualitative evaluation artefacts.
qual_eval_folder = "./evaluation"

# Pickled item (game) table handed to the recommender as its first argument.
items_path = "./data/games.pkl"

# Prefix of the interaction split files; the cells below append
# "_train.parquet", "_test.parquet" and "_val.parquet" to it.
data_path = "./data/interactions_splits_0"

# Reviews table passed to the recommender as `reviews_path`.
reviews_path = "./data/reviews.parquet"

Let's test with different feedback weighting schemes!

In [3]:
# Full grid of feedback weighting schemes: 2 playtime options x 4 sentiment
# modes x 2 review options = 16 dicts, with `reviews` varying fastest.
weighting_schemes = [
    dict(playtime=use_playtime, sentiment=sentiment_mode, reviews=use_reviews)
    for use_playtime in (True, False)
    for sentiment_mode in ('rating', 'n_reviews', 'mixed', False)
    for use_reviews in (True, False)
]
def test_weighting_scheme(rec, weighting_scheme):
    rec.set_weighting_scheme(weighting_scheme)
    rec.generate_recommendations()
    return rec.evaluate(k=10)

# A single recommender is built once and reused for every scheme; only its
# weighting scheme is swapped between evaluations.
rec = ImprovedRecommender(
    items_path,
    train_path=f"{data_path}_train.parquet",
    test_path=f"{data_path}_test.parquet",
    val_path=f"{data_path}_val.parquet",
    reviews_path=reviews_path,
    sparse=True,
    tfidf='smooth',
    normalize=True,
)

# Print the evaluation result of each weighting scheme in grid order.
for weighting_scheme in weighting_schemes:
    print(test_weighting_scheme(rec, weighting_scheme))

# with Pool(min(cpu_count(), len(weighting_schemes))) as pool:
#     results = [pool.apply_async(test_weighting_scheme, args=(rec, weighting_scheme)) for rec, weighting_scheme in zip(recommenders, weighting_schemes)]
#     output = [p.get() for p in results]
# print(output)

Let's test with different item representations!

In [3]:
# Optional item-metadata columns. Every representation evaluated below is a
# subset of these with the 'tags' column appended.
information = ['specs', 'genres', 'early_access', 'publisher', 'developer']


def test_item_representation(columns):
    """Evaluate a recommender whose items are represented by `columns`.

    A fresh `ImprovedRecommender` is built for the given columns (same data
    paths and TF-IDF settings as the weighting-scheme experiment),
    recommendations are generated with `read_max=5000`, and the value
    returned by `evaluate(k=10)` is handed back.

    This helper was previously also called `test_weighting_scheme`, which
    shadowed the two-argument `test_weighting_scheme(rec, weighting_scheme)`
    defined in the weighting-scheme cell with an incompatible signature.
    """
    rec = ImprovedRecommender(
        items_path,
        train_path=f"{data_path}_train.parquet",
        test_path=f"{data_path}_test.parquet",
        val_path=f"{data_path}_val.parquet",
        reviews_path=reviews_path,
        sparse=True,
        tfidf='smooth',
        normalize=True,
        columns=columns,
    )
    rec.generate_recommendations(read_max=5000)
    return rec.evaluate(k=10)


# Every combination of 1 .. len(information) - 1 optional columns, each with
# 'tags' appended, ordered by combination size.
# NOTE(review): the range stops before len(information), so the combination
# of all five optional columns is never evaluated — confirm this is intended.
representations = [
    list(combo) + ['tags']
    for r in range(1, len(information))
    for combo in combinations(information, r)
]

for columns in representations:
    print(test_item_representation(columns))

# with Pool(min(cpu_count(), len(weighting_schemes))) as pool:
#     results = [pool.apply_async(test_weighting_scheme, args=(rec, weighting_scheme)) for rec, weighting_scheme in zip(recommenders, weighting_schemes)]
#     output = [p.get() for p in results]
# print(output)

5000it [00:07, 630.09it/s]
5000it [00:07, 667.61it/s]


                                                item_id  \
0     [3485, 2370, 163, 2188, 2484, 2130, 3197, 413,...   
1     [470, 3223, 1912, 4349, 2249, 380, 3860, 1483,...   
2     [173, 139, 2088, 2132, 285, 352, 678, 521, 798...   
3     [206, 299, 354, 1125, 2196, 2839, 1752, 1410, ...   
4     [417, 1897, 4786, 840, 1637, 3957, 926, 505, 4...   
...                                                 ...   
4995  [1023, 848, 375, 660, 762, 649, 222, 1182, 454...   
4996  [77, 376, 30, 2133, 412, 492, 337, 372, 1031, ...   
4997  [1301, 12, 1384, 122, 381, 41, 231, 669, 100, ...   
4998  [29, 640, 362, 4935, 343, 494, 61, 7, 41, 527,...   
4999  [82, 958, 32, 362, 30, 264, 142, 179, 226, 64,...   

                                       playtime_forever  \
0     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
1     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
2     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
3     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...

5000it [00:08, 602.20it/s]
5000it [00:15, 330.25it/s]


                                                item_id  \
0     [3485, 2370, 163, 2188, 2484, 2130, 3197, 413,...   
1     [470, 3223, 1912, 4349, 2249, 380, 3860, 1483,...   
2     [173, 139, 2088, 2132, 285, 352, 678, 521, 798...   
3     [206, 299, 354, 1125, 2196, 2839, 1752, 1410, ...   
4     [417, 1897, 4786, 840, 1637, 3957, 926, 505, 4...   
...                                                 ...   
4995  [1023, 848, 375, 660, 762, 649, 222, 1182, 454...   
4996  [77, 376, 30, 2133, 412, 492, 337, 372, 1031, ...   
4997  [1301, 12, 1384, 122, 381, 41, 231, 669, 100, ...   
4998  [29, 640, 362, 4935, 343, 494, 61, 7, 41, 527,...   
4999  [82, 958, 32, 362, 30, 264, 142, 179, 226, 64,...   

                                       playtime_forever  \
0     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
1     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
2     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
3     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...

5000it [00:07, 636.43it/s]
5000it [00:06, 776.73it/s]


                                                item_id  \
0     [3485, 2370, 163, 2188, 2484, 2130, 3197, 413,...   
1     [470, 3223, 1912, 4349, 2249, 380, 3860, 1483,...   
2     [173, 139, 2088, 2132, 285, 352, 678, 521, 798...   
3     [206, 299, 354, 1125, 2196, 2839, 1752, 1410, ...   
4     [417, 1897, 4786, 840, 1637, 3957, 926, 505, 4...   
...                                                 ...   
4995  [1023, 848, 375, 660, 762, 649, 222, 1182, 454...   
4996  [77, 376, 30, 2133, 412, 492, 337, 372, 1031, ...   
4997  [1301, 12, 1384, 122, 381, 41, 231, 669, 100, ...   
4998  [29, 640, 362, 4935, 343, 494, 61, 7, 41, 527,...   
4999  [82, 958, 32, 362, 30, 264, 142, 179, 226, 64,...   

                                       playtime_forever  \
0     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
1     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
2     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
3     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...

5000it [00:17, 284.67it/s]
5000it [00:11, 419.00it/s]


                                                item_id  \
0     [3485, 2370, 163, 2188, 2484, 2130, 3197, 413,...   
1     [470, 3223, 1912, 4349, 2249, 380, 3860, 1483,...   
2     [173, 139, 2088, 2132, 285, 352, 678, 521, 798...   
3     [206, 299, 354, 1125, 2196, 2839, 1752, 1410, ...   
4     [417, 1897, 4786, 840, 1637, 3957, 926, 505, 4...   
...                                                 ...   
4995  [1023, 848, 375, 660, 762, 649, 222, 1182, 454...   
4996  [77, 376, 30, 2133, 412, 492, 337, 372, 1031, ...   
4997  [1301, 12, 1384, 122, 381, 41, 231, 669, 100, ...   
4998  [29, 640, 362, 4935, 343, 494, 61, 7, 41, 527,...   
4999  [82, 958, 32, 362, 30, 264, 142, 179, 226, 64,...   

                                       playtime_forever  \
0     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
1     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
2     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
3     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...

5000it [00:21, 233.69it/s]
5000it [00:12, 403.40it/s]


                                                item_id  \
0     [3485, 2370, 163, 2188, 2484, 2130, 3197, 413,...   
1     [470, 3223, 1912, 4349, 2249, 380, 3860, 1483,...   
2     [173, 139, 2088, 2132, 285, 352, 678, 521, 798...   
3     [206, 299, 354, 1125, 2196, 2839, 1752, 1410, ...   
4     [417, 1897, 4786, 840, 1637, 3957, 926, 505, 4...   
...                                                 ...   
4995  [1023, 848, 375, 660, 762, 649, 222, 1182, 454...   
4996  [77, 376, 30, 2133, 412, 492, 337, 372, 1031, ...   
4997  [1301, 12, 1384, 122, 381, 41, 231, 669, 100, ...   
4998  [29, 640, 362, 4935, 343, 494, 61, 7, 41, 527,...   
4999  [82, 958, 32, 362, 30, 264, 142, 179, 226, 64,...   

                                       playtime_forever  \
0     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
1     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
2     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
3     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...

4037it [00:06, 612.36it/s]


KeyboardInterrupt: 