Import all the required packages for our code

In [1]:
import glob
import itertools
import shutil
from multiprocessing import Pool
from os import cpu_count
from os.path import exists

from evaluate_recommender import evaluate_recommender, generate_gt, map_id_to_name, parse_json
# Base folder for evaluation artifacts: passed to evaluate_recommender and used as the
# root for the archive / unpack / CSV-mapping cells further down.
qual_eval_folder = './evaluation'

Test the basic recommender using different distance metrics and tf-idf methods

Distance metrics:
- Euclidean distance: `sqrt(sum((x - y)^2))`
- Cosine distance: $1-\frac{x \cdot y}{||x||_2||y||_2}$
- Manhattan distance: `sum(|x - y|)`
- Chebyshev distance: `max(|x - y|)` (listed for reference; it is not in the `metrics` list evaluated below)

Tf-idf methods:
- No tf-idf
- default tf-idf: `tf(t, d) * [log [n/df(t)] + 1]`
- smoothed tf-idf: `tf(t, d) * [log [(1+n)/(1+df(t))] + 1]`
- sublinear tf-idf: `[1 + log(tf(t, d))] * [log [n/df(t)] + 1]`
- smoothed sublinear tf-idf: `[1 + log(tf(t, d))] * [log [(1+n)/(1+df(t))] + 1]`



In [3]:
# Call generate_gt only when the ground-truth file is not already on disk.
gt_file = './data/ground_truth.parquet'
if not exists(gt_file):
    generate_gt(gt_file)

# One evaluate_recommender task is submitted per (distance metric, tf-idf method) pair.
metrics = ['euclidean', 'cosine', 'manhattan']
tfidf = [None, 'default', 'smooth', 'sublinear', 'smooth_sublinear']
combinations = list(itertools.product(metrics, tfidf))

# cpu_count() returns None when the core count cannot be determined; fall back to a
# single worker instead of letting min(None, n) raise a TypeError.
n_workers = min(cpu_count() or 1, len(combinations))

# NOTE(review): the saved output of this cell is
# "TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'". Nothing in this
# cell adds None to a str, so it is presumably raised inside evaluate_recommender for
# the tf-idf=None case and re-raised here by .get() -- verify in evaluate_recommender.
with Pool(n_workers) as pool:
    results = [
        pool.apply_async(evaluate_recommender, args=(metric, tfidf_method, qual_eval_folder))
        for metric, tfidf_method in combinations
    ]
    # .get() blocks until the task finishes and re-raises any exception from the worker.
    output = [pending.get() for pending in results]

# Print "<first> <second>: <third>" for each result; the f-string replaces the
# original '\b' backspace trick, which not every notebook front end renders.
for result in output:
    print(f'{result[0]} {result[1]}: {result[2]}')

TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'

In [12]:
# Zip the contents of <qual_eval_folder>/source into <qual_eval_folder>/evaluation.zip,
# then delete the source directory that was just archived.
source_dir = f'{qual_eval_folder}/source'
archive_base = f'{qual_eval_folder}/evaluation'
shutil.make_archive(archive_base, 'zip', root_dir=source_dir)
shutil.rmtree(source_dir)

In [3]:
# Extract the evaluation archive directly into the evaluation folder.
shutil.unpack_archive(f'{qual_eval_folder}/evaluation.zip', qual_eval_folder)

# Build an id -> app_name lookup from the games file.
# (presumably parse_json returns a pandas DataFrame -- confirm in evaluate_recommender)
games_all = parse_json("./data/steam_games.json")
games = games_all[['id', 'app_name']]
mapping = dict(zip(games.id, games.app_name))

# Run map_id_to_name on every CSV sitting directly in the evaluation folder.
for csv_path in glob.glob(f'{qual_eval_folder}/*.csv'):
    map_id_to_name(mapping, csv_path)

32135it [00:01, 21678.42it/s]


Reading 32135 rows.
