# Generate experiment results

This notebook generates all tables/images from experiments that have already been run. It expects a results folder containing all required experiment results; use `evaluate.py` to generate these files. By default, results with the same model, dataset and similarity measure are overwritten.

The purpose of this (separate) notebook is to cleanly keep track of all (generated) results and to provide a direct export for the paper, which avoids small mistakes and makes changes easy.

In [145]:
import json
import matplotlib.pyplot as plt
import pandas as pd
import ast
from IPython.display import display
import numpy as np

# Load the serialized experiment results into `data`.
data = None
with open('results/data.json', 'r') as results_file:
    data = json.load(results_file)

In [146]:
# The metric keys of the first entry in `data` are used as the template for
# the metric columns (all entries are presumed to share the same keys).
_values = list(data[list(data.keys())[0]].keys())

# Empty frame: three identifying columns followed by one column per metric key.
df = pd.DataFrame(columns=['model', 'dataset', 'metric'] + _values)

# Raw metric identifiers, split into the originally reported ones and the added ones.
_original_metrics_names = [
    'recall@010', 'recall@020',
    'ndcg@010', 'ndcg@020',
]
_added_metrics_names = [
    'accuracy@010', 'accuracy@020',
    'precision@010', 'precision@020',
    'f1@010', 'f1@020',
    'mrr@010', 'mrr@020',
    'phr@010', 'phr@020',
]

# Display names used as column headers in the generated tables.
_original_metrics = [
    'Recall@10', 'Recall@20',
    'NDCG@10', 'NDCG@20',
]
_added_metrics = [
    'Accuracy@10', 'Accuracy@20',
    'Precision@10', 'Precision@20',
    'F1@10', 'F1@20',
    'MRR@10', 'MRR@20',
    'PHR@10', 'PHR@20',
]


In [147]:
# Build the results frame from `data` in a single constructor call.
# Each key of `data` is the string form of a sequence whose first three items
# fill the 'model', 'dataset' and 'metric' columns; each value is a dict whose
# values fill the metric columns, in order.
#
# The frame is constructed here instead of appending rows to the `df` left
# behind by an earlier cell. Appending made this cell fail when re-run, because
# by then `df` had already lost/renamed columns and the row length no longer
# matched.
if not data:
    raise ValueError("No experiment results found in `data`.")

# Metric column names are taken from the first entry.
_metric_keys = list(next(iter(data.values())).keys())

_rows = []
for key, values in data.items():
    key = ast.literal_eval(key)
    _rows.append([key[0], key[1], key[2]] + list(values.values()))

df = pd.DataFrame(_rows, columns=['model', 'dataset', 'metric'] + _metric_keys)

# The 'metric' column is not reported in the tables.
df = df.drop(['metric'], axis=1)

df = df.round(4)

# Maps raw identifiers (column names as well as model/dataset cell values) to
# the spelling used in the report.
updated_column_names = {
    'model': "Model",
    'dataset': "Dataset",
    'top_personal': "TopPersonal",
    'tifuknn':'TIFUKNN',
    'tafeng': "Tafeng",
    'instacart': "Instacart",
    'dunnhumby': "Dunnhumby",
    'tmall': 'Tmall',
    'taobao': 'Taobao',
    'valuedshopper': "ValuedShopper",
    'betavae': 'BetaVAE'
}
df = df.rename(updated_column_names, axis=1)
df = df.replace(updated_column_names)

# NOTE(review): the metric columns are relabelled by POSITION, so this relies
# on the metric order in `data` matching `_original_metrics + _added_metrics`.
# TODO confirm against the file written by `evaluate.py`.
df = df.set_axis(['Model', 'Dataset'] +_original_metrics + _added_metrics, axis=1)
display(df)

Unnamed: 0,Model,Dataset,Recall@10,Recall@20,NDCG@10,NDCG@20,Accuracy@10,Accuracy@20,Precision@10,Precision@20,F1@10,F1@20,MRR@10,MRR@20,PHR@10,PHR@20
0,TopPersonal,Tafeng,0.1297,0.1738,0.107,0.1219,0.1222,0.1725,0.0547,0.0405,0.068,0.0578,0.1993,0.1993,0.3819,0.4963
1,TopPersonal,Instacart,0.3999,0.4603,0.404,0.4225,0.3301,0.4485,0.283,0.2067,0.3112,0.2588,0.6335,0.6335,0.8672,0.9169
2,TIFUKNN,Tafeng,0.1378,0.1868,0.1118,0.1278,0.1295,0.1853,0.0589,0.0432,0.0731,0.0616,0.2093,0.2093,0.3998,0.5165
3,TIFUKNN,Instacart,0.4308,0.4962,0.4247,0.4478,0.3764,0.4885,0.277,0.1956,0.3125,0.2536,0.6346,0.6346,0.8629,0.9083
4,TIFUKNN,Dunnhumby,0.274,0.3184,0.2628,0.2731,0.2383,0.313,0.1545,0.1074,0.1774,0.1394,0.4355,0.4355,0.6311,0.6993
5,TIFUKNN,Taobao,0.2701,0.2734,0.2498,0.251,0.2695,0.2733,0.0758,0.039,0.1142,0.0662,0.2561,0.2561,0.311,0.3149
6,TIFUKNN,Tmall,0.1147,0.1412,0.0963,0.1071,0.1141,0.1412,0.0427,0.0281,0.0576,0.0443,0.1357,0.1357,0.2423,0.2797
7,TIFUKNN,ValuedShopper,0.2731,0.3138,0.2663,0.274,0.2198,0.2999,0.1768,0.1279,0.1972,0.1592,0.4544,0.4544,0.6923,0.7674
8,TopPersonal,Dunnhumby,0.2613,0.308,0.2501,0.2616,0.2265,0.3028,0.1484,0.1044,0.1699,0.1353,0.4162,0.4162,0.6118,0.6883
9,TopPersonal,Taobao,0.2671,0.2724,0.2279,0.2299,0.2664,0.2724,0.0749,0.0389,0.1128,0.066,0.2282,0.2282,0.3097,0.3162


In [148]:
# Index the results by (Dataset, Model) and order the index descending.
df = (
    df
    .set_index(['Dataset', 'Model'])
    .sort_index(level=0, ascending=False)
)
display(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Recall@10,Recall@20,NDCG@10,NDCG@20,Accuracy@10,Accuracy@20,Precision@10,Precision@20,F1@10,F1@20,MRR@10,MRR@20,PHR@10,PHR@20
Dataset,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
ValuedShopper,TopPersonal,0.2656,0.3074,0.26,0.2682,0.2131,0.2936,0.1727,0.1259,0.1925,0.1565,0.4493,0.4493,0.6834,0.7624
ValuedShopper,TIFUKNN,0.2731,0.3138,0.2663,0.274,0.2198,0.2999,0.1768,0.1279,0.1972,0.1592,0.4544,0.4544,0.6923,0.7674
ValuedShopper,BetaVAE,0.1257,0.1444,0.1249,0.1275,0.0992,0.1374,0.0847,0.0619,0.0936,0.0763,0.2748,0.2748,0.4997,0.5868
Tmall,TopPersonal,0.1051,0.1262,0.085,0.0939,0.1045,0.1262,0.0387,0.0251,0.0523,0.0395,0.1218,0.1218,0.2319,0.2709
Tmall,TIFUKNN,0.1147,0.1412,0.0963,0.1071,0.1141,0.1412,0.0427,0.0281,0.0576,0.0443,0.1357,0.1357,0.2423,0.2797
Taobao,TopPersonal,0.2671,0.2724,0.2279,0.2299,0.2664,0.2724,0.0749,0.0389,0.1128,0.066,0.2282,0.2282,0.3097,0.3162
Taobao,TIFUKNN,0.2701,0.2734,0.2498,0.251,0.2695,0.2733,0.0758,0.039,0.1142,0.0662,0.2561,0.2561,0.311,0.3149
Tafeng,TopPersonal,0.1297,0.1738,0.107,0.1219,0.1222,0.1725,0.0547,0.0405,0.068,0.0578,0.1993,0.1993,0.3819,0.4963
Tafeng,TIFUKNN,0.1378,0.1868,0.1118,0.1278,0.1295,0.1853,0.0589,0.0432,0.0731,0.0616,0.2093,0.2093,0.3998,0.5165
Tafeng,BetaVAE,0.1218,0.1446,0.1071,0.1155,0.1183,0.144,0.038,0.0268,0.0507,0.0399,0.1931,0.1931,0.3035,0.3951


In [144]:
# LaTeX table with the original metrics for the report.
# Max-value highlighting (`Styler.highlight_max`) is not applied.
original_metrics = df[_original_metrics].style.format(precision=4)
original_metrics.to_latex(
    buf='results_original_metrics.tex',
    caption="Original metrics",
    label="table:_original_results",
    hrules=True,
    position="H",
    position_float="centering",
)

# LaTeX table with the additional metrics for the report; the Precision and
# F1 columns are left out of this table.
additional_metrics = df[_added_metrics].drop(
    columns=['Precision@10', 'Precision@20', 'F1@10', 'F1@20']
)
display(additional_metrics)
additional_metrics = additional_metrics.style.format(precision=4)
additional_metrics.to_latex(
    buf='results_additional_metrics.tex',
    caption="Additional metrics",
    label="table:additional_results",
    hrules=True,
    position="H",
    position_float="centering",
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy@10,Accuracy@20,MRR@10,MRR@20,PHR@10,PHR@20
Dataset,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ValuedShopper,TopPersonal,0.2131,0.2936,0.4493,0.4493,0.6834,0.7624
ValuedShopper,TIFUKNN,0.2198,0.2999,0.4544,0.4544,0.6923,0.7674
Tmall,TopPersonal,0.1045,0.1262,0.1218,0.1218,0.2319,0.2709
Tmall,TIFUKNN,0.1141,0.1412,0.1357,0.1357,0.2423,0.2797
Taobao,TopPersonal,0.2664,0.2724,0.2282,0.2282,0.3097,0.3162
Taobao,TIFUKNN,0.2695,0.2733,0.2561,0.2561,0.311,0.3149
Tafeng,TopPersonal,0.1222,0.1725,0.1993,0.1993,0.3819,0.4963
Tafeng,TIFUKNN,0.1295,0.1853,0.2093,0.2093,0.3998,0.5165
Instacart,TopPersonal,0.3301,0.4485,0.6335,0.6335,0.8672,0.9169
Instacart,TIFUKNN,0.3764,0.4885,0.6346,0.6346,0.8629,0.9083


### Dataset metrics & (optimal) model hyperparameters

In [136]:
# --- Dataset metrics table ---
# NOTE: `df` is rebound in this cell (first to the dataset metrics, then to the
# hyperparameters, each time ending up as a Styler), so the results frame built
# in the cells above is no longer reachable through `df` afterwards.
df = pd.read_csv('report_results/datasets_data.csv')
df = df.replace(updated_column_names)  # report spelling for dataset names
df = df.set_index('Dataset')
df = df.round(4)
display(df)
df = df.style.format(precision=3)
df.to_latex(
    buf='dataset_metrics.tex',
    caption="Dataset metrics",
    label="table:dataset_metrics",
    hrules=True,
    position="H",
    position_float="centering",
)

# --- Optimal hyperparameters table ---
df = pd.read_csv('report_results/optimal_hyperparameters.csv')
df = df.replace(updated_column_names)
df = df.set_index('Dataset')
df = df.round(4)
# TODO: possibly relabel the columns (e.g. with `set_axis`) before reporting.
display(df)
df = df.style.format(precision=1)
df.to_latex(
    buf='optimal_hyperparams.tex',
    caption="Optimal hyper parameters used",
    label="table:optimal_hyperparameters",
    hrules=True,
    position="H",
    position_float="centering",
)

Unnamed: 0_level_0,#Users,#Items,#Baskets,Avg. basket size,#Baskets per user,Min. basket size,Max. basket size
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Instacart,19935,7999,307353,8.977,15.4178,1,89
Tmall,225886,720871,401987,6.2951,1.7796,4,173
Dunnhumby,36241,4995,289928,7.396,8.0,1,2350
Tafeng,13949,11997,93372,6.2755,6.6938,1,109
Taobao,99305,191744,126034,2.6612,1.2692,2,50
ValuedShopper,10000,7874,598556,8.7182,59.8556,1,128


Unnamed: 0_level_0,Num Nearest Neighbors,Within decay rate,Group decay rate,Group count,Alpha
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Instacart,900,0.9,0.7,3,0.9
Tafeng,300,0.9,0.7,7,0.7
Dunnhumby,900,0.9,0.6,3,0.2
ValuedShopper,300,1.0,0.6,7,0.7
Tmall,100,0.6,0.8,18,0.7
Taobao,300,0.6,0.8,10,0.1
