In [1]:
import os
import sys

# Resolve the notebook's directory only the first time this cell runs.  The
# os.chdir below moves the working directory, so recomputing abspath("") on a
# re-run would climb two more levels (and register a different parent) each time.
if "dir2" not in globals():
    dir2 = os.path.abspath("")
dir1 = os.path.dirname(dir2)

# Make the parent of the notebook directory importable (added at most once).
if dir1 not in sys.path:
    sys.path.append(dir1)

# Same destination as the relative "../.." on a first run -- two levels above
# the notebook directory -- but spelled as an absolute path so that re-running
# this cell leaves the working directory where it already is.
os.chdir(os.path.dirname(dir1))

In [41]:
from pathlib import Path
from itertools import product

import numpy as np
import pandas as pd

from scipy.stats import norm

from src.utils.aggregation import aggregate_methods, filter_data
from src.compare_methods import run_DM, run_CD, bayes_scores, binarize_bayes

# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
# NOTE(review): the recorded output shows %load_ext warning that the extension
# is already loaded when this cell is re-run; `%reload_ext autoreload` is the
# form that message recommends.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
# Build the combined metrics table from the files under results/metrics/
# (relative to the current working directory).  The displayed result has the
# columns Method, Dataset, Metric, k and Value.
result = aggregate_methods(Path("results/metrics/"))

In [24]:
# Show the aggregated table as this cell's output.
result

Unnamed: 0,Method,Dataset,Metric,k,Value
0,most_popular,mts_library,precision,5,0.002639
1,most_popular,mts_library,precision,10,0.007337
2,most_popular,mts_library,precision,20,0.008621
3,most_popular,mts_library,precision,100,0.016131
4,most_popular,mts_library,recall,5,0.002495
...,...,...,...,...,...
9971,implicit_als,movielens_10m,time_fit,100,2.746746
9972,implicit_als,movielens_10m,time_predict,5,2.031604
9973,implicit_als,movielens_10m,time_predict,10,2.031604
9974,implicit_als,movielens_10m,time_predict,20,2.031604


In [25]:
# Persist the aggregated table as agg_metrics.csv in the current working
# directory.  With to_csv's defaults the DataFrame index is written as the
# first, unnamed column.
result.to_csv("agg_metrics.csv")

In [6]:
# Restrict the aggregated table via filter_data(result, "ndcg", 10).
# NOTE(review): presumably this keeps only the "ndcg" metric at cutoff k = 10;
# filter_data lives in src.utils.aggregation and is not visible here -- confirm.
filtered_data = filter_data(result, "ndcg", 10)


['ndcg']


In [7]:
# Compare the methods on the filtered metric with run_DM (imported from
# src.compare_methods).  The recorded output is a table with one row per
# model and the columns Model_name, score and ranks.
# NOTE(review): save_image=True together with image_path=Path(".") presumably
# writes the figure into the current working directory -- confirm in run_DM.
run_DM(
    filtered_data,
    save_image=True,
    image_path=Path("."),
    figsize=(14,10),
    fontsize=20
)

Unnamed: 0,Model_name,score,ranks
0,implicit_als,0.075518,8.0
1,implicit_bpr,0.056486,9.0
2,lightfm,0.136724,3.0
3,most_popular,0.082369,7.0
4,msrec_sasrec,0.101401,6.0
5,random,0.0,10.0
6,recbole_EASE,0.158952,1.0
7,recbole_ItemKNN,0.131242,4.0
8,recbole_MultiVAE,0.139616,2.0
9,recbole_SLIMElastic,0.117692,5.0


In [8]:
# Count how many rows of the filtered table belong to each method and expose
# the tally as a two-column frame: "Method" and "count".
df_counts = filtered_data.groupby("Method").size().reset_index(name="count")

# Largest per-method row count.
# NOTE(review): presumably each row is one dataset, hence the name -- confirm.
max_nb_datasets = df_counts["count"].max()

In [9]:
# Show the per-method row counts as this cell's output.
df_counts

Unnamed: 0,Method,count
0,implicit_als,29
1,implicit_bpr,29
2,lightfm,29
3,most_popular,29
4,msrec_sasrec,29
5,random,29
6,recbole_EASE,29
7,recbole_ItemKNN,29
8,recbole_MultiVAE,29
9,recbole_SLIMElastic,29


In [26]:
# Run run_CD (imported from src.compare_methods) on a copy of the filtered
# table.  The recorded output shows a "Pairwise testing" progress bar and a
# table with the columns Model_name, score and ranks.
# NOTE(review): the .copy() presumably shields filtered_data from in-place
# changes made inside run_CD (the output shows shortened model names such as
# "BPR" and "MostPop") -- confirm in run_CD.
run_CD(
    filtered_data.copy(),
    save_image=True,
    image_path=Path(".")
)

Pairwise testing:   0%|          | 0/45 [00:00<?, ?it/s]

Unnamed: 0,Model_name,score,ranks
0,random,9.827586,10.0
1,BPR,7.241379,9.0
2,MostPop,6.793103,8.0
3,SASRec,6.344828,7.0
4,ALS,5.965517,6.0
5,ItemKNN,4.310345,5.0
6,SLIM,4.275862,3.5
7,MultiVAE,4.275862,3.5
8,LightFM,3.689655,2.0
9,EASE,2.275862,1.0


In [30]:
# Pick a reproducible subset of up to five *distinct* datasets.
# np.random.choice samples with replacement by default, which could return the
# same dataset more than once and silently shrink the subset; replace=False
# guarantees distinct picks, the size is capped so the draw cannot fail when
# fewer than five datasets are present, and the fixed seed makes the subset
# stable across kernel restarts.
unique_datasets = filtered_data["Dataset"].unique()
small = np.random.default_rng(0).choice(
    unique_datasets,
    size=min(5, len(unique_datasets)),
    replace=False,
)

# Keep only the rows that belong to the sampled datasets.
filtered_small_data = filtered_data[filtered_data["Dataset"].isin(small)]

In [13]:
# Repeat the run_CD comparison on the sampled subset of datasets, passing
# image_name="CD_small" alongside the same save_image / image_path arguments.
# NOTE(review): this cell needs filtered_small_data to exist; the recorded
# NameError shows it was executed before the cell that defines that variable.
run_CD(
    filtered_small_data.copy(),
    save_image=True,
    image_path=Path("."),
    image_name="CD_small"
)

NameError: name 'filtered_small_data' is not defined

In [55]:
# Relative result directories, one per method (kept as a tuple of Paths).
methods = tuple(
    Path(relative_dir)
    for relative_dir in (
        "implicit/als",
        "implicit/bpr",
        "lightfm",
        "most_popular",
        "msrec/sasrec",
        "random",
        "recbole/EASE",
        "recbole/ItemKNN",
        "recbole/MultiVAE",
        "recbole/SLIMElastic",
    )
)

# Dataset directory names to inspect (kept as a list of Paths).
datasets = [
    Path(dataset_name)
    for dataset_name in (
        "epinions",
        "movielens_1m",
        "amazon_finefoods",
        "kuairec_full",
        "yelp",
    )
]

In [56]:
def calculate_mean_ci(
    method: Path,
    dataset: Path,
    metrics_dir: Path = Path("results/metrics/"),
) -> "pd.DataFrame | None":
    """Load the ``nfold_100`` metrics table stored for one method/dataset pair.

    Looks in ``metrics_dir / method / dataset`` for files whose name contains
    ``nfold_100`` and whose stem ends with ``True``, and reads the first such
    file (in sorted order) with ``(Run_id, k)`` as the index.

    Args:
        method: Method sub-directory below ``metrics_dir``.
        dataset: Dataset sub-directory below the method directory.
        metrics_dir: Root directory of the metric files; defaults to
            ``results/metrics/`` relative to the current working directory.

    Returns:
        The loaded DataFrame, or ``None`` (after printing a diagnostic) when
        no matching file exists or the ``Diversity@k`` column contains NaN.
    """
    dir_path = Path(metrics_dir).joinpath(method, dataset)

    # Select strictly: a plain ``for ... break`` loop would leave the loop
    # variable bound to the last globbed file when nothing ends with "True",
    # so an arbitrary non-matching file would be loaded instead of reporting
    # the miss.  sorted() makes the pick independent of directory order.
    file = next(
        (
            candidate
            for candidate in sorted(dir_path.glob("*nfold_100*"))
            if candidate.stem.endswith("True")
        ),
        None,
    )
    if file is None:
        print(f"{method}, {dataset} do not exist")
        return None

    dataframe = pd.read_csv(file, index_col=["Run_id", "k"])

    # Reject tables in which any Diversity@k value is missing.
    if dataframe["Diversity@k"].isna().any():
        print(f"{method}, {dataset} bad diversity!")
        return None

    return dataframe


In [61]:
# Probe every method/dataset combination, methods in the outer position.  The
# value returned by calculate_mean_ci is discarded here, so the only visible
# effect of this cell is whatever diagnostics that function prints.
for method in methods:
    for dataset in datasets:
        calculate_mean_ci(method, dataset)

random, amazon_finefoods bad diversity!
random, kuairec_full bad diversity!
random, yelp do not exist
recbole/EASE, yelp bad diversity!
recbole/ItemKNN, epinions bad diversity!
recbole/ItemKNN, amazon_finefoods bad diversity!
recbole/ItemKNN, kuairec_full bad diversity!
recbole/ItemKNN, yelp bad diversity!
recbole/MultiVAE, epinions bad diversity!
recbole/MultiVAE, movielens_1m bad diversity!
recbole/MultiVAE, amazon_finefoods bad diversity!
recbole/MultiVAE, kuairec_full bad diversity!
recbole/MultiVAE, yelp bad diversity!
recbole/SLIMElastic, epinions bad diversity!
recbole/SLIMElastic, movielens_1m do not exist
recbole/SLIMElastic, amazon_finefoods bad diversity!
recbole/SLIMElastic, kuairec_full bad diversity!
