In [1]:
import os

import numpy as np
import pandas as pd
from scipy.stats import ttest_rel

from dataset import UserSessionItemDataset
from evaluation import ndcg
from models import EASE, AbsEASE
from pipelines import hyperparameter_selection, run_test

In [2]:
DATA_FOLDER = "./data/movielens/"

# read the processed MovieLens ratings CSV that lives in DATA_FOLDER
ratings_path = f"{DATA_FOLDER}ratings_processed_MovieLens.csv"
df = pd.read_csv(ratings_path)

# preview the first rows as the cell output
df.head()

Unnamed: 0,userId,item_id,rating,timestamp,datetime,sessionId,user_session_id,split
0,42772,1,1.0,1454372681,2016-02-02 00:24:41,1,42772_1,train
1,44009,1,1.0,1471524035,2016-08-18 12:40:35,1,44009_1,train
2,127689,1,1.0,1542959215,2018-11-23 07:46:55,1,127689_1,train
3,158952,1,1.0,1555696891,2019-04-19 18:01:31,1,158952_1,train
4,3944,1,1.0,1457318834,2016-03-07 02:47:14,1,3944_1,train


In [3]:
# restrict the frame to the columns the experiments rely on
COLUMNS = ["user_session_id", "item_id", "rating", "split"]
df = df[COLUMNS]

# build the user-session encoding (raw id <-> contiguous integer index)
unique_user_sessions = df["user_session_id"].unique()
user_session_id_to_idx = dict(zip(unique_user_sessions, range(len(unique_user_sessions))))
user_session_idx_to_id = dict(enumerate(unique_user_sessions))

# build the item encoding the same way
unique_items = df["item_id"].unique()
item_id_to_idx = dict(zip(unique_items, range(len(unique_items))))
item_idx_to_id = dict(enumerate(unique_items))

# replace the raw ids in both columns by their integer indices
for column, encoding in (("user_session_id", user_session_id_to_idx), ("item_id", item_id_to_idx)):
    df[column] = df[column].map(encoding)

# sizes of the two encodings
n_user_sessions = len(user_session_id_to_idx)
n_items = len(item_id_to_idx)

# one frame per value of the "split" column, passed in train / val / test order
split_frames = [df[df["split"] == split_name] for split_name in ("train", "val", "test")]
dataset = UserSessionItemDataset(*split_frames, n_user_sessions, n_items)

In [4]:
%%capture hypers

l2s = [16., 32., 64., 128., 256., 512., 1024., 2048.]

best_l2s = hyperparameter_selection(dataset, l2s, ndcg, k=100)

In [5]:
# replay the stdout captured during hyperparameter selection
print(hypers.stdout)

L2 16.0
Constructing G...
Density of G: 0.8614%
Inverting G...
EASE
ndcg @ 100: 0.09888176278807757 +- 0.0012452313316275456
AbsEASE
ndcg @ 100: 0.11518722490451243 +- 0.001373478742503578

L2 32.0
Constructing G...
Density of G: 0.8614%
Inverting G...
EASE
ndcg @ 100: 0.10197967221362482 +- 0.0012615963182720628
AbsEASE
ndcg @ 100: 0.11486554677730067 +- 0.001370655788633978

L2 64.0
Constructing G...
Density of G: 0.8614%
Inverting G...
EASE
ndcg @ 100: 0.10432251640457477 +- 0.0012790372546657757
AbsEASE
ndcg @ 100: 0.11357593643266654 +- 0.00135852782776561

L2 128.0
Constructing G...
Density of G: 0.8614%
Inverting G...
EASE
ndcg @ 100: 0.10569621581316385 +- 0.0012911629194186814
AbsEASE
ndcg @ 100: 0.1115139718842576 +- 0.0013373506133319186

L2 256.0
Constructing G...
Density of G: 0.8614%
Inverting G...
EASE
ndcg @ 100: 0.10652827106219759 +- 0.00129943875033183
AbsEASE
ndcg @ 100: 0.1090941276879714 +- 0.0013159483769310707

L2 512.0
Constructing G...
Density of G: 0.8614%
In

In [6]:
%%capture results

models = [("EASE", EASE, best_l2s["EASE"][0]), ("AbsEASE", AbsEASE, best_l2s["AbsEASE"][0])]

results_dict = run_test(models, dataset, ks=[10,20,50,100,200,500])

In [7]:
# replay the stdout captured during the test run
print(results.stdout)

Split test
EASE
Constructing G...
Density of G: 0.8614%
Inverting G...
pos_inputs
recall_liked @ 10: 0.027367960484194983 +- 0.0002858207136123444
recall_disliked @ 10: 0.00937069541021903 +- 0.0002831766889089224
ndcg @ 10: 0.03721101691783137 +- 0.000421343472087699

recall_liked @ 20: 0.04719899963840789 +- 0.0003755001360585637
recall_disliked @ 20: 0.018253255365881977 +- 0.00039382772535589354
ndcg @ 20: 0.051204320978116426 +- 0.000485485134017098

recall_liked @ 50: 0.09431455837017902 +- 0.0005435316673981164
recall_disliked @ 50: 0.04226728854125566 +- 0.0006033763916088162
ndcg @ 50: 0.07738647029900476 +- 0.0006055021518742814

recall_liked @ 100: 0.1545218697145432 +- 0.0007365315501937447
recall_disliked @ 100: 0.0779810022195923 +- 0.0008308543736018609
ndcg @ 100: 0.10489852018470314 +- 0.000738417535186902

recall_liked @ 200: 0.24364567957323457 +- 0.001005046469844797
recall_disliked @ 200: 0.1379936945001327 +- 0.0011120954874125207
ndcg @ 200: 0.1390009486243042 +-

In [8]:
%%capture pvalues

baseline, new_model = models[0][0], models[1][0]
baseline_results, new_model_results = results_dict[baseline], results_dict[new_model]

print("two-sided p-values\n")
for input_type in list(baseline_results.keys()):
    print(input_type)
    for k in list(baseline_results[input_type].keys()):
        for metric in list(baseline_results[input_type][k].keys()):
            if metric.endswith("_values"):
                baseline_values = np.array(baseline_results[input_type][k][metric], dtype=np.float32)
                new_model_values = np.array(new_model_results[input_type][k][metric], dtype=np.float32)
                mean_diff = np.mean(new_model_values - baseline_values)
                p_value = ttest_rel(baseline_values, new_model_values).pvalue

                print(f"{metric} @ {k}: mean(new-baseline) = {mean_diff:.6f} (p={p_value:.6f})")

        print()
    print()
print()

In [9]:
# replay the stdout captured while computing the p-values
print(pvalues.stdout)

two-sided p-values

pos_inputs
recall_liked_values @ 10: mean(new-baseline) = 0.000926 (p=0.000000)
recall_disliked_values @ 10: mean(new-baseline) = 0.000449 (p=0.058208)
ndcg_values @ 10: mean(new-baseline) = 0.001588 (p=0.000000)

recall_liked_values @ 20: mean(new-baseline) = 0.002521 (p=0.000000)
recall_disliked_values @ 20: mean(new-baseline) = 0.000981 (p=0.003254)
ndcg_values @ 20: mean(new-baseline) = 0.002675 (p=0.000000)

recall_liked_values @ 50: mean(new-baseline) = 0.006938 (p=0.000000)
recall_disliked_values @ 50: mean(new-baseline) = 0.003010 (p=0.000000)
ndcg_values @ 50: mean(new-baseline) = 0.005122 (p=0.000000)

recall_liked_values @ 100: mean(new-baseline) = 0.015520 (p=0.000000)
recall_disliked_values @ 100: mean(new-baseline) = 0.006463 (p=0.000000)
ndcg_values @ 100: mean(new-baseline) = 0.008449 (p=0.000000)

recall_liked_values @ 200: mean(new-baseline) = 0.035775 (p=0.000000)
recall_disliked_values @ 200: mean(new-baseline) = 0.019646 (p=0.000000)
ndcg_values

In [10]:
RESULTS_FOLDER = "./results/movielens/"

# create the output folder if it is missing, so that the writes below cannot fail
# with FileNotFoundError after all the preceding cells have already run
os.makedirs(RESULTS_FOLDER, exist_ok=True)

# write the stdout captured by each %%capture cell to its own text file
captured_logs = {
    "hyperparametersMovieLens.txt": hypers,
    "resultsMovieLens.txt": results,
    "pvaluesMovieLens.txt": pvalues,
}
for file_name, captured in captured_logs.items():
    with open(RESULTS_FOLDER + file_name, "w") as f:
        f.write(captured.stdout)