In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import ttest_rel

from dataset import UserSessionItemDataset
from evaluation import ndcg
from models import EASE, AbsEASE
from pipelines import hyperparameter_selection, run_test

In [2]:
DATA_FOLDER = "./data/yelp2018/"

# read the preprocessed Yelp ratings table from the data folder
ratings_csv_path = f"{DATA_FOLDER}ratings_processed_Yelp.csv"
df = pd.read_csv(ratings_csv_path)

# preview the first rows (must stay the last expression so the notebook displays it)
df.head()

Unnamed: 0,userId,item_id,rating,sessionId,user_session_id,split
0,0,0,1.0,1,0_1,train
1,0,2,1.0,1,0_1,train
2,0,3,1.0,1,0_1,train
3,0,4,1.0,1,0_1,train
4,0,11,1.0,1,0_1,train


In [3]:
# keep only the columns needed for the experiments; take an explicit copy so the
# encoded columns assigned below are written to an independent frame instead of a
# slice of the raw one (this is what triggers pandas' SettingWithCopyWarning)
COLUMNS = ["user_session_id", "item_id", "rating", "split"]
df = df[COLUMNS].copy()

# create user session encoding and item encoding: each raw id gets a contiguous
# 0-based index, numbered in order of first appearance; the inverse dicts allow
# decoding indices back to the raw ids
user_session_id_to_idx = {
    user_session_id: idx
    for idx, user_session_id in enumerate(df["user_session_id"].unique())
}
user_session_idx_to_id = {idx: user_session_id for user_session_id, idx in user_session_id_to_idx.items()}

item_id_to_idx = {item_id: idx for idx, item_id in enumerate(df["item_id"].unique())}
item_idx_to_id = {idx: item_id for item_id, idx in item_id_to_idx.items()}

# replace the raw ids by their indices using the dicts above
df["user_session_id"] = df["user_session_id"].map(user_session_id_to_idx)
df["item_id"] = df["item_id"].map(item_id_to_idx)

# number of unique user sessions and unique items (taken over all splits together)
n_user_sessions = len(user_session_id_to_idx)
n_items = len(item_id_to_idx)

# instantiate the dataset from the rows of each split, in train / val / test order
dataset = UserSessionItemDataset(
    df[df.split == "train"],
    df[df.split == "val"],
    df[df.split == "test"],
    n_user_sessions,
    n_items,
)

In [4]:
%%capture hypers

l2s = [16., 32., 64., 128., 256., 512., 1024., 2048.]

best_l2s = hyperparameter_selection(dataset, l2s, ndcg, k=100)

In [5]:
# replay the stdout captured during hyperparameter selection
print(hypers.stdout)

L2 16.0
Constructing G...
Density of G: 0.4489%
Inverting G...
EASE
ndcg @ 100: 0.11356763639958112 +- 0.0012374614218068143
AbsEASE
ndcg @ 100: 0.14775150262123476 +- 0.0014504197608181176

L2 32.0
Constructing G...
Density of G: 0.4489%
Inverting G...
EASE
ndcg @ 100: 0.12088552903917829 +- 0.0013033392025733263
AbsEASE
ndcg @ 100: 0.15000147950586812 +- 0.0014666684768030742

L2 64.0
Constructing G...
Density of G: 0.4489%
Inverting G...
EASE
ndcg @ 100: 0.1288046012464397 +- 0.0013770441759773922
AbsEASE
ndcg @ 100: 0.1512183137538284 +- 0.001475508058004225

L2 128.0
Constructing G...
Density of G: 0.4489%
Inverting G...
EASE
ndcg @ 100: 0.13666905492496362 +- 0.0014474027328038963
AbsEASE
ndcg @ 100: 0.15167029132526239 +- 0.0014922829343526588

L2 256.0
Constructing G...
Density of G: 0.4489%
Inverting G...
EASE
ndcg @ 100: 0.14312517784395118 +- 0.0014948938034188605
AbsEASE
ndcg @ 100: 0.1512979757818212 +- 0.0015050038784534011

L2 512.0
Constructing G...
Density of G: 0.4489

In [6]:
%%capture results

models = [("EASE", EASE, best_l2s["EASE"][0]), ("AbsEASE", AbsEASE, best_l2s["AbsEASE"][0])]

results_dict = run_test(models, dataset, ks=[10,20,50,100,200,500])

In [7]:
# replay the stdout captured during the test run
print(results.stdout)

Split test
EASE
Constructing G...
Density of G: 0.4489%
Inverting G...
pos_inputs
recall_liked @ 10: 0.032745094642836974 +- 0.0003020656653187208
recall_disliked @ 10: nan +- nan
ndcg @ 10: 0.05923153959377889 +- 0.0005403627032822668

recall_liked @ 20: 0.05532170551229909 +- 0.0004410896337841376
recall_disliked @ 20: nan +- nan
ndcg @ 20: 0.08013639994903421 +- 0.0006380259776728348

recall_liked @ 50: 0.10442090462618063 +- 0.0006905897860955314
recall_disliked @ 50: nan +- nan
ndcg @ 50: 0.11611578349291166 +- 0.0007834394228396144

recall_liked @ 100: 0.16155713769488841 +- 0.0009225221439960687
recall_disliked @ 100: nan +- nan
ndcg @ 100: 0.15077196960016742 +- 0.0009004649019584638

recall_liked @ 200: 0.23823455741596516 +- 0.0011682398710999608
recall_disliked @ 200: nan +- nan
ndcg @ 200: 0.19122794976410665 +- 0.0010147339152254478

recall_liked @ 500: 0.36625721866760125 +- 0.0014229836517692677
recall_disliked @ 500: nan +- nan
ndcg @ 500: 0.25001705667875174 +- 0.00113

In [8]:
%%capture pvalues

baseline, new_model = models[0][0], models[1][0]
baseline_results, new_model_results = results_dict[baseline], results_dict[new_model]

print("two-sided p-values\n")
for input_type in list(baseline_results.keys()):
    print(input_type)
    for k in list(baseline_results[input_type].keys()):
        for metric in list(baseline_results[input_type][k].keys()):
            if metric.endswith("_values"):
                baseline_values = np.array(baseline_results[input_type][k][metric], dtype=np.float32)
                new_model_values = np.array(new_model_results[input_type][k][metric], dtype=np.float32)
                mean_diff = np.mean(new_model_values - baseline_values)
                p_value = ttest_rel(baseline_values, new_model_values).pvalue

                print(f"{metric} @ {k}: mean(new-baseline) = {mean_diff:.6f} (p={p_value:.6f})")

        print()
    print()
print()

In [9]:
# replay the stdout captured while computing the p-values
print(pvalues.stdout)

two-sided p-values

pos_inputs
recall_liked_values @ 10: mean(new-baseline) = -0.000130 (p=0.392552)
recall_disliked_values @ 10: mean(new-baseline) = nan (p=nan)
ndcg_values @ 10: mean(new-baseline) = -0.000344 (p=0.130892)

recall_liked_values @ 20: mean(new-baseline) = -0.000152 (p=0.433676)
recall_disliked_values @ 20: mean(new-baseline) = nan (p=nan)
ndcg_values @ 20: mean(new-baseline) = -0.000332 (p=0.148517)

recall_liked_values @ 50: mean(new-baseline) = 0.001050 (p=0.000039)
recall_disliked_values @ 50: mean(new-baseline) = nan (p=nan)
ndcg_values @ 50: mean(new-baseline) = 0.000527 (p=0.026162)

recall_liked_values @ 100: mean(new-baseline) = 0.003726 (p=0.000000)
recall_disliked_values @ 100: mean(new-baseline) = nan (p=nan)
ndcg_values @ 100: mean(new-baseline) = 0.002041 (p=0.000000)

recall_liked_values @ 200: mean(new-baseline) = 0.010829 (p=0.000000)
recall_disliked_values @ 200: mean(new-baseline) = nan (p=nan)
ndcg_values @ 200: mean(new-baseline) = 0.005554 (p=0.000

In [10]:
RESULTS_FOLDER = "./results/yelp2018/"

# create the output folder first: without it, the writes below raise
# FileNotFoundError on a fresh checkout, after all the expensive cells have run
results_dir = Path(RESULTS_FOLDER)
results_dir.mkdir(parents=True, exist_ok=True)

# output file name -> captured stdout of the corresponding cell
captured_outputs = {
    "hyperparametersYelp.txt": hypers.stdout,
    "resultsYelp.txt": results.stdout,
    "pvaluesYelp.txt": pvalues.stdout,
}

for file_name, captured_text in captured_outputs.items():
    with open(results_dir / file_name, "w") as f:
        f.write(captured_text)