In [1]:
import os

import numpy as np
import pandas as pd
from scipy.stats import ttest_rel

from dataset import UserSessionItemDataset
from evaluation import ndcg
from models import EASE, AbsEASE
from pipelines import hyperparameter_selection, run_test

In [2]:
# Folder holding the Yelp2018 input files (relative to the notebook).
DATA_FOLDER = "./data/yelp2018/"

# Read the processed ratings CSV and preview its first rows.
ratings_path = f"{DATA_FOLDER}ratings_processed_Yelp.csv"
df = pd.read_csv(ratings_path)

df.head()

Unnamed: 0,userId,item_id,rating,sessionId,user_session_id,split
0,0,0,1.0,1,0_1,train
1,0,16,1.0,1,0_1,train
2,0,14,1.0,1,0_1,train
3,0,13,1.0,1,0_1,train
4,0,12,1.0,1,0_1,train


In [3]:
# keep only columns needed for experiments
COLUMNS = ["user_session_id", "item_id", "rating", "split"]
# Take an explicit copy: two columns are reassigned below, and writing into a
# column subset of another frame is the pattern pandas flags with
# SettingWithCopyWarning.
df = df[COLUMNS].copy()

# create user session encoding and item encoding: every distinct id gets a
# contiguous 0-based index in order of first appearance, and the inverse
# dicts allow translating indices back to the original ids
user_session_id_to_idx = {user_session_id: idx for idx, user_session_id in enumerate(df["user_session_id"].unique())}
user_session_idx_to_id = {idx: user_session_id for user_session_id, idx in user_session_id_to_idx.items()}

item_id_to_idx = {item_id: idx for idx, item_id in enumerate(df["item_id"].unique())}
item_idx_to_id = {idx: item_id for item_id, idx in item_id_to_idx.items()}

# map values to idx using the above dicts
df["user_session_id"] = df["user_session_id"].map(user_session_id_to_idx)
df["item_id"] = df["item_id"].map(item_id_to_idx)

# get number of unique user_sessions and unique items
n_user_sessions = len(user_session_id_to_idx)
n_items = len(item_id_to_idx)

# instantiate dataset from the rows of each split; the column is accessed with
# df["split"] rather than attribute access so the lookup can never be shadowed
# by a DataFrame attribute of the same name
dataset = UserSessionItemDataset(
    df[df["split"] == "train"],
    df[df["split"] == "val"],
    df[df["split"] == "test"],
    n_user_sessions,
    n_items,
)

In [4]:
%%capture hypers

l2s = [16., 32., 64., 128., 256., 512., 1024., 2048.]

best_l2s = hyperparameter_selection(dataset, l2s, ndcg, k=100)

In [5]:
# Replay the log captured from the hyperparameter-selection cell.
print(hypers.stdout)

L2 16.0
Constructing G...
Density of G: 0.3285%
Inverting G...
EASE
ndcg @ 100: 0.055249984269925514 +- 0.0007057641477314809
AbsEASE
ndcg @ 100: 0.07548028071484433 +- 0.0008521408866767183

L2 32.0
Constructing G...
Density of G: 0.3285%
Inverting G...
EASE
ndcg @ 100: 0.05826315890407723 +- 0.0007316461571719078
AbsEASE
ndcg @ 100: 0.07760302378049373 +- 0.0008586345347155784

L2 64.0
Constructing G...
Density of G: 0.3285%
Inverting G...
EASE
ndcg @ 100: 0.062325971181506006 +- 0.0007625645901404675
AbsEASE
ndcg @ 100: 0.07901325571249815 +- 0.0008649380931428717

L2 128.0
Constructing G...
Density of G: 0.3285%
Inverting G...
EASE
ndcg @ 100: 0.06693924935298731 +- 0.000800331216220419
AbsEASE
ndcg @ 100: 0.08023064861088908 +- 0.0008692861811682099

L2 256.0
Constructing G...
Density of G: 0.3285%
Inverting G...
EASE
ndcg @ 100: 0.07157257228379826 +- 0.0008372392407931637
AbsEASE
ndcg @ 100: 0.08079869618061898 +- 0.0008740671980201814

L2 512.0
Constructing G...
Density of G: 0

In [6]:
%%capture results

models = [("EASE", EASE, best_l2s["EASE"][0]), ("AbsEASE", AbsEASE, best_l2s["AbsEASE"][0])]

results_dict = run_test(models, dataset, ks=[10,20,50,100,200,500])

In [7]:
# Replay the log captured from the test-run cell.
print(results.stdout)

Split test
EASE
Constructing G...
Density of G: 0.3285%
Inverting G...
pos_inputs
recall_liked @ 10: 0.01503016852764036 +- 0.0001580029362231856
recall_disliked @ 10: nan +- nan
ndcg @ 10: 0.0321671136516089 +- 0.00034079465133611997

recall_liked @ 20: 0.025132960090443763 +- 0.00022176337017358583
recall_disliked @ 20: nan +- nan
ndcg @ 20: 0.043188068583401566 +- 0.00039019736154924145

recall_liked @ 50: 0.04676027729885171 +- 0.0003317505529642154
recall_disliked @ 50: nan +- nan
ndcg @ 50: 0.06179184088297201 +- 0.00045983483520802926

recall_liked @ 100: 0.07101777665578964 +- 0.0004288274996293379
recall_disliked @ 100: nan +- nan
ndcg @ 100: 0.07904482246674566 +- 0.0005131639200294055

recall_liked @ 200: 0.10282095687034548 +- 0.0005252593935314189
recall_disliked @ 200: nan +- nan
ndcg @ 200: 0.09870124877110344 +- 0.0005620217620923528

recall_liked @ 500: 0.15504037823751426 +- 0.000616055635126808
recall_disliked @ 500: nan +- nan
ndcg @ 500: 0.12674438203629748 +- 0.00

In [8]:
%%capture pvalues

baseline, new_model = models[0][0], models[1][0]
baseline_results, new_model_results = results_dict[baseline], results_dict[new_model]

print("two-sided p-values\n")
for input_type in list(baseline_results.keys()):
    print(input_type)
    for k in list(baseline_results[input_type].keys()):
        for metric in list(baseline_results[input_type][k].keys()):
            if metric.endswith("_values"):
                baseline_values = np.array(baseline_results[input_type][k][metric], dtype=np.float32)
                new_model_values = np.array(new_model_results[input_type][k][metric], dtype=np.float32)
                mean_diff = np.mean(new_model_values - baseline_values)
                p_value = ttest_rel(baseline_values, new_model_values).pvalue

                print(f"{metric} @ {k}: mean(new-baseline) = {mean_diff:.6f} (p={p_value:.6f})")

        print()
    print()
print()

In [9]:
# Replay the log captured from the p-value cell.
print(pvalues.stdout)

two-sided p-values

pos_inputs
recall_liked_values @ 10: mean(new-baseline) = 0.000087 (p=0.255296)
recall_disliked_values @ 10: mean(new-baseline) = nan (p=nan)
ndcg_values @ 10: mean(new-baseline) = 0.000045 (p=0.725108)

recall_liked_values @ 20: mean(new-baseline) = 0.000069 (p=0.466905)
recall_disliked_values @ 20: mean(new-baseline) = nan (p=nan)
ndcg_values @ 20: mean(new-baseline) = 0.000022 (p=0.868795)

recall_liked_values @ 50: mean(new-baseline) = 0.000766 (p=0.000000)
recall_disliked_values @ 50: mean(new-baseline) = nan (p=nan)
ndcg_values @ 50: mean(new-baseline) = 0.000556 (p=0.000037)

recall_liked_values @ 100: mean(new-baseline) = 0.002194 (p=0.000000)
recall_disliked_values @ 100: mean(new-baseline) = nan (p=nan)
ndcg_values @ 100: mean(new-baseline) = 0.001486 (p=0.000000)

recall_liked_values @ 200: mean(new-baseline) = 0.005301 (p=0.000000)
recall_disliked_values @ 200: mean(new-baseline) = nan (p=nan)
ndcg_values @ 200: mean(new-baseline) = 0.003263 (p=0.000000)

In [10]:
RESULTS_FOLDER = "./results/yelp2018/"

# Create the output folder up front so a fresh checkout does not fail with
# FileNotFoundError after the expensive experiments have already finished.
os.makedirs(RESULTS_FOLDER, exist_ok=True)

# Persist the stdout captured by the %%capture cells, one text file per stage.
captured_outputs = {
    "hyperparametersYelp.txt": hypers.stdout,
    "resultsYelp.txt": results.stdout,
    "pvaluesYelp.txt": pvalues.stdout,
}
for filename, text in captured_outputs.items():
    # Explicit encoding keeps the files identical regardless of the OS locale.
    with open(RESULTS_FOLDER + filename, "w", encoding="utf-8") as f:
        f.write(text)