In [1]:
import os

import numpy as np
import pandas as pd
from scipy.stats import ttest_rel

from dataset import UserSessionItemDataset
from evaluation import ndcg
from models import EASE, AbsEASE
from pipelines import hyperparameter_selection, run_test

In [2]:
# Folder and file name of the preprocessed BeerAdvocate ratings table.
DATA_FOLDER = "./data/beeradvocate/"
RATINGS_FILE = "ratings_processed_BeerAdvocate_dense.csv"

# Read the whole CSV into memory; columns are narrowed down in the next cell.
df = pd.read_csv(DATA_FOLDER + RATINGS_FILE)

# Show the first rows as the cell output to eyeball the schema.
df.head()

Unnamed: 0,index,brewery_id,brewery_name,timestamp,rating,review_aroma,review_appearance,userId,beer_style,review_palate,review_taste,beer_name,beer_abv,item_id,datetime,sessionId,user_session_id,split
0,662010,3120,Alpine Beer Company,1323364958,1.0,5.0,4.5,01Ryan10,American IPA,5.0,5.0,Duet IPA,7.0,27604,2011-12-08 17:22:38,1,01Ryan10_1,train
1,642923,30,Avery Brewing Company,1314665814,1.0,5.0,4.5,02maxima,American Double / Imperial IPA,4.5,4.5,Maharaja,10.3,22352,2011-08-30 00:56:54,1,02maxima_1,train
2,1220116,4,Allagash Brewing Company,1322615046,1.0,4.0,3.5,02maxima,Belgian Strong Dark Ale,4.5,5.0,Allagash Black - Bourbon Barrel Aged,10.0,48279,2011-11-30 01:04:06,1,02maxima_1,train
3,1529758,45,Brooklyn Brewery,1322621468,1.0,4.0,5.0,02maxima,Russian Imperial Stout,4.0,4.5,Brooklyn Black Chocolate Stout,10.0,680,2011-11-30 02:51:08,1,02maxima_1,train
4,456216,23922,Baxter Brewing Co.,1296047711,1.0,2.0,3.0,04101Brewer,American Pale Ale (APA),3.0,2.0,Pamola Xtra Pale Ale,4.9,65518,2011-01-26 13:15:11,1,04101Brewer_1,train


In [3]:
# Restrict the frame to the columns the experiments actually use.
COLUMNS = ["user_session_id", "item_id", "rating", "split"]
df = df[COLUMNS]

# Build the user-session encoding (raw id <-> contiguous index).
# Indices follow the order in which ids first appear in the frame.
unique_user_sessions = df["user_session_id"].unique()
user_session_id_to_idx = dict(zip(unique_user_sessions, range(len(unique_user_sessions))))
user_session_idx_to_id = dict(enumerate(unique_user_sessions))

# Build the item encoding the same way.
unique_items = df["item_id"].unique()
item_id_to_idx = dict(zip(unique_items, range(len(unique_items))))
item_idx_to_id = dict(enumerate(unique_items))

# Replace the raw ids in the frame with their indices.
df["user_session_id"] = df["user_session_id"].map(user_session_id_to_idx)
df["item_id"] = df["item_id"].map(item_id_to_idx)

# Number of distinct user sessions and of distinct items.
n_user_sessions = len(user_session_id_to_idx)
n_items = len(item_id_to_idx)

# Partition the encoded rows by their "split" label and instantiate the dataset.
train_df = df[df.split == "train"]
val_df = df[df.split == "val"]
test_df = df[df.split == "test"]
dataset = UserSessionItemDataset(train_df, val_df, test_df, n_user_sessions, n_items)

In [4]:
%%capture hypers

# Candidate L2 values: the powers of two from 16.0 (2**4) up to 2048.0 (2**11).
l2s = [float(2 ** exponent) for exponent in range(4, 12)]

# Run the selection over every candidate, scoring with ndcg at k=100.
# The result is indexed by model name further down ("EASE", "AbsEASE").
best_l2s = hyperparameter_selection(dataset, l2s, ndcg, k=100)

In [5]:
# Replay the stdout captured during hyperparameter selection.
print(hypers.stdout)

L2 16.0
Constructing G...
Density of G: 1.1909%
Inverting G...
EASE
ndcg @ 100: 0.16374322598078186 +- 0.003923165848542657
AbsEASE
ndcg @ 100: 0.1861029930028141 +- 0.00453767528687197

L2 32.0
Constructing G...
Density of G: 1.1909%
Inverting G...
EASE
ndcg @ 100: 0.1709555304144424 +- 0.004064227034488454
AbsEASE
ndcg @ 100: 0.18723356492619217 +- 0.004598795872968873

L2 64.0
Constructing G...
Density of G: 1.1909%
Inverting G...
EASE
ndcg @ 100: 0.1790445516188574 +- 0.0041675584362082475
AbsEASE
ndcg @ 100: 0.19004799217846247 +- 0.004550288030395654

L2 128.0
Constructing G...
Density of G: 1.1909%
Inverting G...
EASE
ndcg @ 100: 0.18836253778738826 +- 0.004335244883252825
AbsEASE
ndcg @ 100: 0.1917965562821817 +- 0.004517371896810434

L2 256.0
Constructing G...
Density of G: 1.1909%
Inverting G...
EASE
ndcg @ 100: 0.1965452735612302 +- 0.004444632874751559
AbsEASE
ndcg @ 100: 0.19486430826447637 +- 0.004535735493809437

L2 512.0
Constructing G...
Density of G: 1.1909%
Inverting

In [4]:
%%capture results

# One (name, model class, L2) triple per model under test. The L2 is the first
# element of that model's entry in best_l2s (presumably the selected value --
# confirm against hyperparameter_selection).
models = [
    (name, model_cls, best_l2s[name][0])
    for name, model_cls in (("EASE", EASE), ("AbsEASE", AbsEASE))
]

# Cut-offs at which run_test reports its metrics.
cutoffs = [10, 20, 50, 100, 200, 500]
results_dict = run_test(models, dataset, ks=cutoffs)

In [5]:
# Replay the stdout captured while the test-split evaluation ran.
print(results.stdout)

Split test
EASE
Constructing G...
Density of G: 1.1909%
Inverting G...
pos_inputs
recall_liked @ 10: 0.042856859668588904 +- 0.0011262059604407345
recall_disliked @ 10: 0.006757052915668679 +- 0.001044052358177076
ndcg @ 10: 0.08000378128072888 +- 0.001644242519570384

recall_liked @ 20: 0.07238152375724556 +- 0.0014960748328289653
recall_disliked @ 20: 0.012997778527758738 +- 0.001456053968559318
ndcg @ 20: 0.10933966020815296 +- 0.0019122091492103413

recall_liked @ 50: 0.1361698913892164 +- 0.002165306560154339
recall_disliked @ 50: 0.02639032031759307 +- 0.002044412619774605
ndcg @ 50: 0.16117662007628497 +- 0.002399797676107992

recall_liked @ 100: 0.20965092269116467 +- 0.0027736308619838368
recall_disliked @ 100: 0.04976932528266372 +- 0.002758897301383768
ndcg @ 100: 0.21060788113876486 +- 0.0028219264689291045

recall_liked @ 200: 0.3043265611346986 +- 0.0033288679581603736
recall_disliked @ 200: 0.08808540439210666 +- 0.0037387803835284765
ndcg @ 200: 0.26694490975089585 +- 0

In [6]:
%%capture pvalues

# Compare the two tested models with a paired t-test on their per-sample
# metric values. The first entry of `models` is treated as the baseline and
# the second as the new model.
baseline, new_model = models[0][0], models[1][0]
baseline_results, new_model_results = results_dict[baseline], results_dict[new_model]

print("two-sided p-values\n")
# results are nested as results[input_type][k][metric]
for input_type, baseline_by_k in baseline_results.items():
    print(input_type)
    for k, baseline_metrics in baseline_by_k.items():
        for metric, raw_baseline_values in baseline_metrics.items():
            # Only the entries whose key ends in "_values" are compared.
            if not metric.endswith("_values"):
                continue

            # Use float64: np.mean and the t-test accumulate in the input
            # precision, and float32 needlessly degrades the statistics.
            baseline_values = np.asarray(raw_baseline_values, dtype=np.float64)
            new_model_values = np.asarray(new_model_results[input_type][k][metric], dtype=np.float64)

            # Positive mean_diff means the new model scores higher on average.
            mean_diff = np.mean(new_model_values - baseline_values)
            # ttest_rel is two-sided by default, so the argument order does not
            # affect the p-value.
            p_value = ttest_rel(baseline_values, new_model_values).pvalue

            print(f"{metric} @ {k}: mean(new-baseline) = {mean_diff:.6f} (p={p_value:.6f})")

        print()
    print()
print()

In [7]:
# Replay the stdout captured while the p-values were computed.
print(pvalues.stdout)

two-sided p-values

pos_inputs
recall_liked_values @ 10: mean(new-baseline) = -0.001023 (p=0.019072)
recall_disliked_values @ 10: mean(new-baseline) = -0.001171 (p=0.065395)
ndcg_values @ 10: mean(new-baseline) = -0.002333 (p=0.000010)

recall_liked_values @ 20: mean(new-baseline) = -0.001079 (p=0.044092)
recall_disliked_values @ 20: mean(new-baseline) = -0.001470 (p=0.105959)
ndcg_values @ 20: mean(new-baseline) = -0.002673 (p=0.000000)

recall_liked_values @ 50: mean(new-baseline) = -0.001680 (p=0.019711)
recall_disliked_values @ 50: mean(new-baseline) = -0.001821 (p=0.088775)
ndcg_values @ 50: mean(new-baseline) = -0.003922 (p=0.000000)

recall_liked_values @ 100: mean(new-baseline) = -0.001112 (p=0.230091)
recall_disliked_values @ 100: mean(new-baseline) = -0.002332 (p=0.112351)
ndcg_values @ 100: mean(new-baseline) = -0.004381 (p=0.000000)

recall_liked_values @ 200: mean(new-baseline) = 0.000042 (p=0.971294)
recall_disliked_values @ 200: mean(new-baseline) = -0.003814 (p=0.072583

In [8]:
RESULTS_FOLDER = "./results/beeradvocate_dense/"

# Create the output folder up front: open(..., "w") does not create missing
# directories, and failing here would lose the logs of the expensive cells.
os.makedirs(RESULTS_FOLDER, exist_ok=True)

# Captured stdout of each stage, keyed by the file it is written to.
captured_outputs = {
    "hyperparametersBeerAdvocate_dense.txt": hypers.stdout,
    "resultsBeerAdvocate_dense.txt": results.stdout,
    "pvaluesBeerAdvocate_dense.txt": pvalues.stdout,
}

for file_name, text in captured_outputs.items():
    with open(RESULTS_FOLDER + file_name, "w") as f:
        f.write(text)