In [1]:
import os

import numpy as np
import pandas as pd

from dataset import UserSessionItemDataset
from evaluation import ndcg
from models import EASE, AbsEASE
from pipelines import hyperparameter_selection, run_test

In [2]:
# location of the input data, relative to the notebook
DATA_FOLDER = "./data/"
RATINGS_CSV = DATA_FOLDER + "ratings_processed_BeerAdvocate_dense.csv"

# load the full ratings table; later cells keep only the columns they need
df = pd.read_csv(RATINGS_CSV)

# preview the first rows
df.head()

Unnamed: 0,index,brewery_id,brewery_name,timestamp,rating,review_aroma,review_appearance,userId,beer_style,review_palate,review_taste,beer_name,beer_abv,item_id,datetime,sessionId,user_session_id,split
0,662010,3120,Alpine Beer Company,1323364958,1.0,5.0,4.5,01Ryan10,American IPA,5.0,5.0,Duet IPA,7.0,27604,2011-12-08 17:22:38,1,01Ryan10_1,train
1,642923,30,Avery Brewing Company,1314665814,1.0,5.0,4.5,02maxima,American Double / Imperial IPA,4.5,4.5,Maharaja,10.3,22352,2011-08-30 00:56:54,1,02maxima_1,train
2,1220116,4,Allagash Brewing Company,1322615046,1.0,4.0,3.5,02maxima,Belgian Strong Dark Ale,4.5,5.0,Allagash Black - Bourbon Barrel Aged,10.0,48279,2011-11-30 01:04:06,1,02maxima_1,train
3,1529758,45,Brooklyn Brewery,1322621468,1.0,4.0,5.0,02maxima,Russian Imperial Stout,4.0,4.5,Brooklyn Black Chocolate Stout,10.0,680,2011-11-30 02:51:08,1,02maxima_1,train
4,456216,23922,Baxter Brewing Co.,1296047711,1.0,2.0,3.0,04101Brewer,American Pale Ale (APA),3.0,2.0,Pamola Xtra Pale Ale,4.9,65518,2011-01-26 13:15:11,1,04101Brewer_1,train


In [3]:
# keep only columns needed for experiments
COLUMNS = ["user_session_id", "item_id", "rating", "split"]

# .copy() detaches the selection from the loaded frame, so the column
# assignments below write to an independent DataFrame instead of triggering
# pandas' SettingWithCopyWarning.
# NOTE: `df` is overwritten here. Re-running this cell without first re-running
# the load cell would enumerate the already-encoded indices, so the idx -> id
# dictionaries would no longer hold the original ids.
df = df[COLUMNS].copy()

# create user session encoding and item encoding:
# every distinct id gets a contiguous integer index (0..n-1) in the order
# returned by Series.unique(); the reverse dicts map an index back to its id
user_session_id_to_idx = {
    user_session_id: idx
    for idx, user_session_id in enumerate(df["user_session_id"].unique())
}
user_session_idx_to_id = {idx: user_session_id for user_session_id, idx in user_session_id_to_idx.items()}

item_id_to_idx = {item_id: idx for idx, item_id in enumerate(df["item_id"].unique())}
item_idx_to_id = {idx: item_id for item_id, idx in item_id_to_idx.items()}

# map values to idx using the above dicts
df["user_session_id"] = df["user_session_id"].map(user_session_id_to_idx)
df["item_id"] = df["item_id"].map(item_id_to_idx)

# get number of unique user_sessions and unique items
n_user_sessions = len(user_session_id_to_idx)
n_items = len(item_id_to_idx)

# instantiate dataset from the train / val / test partitions given by the "split" column
# (bracket access is used for the column so it cannot be confused with an attribute or method)
dataset = UserSessionItemDataset(
    df[df["split"] == "train"],
    df[df["split"] == "val"],
    df[df["split"] == "test"],
    n_user_sessions,
    n_items,
)

In [4]:
%%capture hypers

l2s = [16., 32., 64., 128., 256., 512., 1024., 2048.]

best_l2s = hyperparameter_selection(dataset, l2s, ndcg, k=100)

In [5]:
# replay the captured log of the hyperparameter search
print(hypers.stdout)

L2 16.0
Constructing G...
Density of G: 1.1909%
Inverting G...
EASE
ndcg @ 100: 0.16374322598078186 +- 0.003923165848542657
AbsEASE
ndcg @ 100: 0.1861029930028141 +- 0.00453767528687197

L2 32.0
Constructing G...
Density of G: 1.1909%
Inverting G...
EASE
ndcg @ 100: 0.1709555304144424 +- 0.004064227034488454
AbsEASE
ndcg @ 100: 0.18723356492619217 +- 0.004598795872968873

L2 64.0
Constructing G...
Density of G: 1.1909%
Inverting G...
EASE
ndcg @ 100: 0.1790445516188574 +- 0.0041675584362082475
AbsEASE
ndcg @ 100: 0.19004799217846247 +- 0.004550288030395654

L2 128.0
Constructing G...
Density of G: 1.1909%
Inverting G...
EASE
ndcg @ 100: 0.18836253778738826 +- 0.004335244883252825
AbsEASE
ndcg @ 100: 0.1917965562821817 +- 0.004517371896810434

L2 256.0
Constructing G...
Density of G: 1.1909%
Inverting G...
EASE
ndcg @ 100: 0.1965452735612302 +- 0.004444632874751559
AbsEASE
ndcg @ 100: 0.19486430826447637 +- 0.004535735493809437

L2 512.0
Constructing G...
Density of G: 1.1909%
Inverting

In [6]:
%%capture results

models = [("EASE", EASE, best_l2s["EASE"][0]), ("AbsEASE", AbsEASE, best_l2s["AbsEASE"][0])]

run_test(models, dataset, ks=[10,20,50,100,200,500])

In [7]:
# replay the captured log of the test run
print(results.stdout)

Split test
EASE
Constructing G...
Density of G: 1.1909%
Inverting G...
pos_inputs
recall_liked @ 10: 0.041940356629887855 +- 0.0010753213087758707
recall_disliked @ 10: 0.006823143396721503 +- 0.0011082514761198894
ndcg @ 10: 0.07935220757522787 +- 0.0015853601449459431

recall_liked @ 20: 0.07152994324476995 +- 0.0014973107869473623
recall_disliked @ 20: 0.0119123632517146 +- 0.0014253879036190394
ndcg @ 20: 0.10899195787450193 +- 0.0018747773743765122

recall_liked @ 50: 0.13663367046347247 +- 0.00218687438190656
recall_disliked @ 50: 0.027797326305385756 +- 0.0021541654859246837
ndcg @ 50: 0.16112945283040123 +- 0.0023601902955143776

recall_liked @ 100: 0.21320974226380998 +- 0.0028836701502365425
recall_disliked @ 100: 0.04888382379077256 +- 0.0028963553977615045
ndcg @ 100: 0.21187378227991427 +- 0.0027891622228585953

recall_liked @ 200: 0.3090758821998297 +- 0.0033807146276031443
recall_disliked @ 200: 0.08614193987335163 +- 0.0036875890411844983
ndcg @ 200: 0.2684464027348098 

In [8]:
RESULTS_FOLDER = "./results/beeradvocate_dense/"

# open(..., 'w') does not create missing directories, so make sure the output
# folder exists; otherwise the write fails only after the long runs above have finished
os.makedirs(RESULTS_FOLDER, exist_ok=True)

# persist the captured stdout of the hyperparameter search and of the test run
with open(RESULTS_FOLDER + "hyperparametersBeerAdvocate_dense.txt", 'w') as f:
    f.write(hypers.stdout)
with open(RESULTS_FOLDER + "resultsBeerAdvocate_dense.txt", 'w') as f:
    f.write(results.stdout)