In [1]:
import os

import numpy as np
import pandas as pd

from dataset import UserSessionItemDataset
from evaluation import ndcg
from models import EASE, AbsEASE
from pipelines import hyperparameter_selection, run_test

In [2]:
# Folder containing the input data; the trailing slash matters because paths
# are built by plain string concatenation.
DATA_FOLDER = "./data/"

# Load the preprocessed BeerAdvocate ratings table.
ratings_file = "ratings_processed_BeerAdvocate.csv"
df = pd.read_csv(DATA_FOLDER + ratings_file)

# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,index,brewery_id,brewery_name,timestamp,rating,review_aroma,review_appearance,userId,beer_style,review_palate,review_taste,beer_name,beer_abv,item_id,datetime,sessionId,user_session_id,split
0,393434,3,Abita Brewing Co.,1200894871,1.0,4.0,4.0,oscplayr25,Vienna Lager,3.0,3.5,Amber,4.5,5,2008-01-21 05:54:31,1,oscplayr25_1,train
1,393306,3,Abita Brewing Co.,1265575690,1.0,3.5,3.5,Tinfoilrcr,Vienna Lager,3.0,3.5,Amber,4.5,5,2010-02-07 20:48:10,1,Tinfoilrcr_1,train
2,393355,3,Abita Brewing Co.,1233375682,1.0,3.0,3.5,dborginis,Vienna Lager,4.5,4.0,Amber,4.5,5,2009-01-31 04:21:22,1,dborginis_1,train
3,393405,3,Abita Brewing Co.,1213107071,1.0,4.0,4.0,BrewYa,Vienna Lager,3.0,3.5,Amber,4.5,5,2008-06-10 14:11:11,1,BrewYa_1,train
4,393411,3,Abita Brewing Co.,1210786743,1.0,3.0,3.5,brianj555,Vienna Lager,4.0,4.0,Amber,4.5,5,2008-05-14 17:39:03,1,brianj555_1,train


In [3]:
# Keep only the columns needed for the experiments.
# The explicit .copy() makes `df` an independent frame, so the column
# assignments below do not write into a subset of the loaded table
# (which triggers pandas' SettingWithCopyWarning).
# NOTE: `df` is overwritten here; re-run the loading cell first if the raw
# ids are needed again.
COLUMNS = ["user_session_id", "item_id", "rating", "split"]
df = df[COLUMNS].copy()

# Create the user-session encoding and the item encoding: every distinct raw id
# gets a contiguous integer index in order of first appearance, and the
# reversed dicts map an index back to its raw id.
user_session_id_to_idx = {
    user_session_id: idx
    for idx, user_session_id in enumerate(df["user_session_id"].unique())
}
user_session_idx_to_id = {
    idx: user_session_id
    for user_session_id, idx in user_session_id_to_idx.items()
}

item_id_to_idx = {
    item_id: idx for idx, item_id in enumerate(df["item_id"].unique())
}
item_idx_to_id = {idx: item_id for item_id, idx in item_id_to_idx.items()}

# Replace the raw ids with their integer indices using the dicts above.
df["user_session_id"] = df["user_session_id"].map(user_session_id_to_idx)
df["item_id"] = df["item_id"].map(item_id_to_idx)

# Number of unique user sessions and unique items.
n_user_sessions = len(user_session_id_to_idx)
n_items = len(item_id_to_idx)

# Instantiate the dataset from the rows of each split. The "split" column is
# accessed by key rather than by attribute so it cannot be confused with a
# DataFrame attribute.
dataset = UserSessionItemDataset(
    df[df["split"] == "train"],
    df[df["split"] == "val"],
    df[df["split"] == "test"],
    n_user_sessions,
    n_items,
)

In [4]:
%%capture hypers

l2s = [16., 32., 64., 128., 256., 512., 1024., 2048.]

best_l2s = hyperparameter_selection(dataset, l2s, ndcg, k=100)

In [5]:
# Show the log captured during hyperparameter selection.
print(hypers.stdout)

L2 16.0
Constructing G...
Density of G: 0.2711%
Inverting G...
EASE
ndcg @ 100: 0.11010977970627316 +- 0.0029537230973085537
AbsEASE
ndcg @ 100: 0.15210648519025716 +- 0.0040491685114193935

L2 32.0
Constructing G...
Density of G: 0.2711%
Inverting G...
EASE
ndcg @ 100: 0.11712577613281115 +- 0.003113120482480139
AbsEASE
ndcg @ 100: 0.15313110907880964 +- 0.004014097795644736

L2 64.0
Constructing G...
Density of G: 0.2711%
Inverting G...
EASE
ndcg @ 100: 0.12310681695639915 +- 0.003276064746597165
AbsEASE
ndcg @ 100: 0.1522630462908202 +- 0.003962465269139093

L2 128.0
Constructing G...
Density of G: 0.2711%
Inverting G...
EASE
ndcg @ 100: 0.13016416386874663 +- 0.0034542398783873837
AbsEASE
ndcg @ 100: 0.1521312066641572 +- 0.003967371754559834

L2 256.0
Constructing G...
Density of G: 0.2711%
Inverting G...
EASE
ndcg @ 100: 0.1359355808803027 +- 0.0035971510455619013
AbsEASE
ndcg @ 100: 0.15041141543936382 +- 0.003909926992710809

L2 512.0
Constructing G...
Density of G: 0.2711%
Inv

In [6]:
%%capture results

models = [("EASE", EASE, best_l2s["EASE"][0]), ("AbsEASE", AbsEASE, best_l2s["AbsEASE"][0])]

run_test(models, dataset, ks=[10,20,50,100,200,500])

In [7]:
# Show the log captured during the test run.
print(results.stdout)

Split test
EASE
Constructing G...
Density of G: 0.2711%
Inverting G...
pos_inputs
recall_liked @ 10: 0.025050836986159068 +- 0.0007182531921324155
recall_disliked @ 10: 0.003930217457281798 +- 0.0007736986974788041
ndcg @ 10: 0.04998882267835947 +- 0.001168767128107469

recall_liked @ 20: 0.04292634189293037 +- 0.0009266173813565513
recall_disliked @ 20: 0.00833002282859487 +- 0.0010947426704134423
ndcg @ 20: 0.06946865277768578 +- 0.001386015747021338

recall_liked @ 50: 0.08655658645567431 +- 0.001456847546259461
recall_disliked @ 50: 0.01760437656676048 +- 0.0015655440501033348
ndcg @ 50: 0.10642744588864496 +- 0.0018101900486347075

recall_liked @ 100: 0.13773059601807341 +- 0.0020227682828281926
recall_disliked @ 100: 0.031226685432945215 +- 0.0021294658915837227
ndcg @ 100: 0.14341039900073133 +- 0.0022415490711474935

recall_liked @ 200: 0.2132012481071339 +- 0.0026560767354704027
recall_disliked @ 200: 0.05455145132566641 +- 0.0027412319297446842
ndcg @ 200: 0.19000870197774913

In [8]:
# Folder for the saved logs; the trailing slash matters because paths are
# built by plain string concatenation.
RESULTS_FOLDER = "./results/beeradvocate/"

# Create the output folder (and any missing parents) before writing, so the
# captured logs are not lost to a FileNotFoundError on a fresh checkout.
os.makedirs(RESULTS_FOLDER, exist_ok=True)

# Persist the captured hyperparameter-selection log.
with open(RESULTS_FOLDER + "hyperparametersBeerAdvocate.txt", 'w') as f:
    f.write(hypers.stdout)

# Persist the captured test-run log.
with open(RESULTS_FOLDER + "resultsBeerAdvocate.txt", 'w') as f:
    f.write(results.stdout)