In [1]:
import os

import numpy as np
import pandas as pd

from dataset import UserSessionItemDataset
from evaluation import ndcg
from models import EASE, AbsEASE
from pipelines import hyperparameter_selection, run_test

In [2]:
DATA_FOLDER = "./data/yelp2018/"

# Load the pre-processed Yelp ratings file from the data folder and preview
# its first rows as the cell output.
df = pd.read_csv(f"{DATA_FOLDER}ratings_processed_Yelp.csv")

df.head()

Unnamed: 0,userId,item_id,rating,sessionId,user_session_id,split
0,0,0,1.0,1,0_1,train
1,0,16,1.0,1,0_1,train
2,0,14,1.0,1,0_1,train
3,0,13,1.0,1,0_1,train
4,0,12,1.0,1,0_1,train


In [3]:
# Keep only the columns needed for the experiments. The explicit .copy() makes
# the result an independent frame, so the column assignments further down do
# not raise pandas' SettingWithCopyWarning.
# NOTE: this cell rebinds `df` and overwrites its id columns, so it is not
# idempotent -- re-run the loading cell before re-running this one, otherwise
# the idx -> id dicts will map indices to already-encoded indices.
COLUMNS = ["user_session_id", "item_id", "rating", "split"]
df = df[COLUMNS].copy()

# Create the user-session encoding and the item encoding: every distinct raw id
# gets a contiguous integer index in order of first appearance, and the
# inverse dicts translate indices back to the raw ids.
user_session_id_to_idx = {user_session_id: idx for idx, user_session_id in enumerate(df["user_session_id"].unique())}
user_session_idx_to_id = {idx: user_session_id for user_session_id, idx in user_session_id_to_idx.items()}

item_id_to_idx = {item_id: idx for idx, item_id in enumerate(df["item_id"].unique())}
item_idx_to_id = {idx: item_id for item_id, idx in item_id_to_idx.items()}

# Replace the raw ids with their integer indices using the dicts above.
df["user_session_id"] = df["user_session_id"].map(user_session_id_to_idx)
df["item_id"] = df["item_id"].map(item_id_to_idx)

# Number of distinct user sessions and distinct items (computed over all
# splits, since the encodings were built on the full frame).
n_user_sessions = len(user_session_id_to_idx)
n_items = len(item_id_to_idx)

# Instantiate the dataset from the train / val / test partitions given by the
# "split" column, together with the two cardinalities.
dataset = UserSessionItemDataset(
    df[df["split"] == "train"],
    df[df["split"] == "val"],
    df[df["split"] == "test"],
    n_user_sessions,
    n_items,
)

In [4]:
%%capture hypers

l2s = [16., 32., 64., 128., 256., 512., 1024., 2048.]

best_l2s = hyperparameter_selection(dataset, l2s, ndcg, k=100)

KeyboardInterrupt: 

In [5]:
# Replay the log captured during hyperparameter selection.
print(hypers.stdout)

L2 16.0
Constructing G...
Density of G: 0.3285%
Inverting G...



In [None]:
%%capture results

models = [("EASE", EASE, best_l2s["EASE"][0]), ("AbsEASE", AbsEASE, best_l2s["AbsEASE"][0])]

run_test(models, dataset, ks=[10,20,50,100,200,500])

In [None]:
# Replay the report captured during the test run.
print(results.stdout)

Split test
EASE
Constructing G...
Density of G: 0.8614%
Inverting G...
pos_inputs
recall_liked @ 10: 0.027367960484194983 +- 0.0002858207136123444
recall_disliked @ 10: 0.00937069541021903 +- 0.0002831766889089224
ndcg @ 10: 0.03721101691783137 +- 0.000421343472087699

recall_liked @ 20: 0.04719899963840789 +- 0.0003755001360585637
recall_disliked @ 20: 0.018253255365881977 +- 0.00039382772535589354
ndcg @ 20: 0.051204320978116426 +- 0.000485485134017098

recall_liked @ 50: 0.09431455837017902 +- 0.0005435316673981164
recall_disliked @ 50: 0.04226728854125566 +- 0.0006033763916088162
ndcg @ 50: 0.07738647029900476 +- 0.0006055021518742814

recall_liked @ 100: 0.1545218697145432 +- 0.0007365315501937447
recall_disliked @ 100: 0.0779810022195923 +- 0.0008308543736018609
ndcg @ 100: 0.10489852018470314 +- 0.000738417535186902

recall_liked @ 200: 0.24364567957323457 +- 0.001005046469844797
recall_disliked @ 200: 0.1379936945001327 +- 0.0011120954874125207
ndcg @ 200: 0.1390009486243042 +-

In [None]:
RESULTS_FOLDER = "./results/yelp2018/"

# Make sure the output folder exists: open(..., "w") does not create missing
# directories and would otherwise fail with FileNotFoundError.
os.makedirs(RESULTS_FOLDER, exist_ok=True)

# Persist the captured stdout of the hyperparameter search and of the test
# run, one text file each.
for filename, captured in (
    ("hyperparametersYelp.txt", hypers),
    ("resultsYelp.txt", results),
):
    with open(RESULTS_FOLDER + filename, "w", encoding="utf-8") as f:
        f.write(captured.stdout)