In [1]:
import os

import numpy as np
import pandas as pd

from dataset import UserSessionItemDataset
from evaluation import ndcg
from models import EASE, AbsEASE
from pipelines import hyperparameter_selection, run_test

In [2]:
DATA_FOLDER = "./data/"

# Load the pre-processed MovieLens (dense) interactions.
# The file carries an unnamed first column (it shows up as "Unnamed: 0" in the
# preview; presumably an index written out by an earlier `to_csv`). Skip it at
# read time so it does not travel along as a junk column; the default
# RangeIndex of the resulting frame is unchanged.
df = pd.read_csv(
    DATA_FOLDER + "ratings_processed_MovieLens_dense.csv",
    usecols=lambda column: not column.startswith("Unnamed"),
)

df.head()

Unnamed: 0,userId,item_id,rating,timestamp,datetime,sessionId,user_session_id,split
0,100007,104879,1.0,1430699422,2015-05-04 00:30:22,1,100007_1,train
1,100007,106100,1.0,1430699437,2015-05-04 00:30:37,1,100007_1,train
2,100007,111759,1.0,1430699444,2015-05-04 00:30:44,1,100007_1,train
3,100007,91529,1.0,1430699446,2015-05-04 00:30:46,1,100007_1,train
4,100007,105504,1.0,1430699451,2015-05-04 00:30:51,1,100007_1,train


In [3]:
# Keep only the columns needed for the experiments.
# Take an explicit copy: the column assignments further down would otherwise
# write into a column subset of the loaded frame, which is the pattern that
# triggers pandas' SettingWithCopyWarning.
COLUMNS = ["user_session_id", "item_id", "rating", "split"]
df = df[COLUMNS].copy()

# Create the user-session encoding and the item encoding: every distinct raw id
# gets a contiguous 0-based index, numbered in order of first appearance.
user_session_idx_to_id = dict(enumerate(df["user_session_id"].unique()))
user_session_id_to_idx = {
    user_session_id: idx for idx, user_session_id in user_session_idx_to_id.items()
}

item_idx_to_id = dict(enumerate(df["item_id"].unique()))
item_id_to_idx = {item_id: idx for idx, item_id in item_idx_to_id.items()}

# Replace the raw ids with their indices using the dicts above.
# NOTE: this rebinds `df`; re-running this cell without re-running the load
# cell rebuilds the dicts from already-encoded values and loses the raw ids.
df["user_session_id"] = df["user_session_id"].map(user_session_id_to_idx)
df["item_id"] = df["item_id"].map(item_id_to_idx)

# Number of unique user sessions and unique items.
n_user_sessions = len(user_session_id_to_idx)
n_items = len(item_id_to_idx)

# Instantiate the dataset from the train / val / test partitions given by the
# `split` column, plus the two counts.
dataset = UserSessionItemDataset(
    df[df["split"] == "train"],
    df[df["split"] == "val"],
    df[df["split"] == "test"],
    n_user_sessions,
    n_items,
)

In [4]:
%%capture hypers

l2s = [16., 32., 64., 128., 256., 512., 1024., 2048.]

best_l2s = hyperparameter_selection(dataset, l2s, ndcg, k=100)

In [5]:
# Show the captured hyperparameter-search log exactly as it was written to stdout.
print(hypers.stdout)

L2 16.0
Constructing G...
Density of G: 2.2862%
Inverting G...
EASE
ndcg @ 100: 0.30877934514010524 +- 0.0020116743563789513
AbsEASE
ndcg @ 100: 0.32841792942651227 +- 0.002196018757942943

L2 32.0
Constructing G...
Density of G: 2.2862%
Inverting G...
EASE
ndcg @ 100: 0.31854823101105245 +- 0.0020044417690727556
AbsEASE
ndcg @ 100: 0.3327411232188756 +- 0.002187747292458722

L2 64.0
Constructing G...
Density of G: 2.2862%
Inverting G...
EASE
ndcg @ 100: 0.32808652103593655 +- 0.0020083638246010207
AbsEASE
ndcg @ 100: 0.33647197958784164 +- 0.002173423457679356

L2 128.0
Constructing G...
Density of G: 2.2862%
Inverting G...
EASE
ndcg @ 100: 0.3370488372753557 +- 0.0020348099892300575
AbsEASE
ndcg @ 100: 0.33980577254048216 +- 0.002173095082133281

L2 256.0
Constructing G...
Density of G: 2.2862%
Inverting G...
EASE
ndcg @ 100: 0.34579986837418425 +- 0.0020602682574403903
AbsEASE
ndcg @ 100: 0.3429952917742206 +- 0.0021699410235596433

L2 512.0
Constructing G...
Density of G: 2.2862%
I

In [6]:
%%capture results

models = [("EASE", EASE, best_l2s["EASE"][0]), ("AbsEASE", AbsEASE, best_l2s["AbsEASE"][0])]

run_test(models, dataset, ks=[10,20,50,100,200,500])

In [7]:
# Show the captured test report exactly as it was written to stdout.
print(results.stdout)

Split test
EASE
Constructing G...
Density of G: 2.2862%
Inverting G...
pos_inputs
recall_liked @ 10: 0.0966423714955509 +- 0.0006048672506785888
recall_disliked @ 10: 0.016735966447383985 +- 0.0004894501419696854
ndcg @ 10: 0.15831096982929171 +- 0.0008262762782482457

recall_liked @ 20: 0.15850019261528384 +- 0.000826559850017724
recall_disliked @ 20: 0.033387647465186514 +- 0.0007020575428607767
ndcg @ 20: 0.21015763217379518 +- 0.0009619261425737555

recall_liked @ 50: 0.28352870710927675 +- 0.0011447025286009792
recall_disliked @ 50: 0.08030630650521578 +- 0.0011024437642918846
ndcg @ 50: 0.2927802521828431 +- 0.0011603697145436914

recall_liked @ 100: 0.4107554497232027 +- 0.0013290227802492584
recall_disliked @ 100: 0.14624651310089767 +- 0.001487812779478022
ndcg @ 100: 0.3610643368682215 +- 0.0013301062358884483

recall_liked @ 200: 0.5497049729542183 +- 0.001386967678348829
recall_disliked @ 200: 0.24169077554714016 +- 0.0018063574090692275
ndcg @ 200: 0.423865788894723 +- 0.0

In [8]:
RESULTS_FOLDER = "./results/"

# Make sure the output folder exists: open(..., "w") does not create missing
# directories, and failing here would lose the logs of the runs above.
os.makedirs(RESULTS_FOLDER, exist_ok=True)

# Persist the captured stdout of the hyperparameter search.
hypers_path = os.path.join(RESULTS_FOLDER, "hyperparametersMovieLens_dense.txt")
with open(hypers_path, "w", encoding="utf-8") as f:
    f.write(hypers.stdout)

# Persist the captured stdout of the test run.
results_path = os.path.join(RESULTS_FOLDER, "resultsMovieLens_dense.txt")
with open(results_path, "w", encoding="utf-8") as f:
    f.write(results.stdout)