# MovieLens Experiment

You can load the saved experiment results and reproduce the table below:

In [4]:
import pickle
import qgrid
import pandas as pd
from IPython.display import display
from scipy.stats import kendalltau, ttest_rel
from utils import natural_keys


# load results
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# open a results file you produced yourself or obtained from a trusted source.
with open('./data/exp_stra_ml.pkl', 'rb') as exp_file:
    # res_stra is iterated once per model later in this notebook, and each
    # element is indexed by position (0, 1, 3, 4 and 5 are read) -- presumably
    # one result object per evaluation variant; verify against STExperiment.
    res_stra = pickle.load(exp_file)

    
def get_metric_value(results, model_name, metric, user_base=False):
    """Look up one metric for one model in a collection of result objects.

    Parameters
    ----------
    results : iterable
        Objects exposing ``model_name``, ``metric_avg_results`` and
        ``metric_user_results`` attributes.
    model_name : str
        Name of the model to look for; the first result with an equal
        ``model_name`` is used.
    metric : hashable
        Key to read from the selected metric mapping.
    user_base : bool, optional
        When truthy, read from ``metric_user_results``; otherwise (default)
        read from ``metric_avg_results``.

    Returns
    -------
    The value stored under ``metric`` for the matching result, or ``None``
    when no result carries the requested ``model_name``.
    """
    source_attr = 'metric_user_results' if user_base else 'metric_avg_results'

    # first result for this model, or None if the model is absent
    match = next((res for res in results if res.model_name == model_name), None)
    if match is None:
        return None

    return getattr(match, source_attr)[metric]

    
# Metrics to tabulate: every key of the first stored result's averaged
# metrics, minus those whose name contains 'SIZE', ordered by natural_keys.
METRICS = sorted(
    filter(lambda name: 'SIZE' not in name,
           res_stra[0][0].metric_avg_results.keys()),
    key=natural_keys,
)

# Model names, read from the first result stored in each entry of res_stra.
MODELS = [model_results[0].model_name for model_results in res_stra]


# Column label -> position of that evaluation variant inside each entry of
# res_stra. The order of this list is the column order of the table.
# NOTE(review): position 2 is never read here -- confirm that is intentional.
VARIANT_SLOTS = [('CLOSED', 0), ('IPS', 1), ('UNBIASED', 5), ('Q1', 3), ('Q2', 4)]

# Slice res_stra once per variant; these lists do not depend on the metric or
# the model, so there is no need to rebuild them inside the loops.
results_by_variant = [
    [res_array[slot] for res_array in res_stra]
    for _, slot in VARIANT_SLOTS
]

# Collect all rows first and build the frame in a single call: enlarging a
# DataFrame one row at a time with df.loc[idx] = ... gets slower as it grows.
rows = []
for metric in METRICS:
    for model in MODELS:
        # every value is stored as a string formatted to three decimals
        formatted = ['%.3f' % get_metric_value(variant_results, model, metric)
                     for variant_results in results_by_variant]
        rows.append([metric, model] + formatted)

# One row per (metric, model) pair, labelled 0..n-1 in insertion order.
df = pd.DataFrame(
    rows,
    columns=['METRIC', 'MODEL'] + [label for label, _ in VARIANT_SLOTS],
)
    


# show only the rows of the table that belong to the NDCG@-1 metric
df.loc[df['METRIC'] == 'NDCG@-1']


# use the following to interact with the table
# qgrid.show_grid(df[df.METRIC == 'NDCG@-1'])

Unnamed: 0,METRIC,MODEL,CLOSED,IPS,UNBIASED,Q1,Q2
520,NDCG@-1,GA,0.259,0.144,0.255,0.257,0.102
521,NDCG@-1,MPOP,0.373,0.214,0.344,0.340,0.696
522,NDCG@-1,BaselineOnly,0.297,0.158,0.291,0.293,0.156
523,NDCG@-1,MLP,0.414,0.237,0.400,0.401,0.341
524,NDCG@-1,MF10,0.323,0.171,0.316,0.318,0.183
...,...,...,...,...,...,...,...
619,NDCG@-1,NeuMF60,0.385,0.220,0.377,0.379,0.203
620,NDCG@-1,NeuMF70,0.382,0.218,0.374,0.376,0.205
621,NDCG@-1,NeuMF80,0.376,0.215,0.369,0.371,0.193
622,NDCG@-1,NeuMF90,0.369,0.209,0.362,0.364,0.183


If you want to train all the models from scratch, use the following block (it takes a long time to complete the training/evaluation of all available models):

In [None]:
import pickle

from cornac.datasets import movielens
from eval_methods.stratified_evaluation import StratifiedEvaluation
from experiment.experiment import STExperiment
from utils import get_models, get_metrics


# fetch the MovieLens feedback data ("1M" variant)
ml = movielens.load_feedback(variant="1M")


# evaluation method: propensity-based stratified evaluation with two strata
stra_eval_method = StratifiedEvaluation(
    data=ml,
    n_strata=2,
    rating_threshold=4.0,
    verbose=True,
)

# experiment over the 'large' model and metric sets
exp_stra = STExperiment(
    eval_method=stra_eval_method,
    models=get_models(variant='large'),
    metrics=get_metrics(variant='large'),
    verbose=True,
)

# train and evaluate every model
exp_stra.run()

# store the results in the file that the loading cell reads back
with open('./data/exp_stra_ml.pkl', 'wb') as exp_file:
    pickle.dump(exp_stra.result, exp_file)
