In [1]:
import numpy as np
import pandas as pd
import json
import pprint
import json
# Shared pretty-printer (4-space indent) for dumping the JSON run configs.
pp = pprint.PrettyPrinter(indent=4)
# Allow up to 100 characters per cell so the long `path` values fit in displayed tables.
pd.set_option('display.max_colwidth', 100)

In [2]:
def df_mean(path, apply_to_df):
    """Load one metrics CSV and collapse it to a single row of column means.

    Parameters
    ----------
    path : str
        Location of the metrics CSV. It is also stored in the ``path`` column
        of the result so the row can be traced back to its file.
    apply_to_df : callable
        Called with the freshly loaded DataFrame; must return the DataFrame
        whose rows should be averaged. It runs *before* the bookkeeping
        columns are dropped, so it may filter on them.

    Returns
    -------
    pandas.DataFrame
        A one-row frame (index ``0``) holding the mean of every remaining
        numeric column, plus the ``path`` column.
    """
    # Bookkeeping columns that are not metrics; removed when present.
    # NOTE(review): other identifier columns (e.g. `movie_id` in the
    # movie-metrics files) are still averaged -- confirm whether they should
    # be dropped as well.
    non_metric_columns = ['nb_movies_not_in_train', 'nb_test_ratings',
                          'nb_train_ratings', 'user_id']

    metrics = apply_to_df(pd.read_csv(path))
    # errors='ignore' tolerates files that lack some of these columns.
    metrics = metrics.drop(non_metric_columns, axis=1, errors='ignore')

    # numeric_only=True skips non-numeric columns explicitly; recent pandas
    # versions raise on them instead of silently leaving them out.
    summary = metrics.mean(axis=0, numeric_only=True).to_frame().T
    summary['path'] = path

    return summary
    

def get_df_results(results, max_train_ratings=None):
    """Summarise several metrics CSVs into one DataFrame, one row per file.

    Parameters
    ----------
    results : iterable of str
        Paths of the metrics CSVs to summarise.
    max_train_ratings : int or None, optional
        When given, only rows with ``nb_train_ratings <= max_train_ratings``
        are averaged. ``None`` (the default) disables this filter. A value of
        ``0`` is a real threshold, not "no filter".

    Returns
    -------
    pandas.DataFrame
        The per-file rows produced by ``df_mean``, stacked in input order.
        Each row keeps the index ``0`` it got from ``df_mean``. An empty
        DataFrame is returned when ``results`` is empty.
    """
    def keep_rows(df):
        # Rows without any test rating are always excluded.
        mask = df['nb_test_ratings'] > 0
        # `is not None` rather than truthiness, so a threshold of 0 is honoured.
        if max_train_ratings is not None:
            mask = mask & (df['nb_train_ratings'] <= max_train_ratings)
        return df[mask]

    rows = [df_mean(path, keep_rows) for path in results]
    if not rows:
        return pd.DataFrame()
    # pd.concat replaces the reduce()/DataFrame.append chain: reduce is not a
    # builtin on Python 3 and DataFrame.append was removed in pandas 2.0.
    return pd.concat(rows)

## ML-100k - 0.7 train / 0.3 test - sparse item

#### User Metrics

In [35]:
# Per-user metric files of the ML-100k runs (0.7 train / 0.3 test, sparse item),
# listed in filename (timestamp) order.
user_metrics = [
    '../metrics/mpcf-si/2016-05-27_19.53.55_si_ml-100k_e20_tt-0.7_task-9_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_19.59.33_si_ml-100k_e20_tt-0.7_task-15_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.08.37_si_ml-100k_e20_tt-0.7_task-21_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.12.44_si_ml-100k_e20_tt-0.7_task-18_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.14.47_si_ml-100k_e20_tt-0.7_task-11_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.14.56_si_ml-100k_e20_tt-0.7_task-16_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.22.56_si_ml-100k_e20_tt-0.7_task-13_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.57.19_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_21.01.53_si_ml-100k_e20_tt-0.7_task-19_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_21.03.20_si_ml-100k_e20_tt-0.7_task-12_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_21.04.23_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_21.33.24_si_ml-100k_e20_tt-0.7_task-7_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_21.44.02_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_23.21.40_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_23.49.17_si_ml-100k_e20_tt-0.7_task-23_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-28_01.30.30_si_ml-100k_e20_tt-0.7_task-14_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-28_02.47.05_si_ml-100k_e20_tt-0.7_task-22_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-28_03.04.16_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-28_03.09.13_si_ml-100k_e20_tt-0.7_task-17_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-28_04.43.53_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_17.34.49_si_ml-100k_e20_tt-0.7_task-23_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_17.49.56_si_ml-100k_e20_tt-0.7_task-21_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_18.00.20_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_18.31.35_si_ml-100k_e20_tt-0.7_task-9_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_18.42.24_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_19.38.01_si_ml-100k_e20_tt-0.7_task-14_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_19.55.26_si_ml-100k_e20_tt-0.7_task-13_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_20.24.05_si_ml-100k_e20_tt-0.7_task-17_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_21.22.49_si_ml-100k_e20_tt-0.7_task-15_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_23.07.20_si_ml-100k_e20_tt-0.7_task-11_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_01.29.40_si_ml-100k_e20_tt-0.7_task-19_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_02.24.06_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_03.42.06_si_ml-100k_e20_tt-0.7_task-22_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_04.19.08_si_ml-100k_e20_tt-0.7_task-7_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_05.11.15_si_ml-100k_e20_tt-0.7_task-10_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_07.32.49_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_08.37.36_si_ml-100k_e20_tt-0.7_task-18_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_08.44.16_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_08.54.39_si_ml-100k_e20_tt-0.7_task-12_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_09.23.30_si_ml-100k_e20_tt-0.7_task-16_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_11.40.56_si_ml-100k_e20_tt-0.7_task-20_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_12.18.25_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_12.42.06_si_ml-100k_e20_tt-0.7_task-8_user-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_15.17.28_si_ml-100k_e20_tt-0.7_task-6_user-metrics.csv',
]

In [36]:
# Average each run's per-user metrics (rows with at least one test rating)
# and rank the runs by AUC, best first.
df_user_metrics = get_df_results(results=user_metrics)
df_user_metrics.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.934683,0.31837,0.272404,0.717828,0.268505,0.422799,0.655531,0.199437,../metrics/mpcf-si/2016-05-27_20.22.56_si_ml-100k_e20_tt-0.7_task-13_user-metrics.csv
0,0.934027,0.316369,0.272239,0.717112,0.267126,0.425697,0.645786,0.197818,../metrics/mpcf-si/2016-05-27_19.59.33_si_ml-100k_e20_tt-0.7_task-15_user-metrics.csv
0,0.932968,0.329223,0.279272,0.718236,0.274761,0.431895,0.670167,0.199318,../metrics/mpcf-si/2016-05-28_03.09.13_si_ml-100k_e20_tt-0.7_task-17_user-metrics.csv
0,0.932926,0.300564,0.260747,0.718616,0.255992,0.40727,0.626905,0.199396,../metrics/mpcf-si/2016-05-30_23.07.20_si_ml-100k_e20_tt-0.7_task-11_user-metrics.csv
0,0.929294,0.331451,0.281891,0.721636,0.276776,0.440123,0.670629,0.209864,../metrics/mpcf-si/2016-05-27_21.33.24_si_ml-100k_e20_tt-0.7_task-7_user-metrics.csv
0,0.927645,0.352963,0.294807,0.716287,0.289873,0.452282,0.704771,0.194335,../metrics/mpcf-si/2016-05-27_20.12.44_si_ml-100k_e20_tt-0.7_task-18_user-metrics.csv
0,0.924666,0.312589,0.272404,0.710594,0.267762,0.422716,0.639963,0.176529,../metrics/mpcf-si/2016-05-31_05.11.15_si_ml-100k_e20_tt-0.7_task-10_user-metrics.csv
0,0.923418,0.285442,0.252855,0.717098,0.25106,0.386673,0.617678,0.195425,../metrics/mpcf-si/2016-05-30_18.00.20_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv
0,0.922296,0.293043,0.251879,0.745679,0.250318,0.388174,0.641182,0.280479,../metrics/mpcf-si/2016-05-31_09.23.30_si_ml-100k_e20_tt-0.7_task-16_user-metrics.csv
0,0.921748,0.328802,0.275507,0.746899,0.273542,0.418921,0.674884,0.284524,../metrics/mpcf-si/2016-05-27_21.03.20_si_ml-100k_e20_tt-0.7_task-12_user-metrics.csv


In [8]:
# Best config: hyper-parameters of the run with the highest user-level AUC (task-13).
with open('../models/mpcf-si/2016-05-27_20.22.56_si_ml-100k_e20_tt-0.7_task-13_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-13',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 4,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'si_lambda_delta_qi': 0.001,
    u'si_lr': 0.0003,
    u'si_lr_decay': 0.0005,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.001,
    u'sparse_item': True,
    u'test': True,
    u

In [9]:
# Config of task-15 (run 2016-05-27_19.59.33), for comparison with the best run.
with open('../models/mpcf-si/2016-05-27_19.59.33_si_ml-100k_e20_tt-0.7_task-15_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-15',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 2,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.003,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'si_lambda_delta_qi': 0.001,
    u'si_lr': 0.001,
    u'si_lr_decay': 0.02,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.003,
    u'sparse_item': True,
    u'test': True,
    u

In [28]:
# Config of task-17 (run 2016-05-28_03.09.13), for comparison with the best run.
with open('../models/mpcf-si/2016-05-28_03.09.13_si_ml-100k_e20_tt-0.7_task-17_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-17',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 2,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.003,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'si_lambda_delta_qi': 0.001,
    u'si_lr': 0.003,
    u'si_lr_decay': 0.0005,
    u'si_model': True,
    u'si_nn': [128, 160, 50],
    u'si_reg_lambda': 0.001,
    u'sparse_item': True,
    u'test': True,


In [37]:
# Same ranking, restricted to rows with at most 10 training ratings
# (nb_train_ratings <= 10).
df_max_train = get_df_results(results=user_metrics, max_train_ratings=10)
df_max_train.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.972357,0.211164,0.283847,0.628358,0.1875,0.607143,0.410714,0.051496,../metrics/mpcf-si/2016-05-31_09.23.30_si_ml-100k_e20_tt-0.7_task-16_user-metrics.csv
0,0.972353,0.203003,0.22758,0.60902,0.15,0.494048,0.370833,-0.002982,../metrics/mpcf-si/2016-05-31_07.32.49_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv
0,0.970891,0.291967,0.28456,0.642613,0.1875,0.613095,0.598214,0.081329,../metrics/mpcf-si/2016-05-27_19.59.33_si_ml-100k_e20_tt-0.7_task-15_user-metrics.csv
0,0.97069,0.208374,0.24681,0.586844,0.1625,0.535714,0.374269,-0.078331,../metrics/mpcf-si/2016-05-30_23.07.20_si_ml-100k_e20_tt-0.7_task-11_user-metrics.csv
0,0.970666,0.229718,0.246098,0.61047,0.1625,0.529762,0.360417,-0.012849,../metrics/mpcf-si/2016-05-27_20.22.56_si_ml-100k_e20_tt-0.7_task-13_user-metrics.csv
0,0.967724,0.237217,0.224359,0.601419,0.15,0.446429,0.411706,-0.058185,../metrics/mpcf-si/2016-05-31_08.37.36_si_ml-100k_e20_tt-0.7_task-18_user-metrics.csv
0,0.967466,0.164535,0.168091,0.614515,0.1125,0.333333,0.397222,-0.029559,../metrics/mpcf-si/2016-05-30_18.31.35_si_ml-100k_e20_tt-0.7_task-9_user-metrics.csv
0,0.96518,0.145699,0.131054,0.595238,0.0875,0.261905,0.336465,-0.079279,../metrics/mpcf-si/2016-05-27_21.04.23_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv
0,0.964694,0.155462,0.149573,0.58515,0.1,0.297619,0.357655,-0.08118,../metrics/mpcf-si/2016-05-30_18.42.24_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv
0,0.964654,0.183651,0.262108,0.61917,0.175,0.52381,0.362787,0.002339,../metrics/mpcf-si/2016-05-28_02.47.05_si_ml-100k_e20_tt-0.7_task-22_user-metrics.csv


#### Movie Metrics

In [38]:
# Per-movie metric files of the ML-100k runs (0.7 train / 0.3 test, sparse item),
# listed in filename (timestamp) order.
movie_metrics = [
    '../metrics/mpcf-si/2016-05-27_19.53.55_si_ml-100k_e20_tt-0.7_task-9_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_19.59.33_si_ml-100k_e20_tt-0.7_task-15_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.08.37_si_ml-100k_e20_tt-0.7_task-21_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.12.44_si_ml-100k_e20_tt-0.7_task-18_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.14.47_si_ml-100k_e20_tt-0.7_task-11_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.14.56_si_ml-100k_e20_tt-0.7_task-16_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.22.56_si_ml-100k_e20_tt-0.7_task-13_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_20.57.19_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_21.01.53_si_ml-100k_e20_tt-0.7_task-19_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_21.03.20_si_ml-100k_e20_tt-0.7_task-12_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_21.04.23_si_ml-100k_e20_tt-0.7_task-4_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_21.33.24_si_ml-100k_e20_tt-0.7_task-7_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_21.44.02_si_ml-100k_e20_tt-0.7_task-3_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_23.21.40_si_ml-100k_e20_tt-0.7_task-2_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-27_23.49.17_si_ml-100k_e20_tt-0.7_task-23_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-28_01.30.30_si_ml-100k_e20_tt-0.7_task-14_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-28_02.47.05_si_ml-100k_e20_tt-0.7_task-22_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-28_03.04.16_si_ml-100k_e20_tt-0.7_task-0_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-28_03.09.13_si_ml-100k_e20_tt-0.7_task-17_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-28_04.43.53_si_ml-100k_e20_tt-0.7_task-5_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_17.34.49_si_ml-100k_e20_tt-0.7_task-23_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_17.49.56_si_ml-100k_e20_tt-0.7_task-21_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_18.00.20_si_ml-100k_e20_tt-0.7_task-3_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_18.31.35_si_ml-100k_e20_tt-0.7_task-9_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_18.42.24_si_ml-100k_e20_tt-0.7_task-0_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_19.38.01_si_ml-100k_e20_tt-0.7_task-14_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_19.55.26_si_ml-100k_e20_tt-0.7_task-13_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_20.24.05_si_ml-100k_e20_tt-0.7_task-17_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_21.22.49_si_ml-100k_e20_tt-0.7_task-15_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-30_23.07.20_si_ml-100k_e20_tt-0.7_task-11_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_01.29.40_si_ml-100k_e20_tt-0.7_task-19_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_02.24.06_si_ml-100k_e20_tt-0.7_task-4_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_03.42.06_si_ml-100k_e20_tt-0.7_task-22_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_04.19.08_si_ml-100k_e20_tt-0.7_task-7_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_05.11.15_si_ml-100k_e20_tt-0.7_task-10_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_07.32.49_si_ml-100k_e20_tt-0.7_task-5_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_08.37.36_si_ml-100k_e20_tt-0.7_task-18_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_08.44.16_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_08.54.39_si_ml-100k_e20_tt-0.7_task-12_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_09.23.30_si_ml-100k_e20_tt-0.7_task-16_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_11.40.56_si_ml-100k_e20_tt-0.7_task-20_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_12.18.25_si_ml-100k_e20_tt-0.7_task-2_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_12.42.06_si_ml-100k_e20_tt-0.7_task-8_movie-metrics.csv',
    '../metrics/mpcf-si/2016-05-31_15.17.28_si_ml-100k_e20_tt-0.7_task-6_movie-metrics.csv',
]

In [40]:
# Average each run's per-movie metrics (rows with at least one test rating),
# rank the runs by AUC and show only the AUC and the source file.
df_movie_metrics = get_df_results(results=movie_metrics)
df_movie_metrics.sort_values(by='auc', ascending=False)[['auc', 'path']]

Unnamed: 0,auc,path
0,0.905773,../metrics/mpcf-si/2016-05-30_23.07.20_si_ml-100k_e20_tt-0.7_task-11_movie-metrics.csv
0,0.903832,../metrics/mpcf-si/2016-05-27_20.22.56_si_ml-100k_e20_tt-0.7_task-13_movie-metrics.csv
0,0.90127,../metrics/mpcf-si/2016-05-27_19.59.33_si_ml-100k_e20_tt-0.7_task-15_movie-metrics.csv
0,0.898383,../metrics/mpcf-si/2016-05-28_03.09.13_si_ml-100k_e20_tt-0.7_task-17_movie-metrics.csv
0,0.893846,../metrics/mpcf-si/2016-05-27_20.12.44_si_ml-100k_e20_tt-0.7_task-18_movie-metrics.csv
0,0.892883,../metrics/mpcf-si/2016-05-27_21.33.24_si_ml-100k_e20_tt-0.7_task-7_movie-metrics.csv
0,0.890931,../metrics/mpcf-si/2016-05-30_18.00.20_si_ml-100k_e20_tt-0.7_task-3_movie-metrics.csv
0,0.890696,../metrics/mpcf-si/2016-05-31_05.11.15_si_ml-100k_e20_tt-0.7_task-10_movie-metrics.csv
0,0.886521,../metrics/mpcf-si/2016-05-31_08.37.36_si_ml-100k_e20_tt-0.7_task-18_movie-metrics.csv
0,0.885921,../metrics/mpcf-si/2016-05-30_17.34.49_si_ml-100k_e20_tt-0.7_task-23_movie-metrics.csv


In [41]:
# Same per-movie ranking, restricted to rows with at most 5 training ratings
# (nb_train_ratings <= 5).
df_max_ratings = get_df_results(results=movie_metrics, max_train_ratings=5)
df_max_ratings.sort_values(by='auc', ascending=False)[['auc', 'path']]

Unnamed: 0,auc,path
0,0.896889,../metrics/mpcf-si/2016-05-30_23.07.20_si_ml-100k_e20_tt-0.7_task-11_movie-metrics.csv
0,0.890414,../metrics/mpcf-si/2016-05-30_17.34.49_si_ml-100k_e20_tt-0.7_task-23_movie-metrics.csv
0,0.881187,../metrics/mpcf-si/2016-05-31_11.40.56_si_ml-100k_e20_tt-0.7_task-20_movie-metrics.csv
0,0.877344,../metrics/mpcf-si/2016-05-28_04.43.53_si_ml-100k_e20_tt-0.7_task-5_movie-metrics.csv
0,0.87638,../metrics/mpcf-si/2016-05-27_20.22.56_si_ml-100k_e20_tt-0.7_task-13_movie-metrics.csv
0,0.873618,../metrics/mpcf-si/2016-05-30_18.00.20_si_ml-100k_e20_tt-0.7_task-3_movie-metrics.csv
0,0.872695,../metrics/mpcf-si/2016-05-31_08.54.39_si_ml-100k_e20_tt-0.7_task-12_movie-metrics.csv
0,0.872617,../metrics/mpcf-si/2016-05-27_20.12.44_si_ml-100k_e20_tt-0.7_task-18_movie-metrics.csv
0,0.872408,../metrics/mpcf-si/2016-05-27_19.59.33_si_ml-100k_e20_tt-0.7_task-15_movie-metrics.csv
0,0.870048,../metrics/mpcf-si/2016-05-27_21.04.23_si_ml-100k_e20_tt-0.7_task-4_movie-metrics.csv


## ML-1M - 0.7 train / 0.3 test - sparse item

#### User Metrics

In [49]:
# Per-user metric files of the ML-1M runs (0.7 train / 0.3 test, sparse item),
# listed in filename (timestamp) order. The list mixes `e20` and `e10` runs
# (presumably 20 vs. 10 training epochs -- confirm against the configs).
user_metrics = [
    '../metrics/mpcf-si/2016-06-01_08.54.21_si_ml-1m_e20_tt-0.7_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-01_15.01.00_si_ml-1m_e20_tt-0.7_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-01_16.33.47_si_ml-1m_e20_tt-0.7_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-01_23.35.02_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-02_00.57.17_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-02_18.09.41_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-02_22.12.23_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-03_20.02.46_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-06_20.10.02_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-06_23.07.49_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-06_23.29.54_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-07_05.39.35_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-07_20.47.47_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-07_21.39.22_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-07_21.53.19_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-07_23.02.17_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-08_00.56.27_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-08_03.49.35_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-08_04.15.13_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-08_04.29.32_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-08_18.52.11_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-08_20.08.05_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-08_20.39.13_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-08_22.49.46_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-09_01.30.42_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-09_01.32.24_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
]

In [50]:
# Average each ML-1M run's per-user metrics (rows with at least one test rating)
# and rank the runs by AUC, best first.
df_results = get_df_results(results=user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.925252,0.297928,0.263204,0.728678,0.318609,0.327986,0.702322,0.2257,../metrics/mpcf-si/2016-06-02_18.09.41_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.92497,0.260039,0.235994,0.728433,0.287897,0.291247,0.629571,0.225268,../metrics/mpcf-si/2016-06-08_20.39.13_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.924824,0.310038,0.270518,0.727141,0.326821,0.336965,0.716696,0.221798,../metrics/mpcf-si/2016-06-01_16.33.47_si_ml-1m_e20_tt-0.7_task-3_user-metrics.csv
0,0.923946,0.303491,0.265883,0.726918,0.322997,0.329248,0.710218,0.220628,../metrics/mpcf-si/2016-06-07_23.02.17_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.919527,0.256535,0.232932,0.727058,0.285099,0.286471,0.633172,0.220902,../metrics/mpcf-si/2016-06-08_22.49.46_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.915942,0.281615,0.24999,0.766436,0.311101,0.303153,0.687693,0.338127,../metrics/mpcf-si/2016-06-07_21.39.22_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.914781,0.303208,0.265208,0.762645,0.327434,0.322262,0.717327,0.326963,../metrics/mpcf-si/2016-06-03_20.02.46_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.913011,0.243559,0.223454,0.724174,0.27202,0.275932,0.615218,0.21197,../metrics/mpcf-si/2016-06-08_20.08.05_si_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.911171,0.277091,0.246566,0.761759,0.309992,0.294635,0.67876,0.323863,../metrics/mpcf-si/2016-06-01_08.54.21_si_ml-1m_e20_tt-0.7_task-3_user-metrics.csv
0,0.909426,0.252964,0.226982,0.765135,0.289065,0.267403,0.654458,0.333376,../metrics/mpcf-si/2016-06-06_20.10.02_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv


In [6]:
# Best config: hyper-parameters of the ML-1M run with the highest user-level AUC
# (run 2016-06-02_18.09.41).
with open('../models/mpcf-si/2016-06-02_18.09.41_si_ml-1m_e20_tt-0.7_task-0_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': True,
    u'experiment_name': u'si_ml-1m_e20_tt-0.7_task-0',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 96,
    u'nb_user_pref': 8,
    u'pool_size': 24,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'si_lambda_delta_qi': 0.03,
    u'si_lr': 0.003,
    u'si_lr_decay': 0.02,
    u'si_model': True,
    u'si_nn': [96, 160, 50],
    u'si_reg_lambda': 0.0003,
    u'sparse_item': True,
    u'te

## ML-100k - 0.7 train / 0.3 test - only 1 or 2 user pref - no binarize - sparse item

In [5]:
# Per-user metric files of the ML-100k runs with 1 or 2 user preferences and no
# binarization (0.7 train / 0.3 test, sparse item), in filename order.
user_metrics = [
    '../metrics/mpcf-si/2016-06-13_20.32.40_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.13.50_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.42.34_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.42.36_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_05.12.16_si_ml-100k_e20_tt-0.7_task-7_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_05.16.08_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_07.47.57_si_ml-100k_e20_tt-0.7_task-8_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_08.23.27_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_08.54.26_si_ml-100k_e20_tt-0.7_task-10_user-metrics.csv',
]

In [6]:
# Average each run's per-user metrics (rows with at least one test rating)
# and rank the runs by AUC, best first.
df_results = get_df_results(results=user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.923356,0.34837,0.289676,0.75005,0.28632,0.447321,0.700376,0.292892,../metrics/mpcf-si/2016-06-14_08.23.27_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv
0,0.916473,0.352538,0.290847,0.744711,0.288494,0.444731,0.711323,0.277453,../metrics/mpcf-si/2016-06-14_08.54.26_si_ml-100k_e20_tt-0.7_task-10_user-metrics.csv
0,0.909906,0.374007,0.306835,0.739,0.303287,0.468608,0.728469,0.260242,../metrics/mpcf-si/2016-06-14_05.16.08_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv
0,0.90938,0.249534,0.226138,0.73803,0.226564,0.343813,0.585988,0.258501,../metrics/mpcf-si/2016-06-14_02.42.36_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv
0,0.904945,0.236814,0.214204,0.735245,0.216649,0.321243,0.568623,0.250564,../metrics/mpcf-si/2016-06-14_02.13.50_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.904,0.23855,0.215654,0.734036,0.217922,0.323383,0.573838,0.246161,../metrics/mpcf-si/2016-06-14_02.42.34_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv
0,0.843826,0.147665,0.141233,0.724592,0.14597,0.206045,0.439783,0.217316,../metrics/mpcf-si/2016-06-13_20.32.40_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv
0,0.832356,0.140739,0.135059,0.73112,0.140774,0.193298,0.425854,0.236552,../metrics/mpcf-si/2016-06-14_05.12.16_si_ml-100k_e20_tt-0.7_task-7_user-metrics.csv
0,0.830544,0.140658,0.134967,0.732705,0.140774,0.194362,0.425417,0.241006,../metrics/mpcf-si/2016-06-14_07.47.57_si_ml-100k_e20_tt-0.7_task-8_user-metrics.csv


In [13]:
# Best config: hyper-parameters of the run with the highest user-level AUC (task-5).
with open('../models/mpcf-si/2016-06-14_08.23.27_si_ml-100k_e20_tt-0.7_task-5_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-5',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 2,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_lambda_delta_qi': 0.01,
    u'si_lr': 0.001,
    u'si_lr_decay': 0.02,
    u'si_model': True,
    u'si_nn': [128, 160, 50],
    u'si_reg_lambda': 0.03,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_predi

In [22]:
# Second-best config: hyper-parameters of the runner-up by user-level AUC (task-10).
with open('../models/mpcf-si/2016-06-14_08.54.26_si_ml-100k_e20_tt-0.7_task-10_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-10',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 1,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_lambda_delta_qi': 0.001,
    u'si_lr': 0.003,
    u'si_lr_decay': 0.0005,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.003,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_pre

In [16]:
# Per-movie metric files of the ML-100k runs with 1 or 2 user preferences and no
# binarization (0.7 train / 0.3 test, sparse item), in filename order.
movie_metrics = [
    '../metrics/mpcf-si/2016-06-13_20.32.40_si_ml-100k_e20_tt-0.7_task-2_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.13.50_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.42.34_si_ml-100k_e20_tt-0.7_task-3_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.42.36_si_ml-100k_e20_tt-0.7_task-0_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_05.12.16_si_ml-100k_e20_tt-0.7_task-7_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_05.16.08_si_ml-100k_e20_tt-0.7_task-4_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_07.47.57_si_ml-100k_e20_tt-0.7_task-8_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_08.23.27_si_ml-100k_e20_tt-0.7_task-5_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_08.54.26_si_ml-100k_e20_tt-0.7_task-10_movie-metrics.csv',
]

In [17]:
# Average each run's per-movie metrics (rows with at least one test rating)
# and rank the runs by AUC, best first.
df_results = get_df_results(results=movie_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.881204,708.548841,78.061258,../metrics/mpcf-si/2016-06-14_08.23.27_si_ml-100k_e20_tt-0.7_task-5_movie-metrics.csv
0,0.875104,708.548841,78.061258,../metrics/mpcf-si/2016-06-14_08.54.26_si_ml-100k_e20_tt-0.7_task-10_movie-metrics.csv
0,0.874621,708.548841,78.062914,../metrics/mpcf-si/2016-06-14_02.42.36_si_ml-100k_e20_tt-0.7_task-0_movie-metrics.csv
0,0.867298,708.548841,78.062914,../metrics/mpcf-si/2016-06-14_02.13.50_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv
0,0.866641,708.548841,78.062914,../metrics/mpcf-si/2016-06-14_02.42.34_si_ml-100k_e20_tt-0.7_task-3_movie-metrics.csv
0,0.861273,708.548841,78.04553,../metrics/mpcf-si/2016-06-14_05.16.08_si_ml-100k_e20_tt-0.7_task-4_movie-metrics.csv
0,0.797379,708.548841,78.062914,../metrics/mpcf-si/2016-06-13_20.32.40_si_ml-100k_e20_tt-0.7_task-2_movie-metrics.csv
0,0.776498,708.548841,78.062914,../metrics/mpcf-si/2016-06-14_05.12.16_si_ml-100k_e20_tt-0.7_task-7_movie-metrics.csv
0,0.770206,708.548841,78.062914,../metrics/mpcf-si/2016-06-14_07.47.57_si_ml-100k_e20_tt-0.7_task-8_movie-metrics.csv


## ML-100k - 0.2 train / 0.8 test - only 1 or 2 user pref - no binarize - sparse item

In [9]:
# Per-user metric files of the ML-100k runs with 1 or 2 user preferences and no
# binarization (0.2 train / 0.8 test, sparse item), in filename order.
user_metrics = [
    '../metrics/mpcf-si/2016-06-13_18.33.31_si_ml-100k_e20_tt-0.2_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_18.41.59_si_ml-100k_e20_tt-0.2_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.03.06_si_ml-100k_e20_tt-0.2_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.16.52_si_ml-100k_e20_tt-0.2_task-4_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.59.10_si_ml-100k_e20_tt-0.2_task-7_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_20.06.33_si_ml-100k_e20_tt-0.2_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_20.27.26_si_ml-100k_e20_tt-0.2_task-8_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.05.59_si_ml-100k_e20_tt-0.2_task-10_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.19.05_si_ml-100k_e20_tt-0.2_task-5_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.27.40_si_ml-100k_e20_tt-0.2_task-11_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.41.54_si_ml-100k_e20_tt-0.2_task-6_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.19.28_si_ml-100k_e20_tt-0.2_task-9_user-metrics.csv',
]

In [10]:
# Average each run's per-user metrics (rows with at least one test rating)
# and rank the runs by AUC, best first.
df_results = get_df_results(results=user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.872824,0.319498,0.26117,0.720026,0.391039,0.260872,0.739518,0.213639,../metrics/mpcf-si/2016-06-13_19.59.10_si_ml-100k_e20_tt-0.2_task-7_user-metrics.csv
0,0.866678,0.311412,0.255594,0.716262,0.384464,0.253879,0.729688,0.202168,../metrics/mpcf-si/2016-06-13_18.41.59_si_ml-100k_e20_tt-0.2_task-1_user-metrics.csv
0,0.864149,0.261213,0.216963,0.725548,0.337222,0.212278,0.67014,0.230277,../metrics/mpcf-si/2016-06-13_18.33.31_si_ml-100k_e20_tt-0.2_task-2_user-metrics.csv
0,0.86209,0.257834,0.215673,0.72492,0.33351,0.211794,0.658596,0.228361,../metrics/mpcf-si/2016-06-13_21.19.05_si_ml-100k_e20_tt-0.2_task-5_user-metrics.csv
0,0.857224,0.314416,0.262846,0.712433,0.394115,0.262637,0.752623,0.191662,../metrics/mpcf-si/2016-06-13_20.27.26_si_ml-100k_e20_tt-0.2_task-8_user-metrics.csv
0,0.856939,0.249848,0.201621,0.725404,0.316384,0.197186,0.655211,0.2297,../metrics/mpcf-si/2016-06-13_22.19.28_si_ml-100k_e20_tt-0.2_task-9_user-metrics.csv
0,0.843233,0.295209,0.248227,0.712952,0.373065,0.247121,0.711294,0.192518,../metrics/mpcf-si/2016-06-13_20.06.33_si_ml-100k_e20_tt-0.2_task-3_user-metrics.csv
0,0.834434,0.216676,0.173069,0.726642,0.269247,0.169993,0.624945,0.233195,../metrics/mpcf-si/2016-06-13_21.41.54_si_ml-100k_e20_tt-0.2_task-6_user-metrics.csv
0,0.830866,0.28558,0.242142,0.708156,0.364687,0.240928,0.710368,0.178069,../metrics/mpcf-si/2016-06-13_21.05.59_si_ml-100k_e20_tt-0.2_task-10_user-metrics.csv
0,0.826472,0.304804,0.259735,0.707838,0.389555,0.258879,0.728161,0.177421,../metrics/mpcf-si/2016-06-13_21.27.40_si_ml-100k_e20_tt-0.2_task-11_user-metrics.csv


In [14]:
# Best config: saved settings of the run ranked first by user-level AUC above.
with open('../models/mpcf-si/2016-06-13_19.59.10_si_ml-100k_e20_tt-0.2_task-7_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.2_task-7',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 2,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.04,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_lambda_delta_qi': 0.001,
    u'si_lr': 0.003,
    u'si_lr_decay': 0.02,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.06,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.2-test.csv',
    u'top_n_predict

In [18]:
# Per-movie metric CSVs of the tt-0.2 sparse-item runs (one file per run).
movie_metrics = [
    '../metrics/mpcf-si/2016-06-13_18.33.31_si_ml-100k_e20_tt-0.2_task-2_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_18.41.59_si_ml-100k_e20_tt-0.2_task-1_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.03.06_si_ml-100k_e20_tt-0.2_task-0_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.16.52_si_ml-100k_e20_tt-0.2_task-4_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.59.10_si_ml-100k_e20_tt-0.2_task-7_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_20.06.33_si_ml-100k_e20_tt-0.2_task-3_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_20.27.26_si_ml-100k_e20_tt-0.2_task-8_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.05.59_si_ml-100k_e20_tt-0.2_task-10_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.19.05_si_ml-100k_e20_tt-0.2_task-5_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.27.40_si_ml-100k_e20_tt-0.2_task-11_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.41.54_si_ml-100k_e20_tt-0.2_task-6_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.19.28_si_ml-100k_e20_tt-0.2_task-9_movie-metrics.csv',
]

In [19]:
# Collapse every run's movie-metrics CSV to one row of column means,
# then display the runs ordered from highest to lowest AUC.
df_results = get_df_results(movie_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.81981,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_18.33.31_si_ml-100k_e20_tt-0.2_task-2_movie-metrics.csv
0,0.819287,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_19.59.10_si_ml-100k_e20_tt-0.2_task-7_movie-metrics.csv
0,0.817092,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_21.19.05_si_ml-100k_e20_tt-0.2_task-5_movie-metrics.csv
0,0.813141,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_18.41.59_si_ml-100k_e20_tt-0.2_task-1_movie-metrics.csv
0,0.81228,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_22.19.28_si_ml-100k_e20_tt-0.2_task-9_movie-metrics.csv
0,0.809432,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_20.27.26_si_ml-100k_e20_tt-0.2_task-8_movie-metrics.csv
0,0.788353,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_21.41.54_si_ml-100k_e20_tt-0.2_task-6_movie-metrics.csv
0,0.783707,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_20.06.33_si_ml-100k_e20_tt-0.2_task-3_movie-metrics.csv
0,0.775064,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_21.05.59_si_ml-100k_e20_tt-0.2_task-10_movie-metrics.csv
0,0.769275,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_21.27.40_si_ml-100k_e20_tt-0.2_task-11_movie-metrics.csv


## ML-100k - 0.2 train / 0.8 test - only 1 or 2 user preferences - no binarization - sparse user

In [11]:
# Per-user metric CSVs of the tt-0.2 sparse-user runs (e20 and e10 experiments).
user_metrics = [
    '../metrics/mpcf-si/2016-06-13_19.32.32_si_ml-100k_e20_tt-0.2_sparse-user_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.40.56_si_ml-100k_e20_tt-0.2_sparse-user_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.52.33_si_ml-100k_e20_tt-0.2_sparse-user_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.54.07_si_ml-100k_e20_tt-0.2_sparse-user_task-4_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_20.03.15_si_ml-100k_e20_tt-0.2_sparse-user_task-5_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_20.18.12_si_ml-100k_e20_tt-0.2_sparse-user_task-6_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_20.21.14_si_ml-100k_e20_tt-0.2_sparse-user_task-8_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.02.36_si_ml-100k_e20_tt-0.2_sparse-user_task-9_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.29.26_si_ml-100k_e20_tt-0.2_sparse-user_task-11_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.50.13_si_ml-100k_e20_tt-0.2_sparse-user_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.03.25_si_ml-100k_e10_tt-0.2_sparse-user_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.15.23_si_ml-100k_e20_tt-0.2_sparse-user_task-7_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.20.42_si_ml-100k_e10_tt-0.2_sparse-user_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.25.31_si_ml-100k_e10_tt-0.2_sparse-user_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.29.32_si_ml-100k_e10_tt-0.2_sparse-user_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.34.01_si_ml-100k_e10_tt-0.2_sparse-user_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.35.36_si_ml-100k_e20_tt-0.2_sparse-user_task-10_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.39.47_si_ml-100k_e10_tt-0.2_sparse-user_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.43.48_si_ml-100k_e10_tt-0.2_sparse-user_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.03.12_si_ml-100k_e10_tt-0.2_sparse-user_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.16.27_si_ml-100k_e10_tt-0.2_sparse-user_task-4_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.28.40_si_ml-100k_e10_tt-0.2_sparse-user_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.34.04_si_ml-100k_e10_tt-0.2_sparse-user_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.39.44_si_ml-100k_e10_tt-0.2_sparse-user_task-5_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.48.50_si_ml-100k_e10_tt-0.2_sparse-user_task-6_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.56.41_si_ml-100k_e10_tt-0.2_sparse-user_task-4_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_00.05.20_si_ml-100k_e10_tt-0.2_sparse-user_task-5_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_00.09.50_si_ml-100k_e10_tt-0.2_sparse-user_task-7_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_00.25.40_si_ml-100k_e10_tt-0.2_sparse-user_task-6_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_00.46.27_si_ml-100k_e10_tt-0.2_sparse-user_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_00.48.39_si_ml-100k_e10_tt-0.2_sparse-user_task-7_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_01.27.00_si_ml-100k_e10_tt-0.2_sparse-user_task-8_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_01.35.29_si_ml-100k_e10_tt-0.2_sparse-user_task-8_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_01.56.07_si_ml-100k_e10_tt-0.2_sparse-user_task-9_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.06.35_si_ml-100k_e10_tt-0.2_sparse-user_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.29.55_si_ml-100k_e10_tt-0.2_sparse-user_task-4_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.45.53_si_ml-100k_e10_tt-0.2_sparse-user_task-9_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.46.07_si_ml-100k_e10_tt-0.2_sparse-user_task-5_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.54.54_si_ml-100k_e10_tt-0.2_sparse-user_task-6_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_03.01.41_si_ml-100k_e10_tt-0.2_sparse-user_task-10_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_03.16.57_si_ml-100k_e10_tt-0.2_sparse-user_task-10_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_03.26.55_si_ml-100k_e10_tt-0.2_sparse-user_task-7_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_03.30.49_si_ml-100k_e10_tt-0.2_sparse-user_task-11_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_03.38.44_si_ml-100k_e10_tt-0.2_sparse-user_task-11_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_04.30.25_si_ml-100k_e10_tt-0.2_sparse-user_task-8_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_04.58.46_si_ml-100k_e10_tt-0.2_sparse-user_task-9_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_06.19.12_si_ml-100k_e10_tt-0.2_sparse-user_task-10_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_07.21.21_si_ml-100k_e10_tt-0.2_sparse-user_task-11_user-metrics.csv',
]

In [12]:
# Collapse every sparse-user run's metrics CSV to one row of column means,
# then display the runs ordered from highest to lowest AUC.
df_results = get_df_results(user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.882308,0.324922,0.26265,0.722217,0.396235,0.259784,0.74039,0.21985,../metrics/mpcf-si/2016-06-13_20.21.14_si_ml-100k_e20_tt-0.2_sparse-user_task-8_user-metrics.csv
0,0.881285,0.296906,0.237904,0.72205,0.365005,0.23419,0.715851,0.219108,../metrics/mpcf-si/2016-06-13_22.39.47_si_ml-100k_e10_tt-0.2_sparse-user_task-2_user-metrics.csv
0,0.878038,0.3034,0.242337,0.722445,0.371633,0.239095,0.722887,0.220123,../metrics/mpcf-si/2016-06-14_00.09.50_si_ml-100k_e10_tt-0.2_sparse-user_task-7_user-metrics.csv
0,0.877196,0.289306,0.229772,0.723696,0.354984,0.225494,0.70951,0.224007,../metrics/mpcf-si/2016-06-14_03.38.44_si_ml-100k_e10_tt-0.2_sparse-user_task-11_user-metrics.csv
0,0.875995,0.307702,0.247421,0.720545,0.375822,0.244117,0.723192,0.214048,../metrics/mpcf-si/2016-06-14_04.58.46_si_ml-100k_e10_tt-0.2_sparse-user_task-9_user-metrics.csv
0,0.87484,0.306267,0.247463,0.720888,0.375027,0.244528,0.716555,0.215736,../metrics/mpcf-si/2016-06-13_23.56.41_si_ml-100k_e10_tt-0.2_sparse-user_task-4_user-metrics.csv
0,0.873602,0.281935,0.226889,0.7216,0.352333,0.22127,0.686478,0.217184,../metrics/mpcf-si/2016-06-14_02.06.35_si_ml-100k_e10_tt-0.2_sparse-user_task-3_user-metrics.csv
0,0.873054,0.304556,0.247482,0.723713,0.375133,0.244815,0.701319,0.22442,../metrics/mpcf-si/2016-06-14_03.30.49_si_ml-100k_e10_tt-0.2_sparse-user_task-11_user-metrics.csv
0,0.872528,0.291758,0.232064,0.727084,0.358961,0.227407,0.716471,0.234221,../metrics/mpcf-si/2016-06-13_23.48.50_si_ml-100k_e10_tt-0.2_sparse-user_task-6_user-metrics.csv
0,0.872245,0.286836,0.22917,0.725506,0.355037,0.224327,0.713765,0.229821,../metrics/mpcf-si/2016-06-13_22.43.48_si_ml-100k_e10_tt-0.2_sparse-user_task-2_user-metrics.csv


In [15]:
# Best config: saved settings of the run ranked first by user-level AUC above.
with open('../models/mpcf-si/2016-06-13_20.21.14_si_ml-100k_e20_tt-0.2_sparse-user_task-8_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.2_sparse-user_task-8',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 1,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.06,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_lambda_delta_qi': 0.0003,
    u'si_lr': 0.0003,
    u'si_lr_decay': 0.02,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.06,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/no-sparse-item/0.2-test.csv',
  

In [20]:
# Per-movie metric CSVs of the tt-0.2 sparse-user runs (e20 and e10 experiments).
movie_metrics = [
    '../metrics/mpcf-si/2016-06-13_19.32.32_si_ml-100k_e20_tt-0.2_sparse-user_task-0_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.40.56_si_ml-100k_e20_tt-0.2_sparse-user_task-3_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.52.33_si_ml-100k_e20_tt-0.2_sparse-user_task-2_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_19.54.07_si_ml-100k_e20_tt-0.2_sparse-user_task-4_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_20.03.15_si_ml-100k_e20_tt-0.2_sparse-user_task-5_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_20.18.12_si_ml-100k_e20_tt-0.2_sparse-user_task-6_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_20.21.14_si_ml-100k_e20_tt-0.2_sparse-user_task-8_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.02.36_si_ml-100k_e20_tt-0.2_sparse-user_task-9_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.29.26_si_ml-100k_e20_tt-0.2_sparse-user_task-11_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_21.50.13_si_ml-100k_e20_tt-0.2_sparse-user_task-1_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.03.25_si_ml-100k_e10_tt-0.2_sparse-user_task-0_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.15.23_si_ml-100k_e20_tt-0.2_sparse-user_task-7_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.20.42_si_ml-100k_e10_tt-0.2_sparse-user_task-1_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.25.31_si_ml-100k_e10_tt-0.2_sparse-user_task-0_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.29.32_si_ml-100k_e10_tt-0.2_sparse-user_task-0_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.34.01_si_ml-100k_e10_tt-0.2_sparse-user_task-1_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.35.36_si_ml-100k_e20_tt-0.2_sparse-user_task-10_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.39.47_si_ml-100k_e10_tt-0.2_sparse-user_task-2_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_22.43.48_si_ml-100k_e10_tt-0.2_sparse-user_task-2_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.03.12_si_ml-100k_e10_tt-0.2_sparse-user_task-3_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.16.27_si_ml-100k_e10_tt-0.2_sparse-user_task-4_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.28.40_si_ml-100k_e10_tt-0.2_sparse-user_task-1_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.34.04_si_ml-100k_e10_tt-0.2_sparse-user_task-3_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.39.44_si_ml-100k_e10_tt-0.2_sparse-user_task-5_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.48.50_si_ml-100k_e10_tt-0.2_sparse-user_task-6_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-13_23.56.41_si_ml-100k_e10_tt-0.2_sparse-user_task-4_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_00.05.20_si_ml-100k_e10_tt-0.2_sparse-user_task-5_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_00.09.50_si_ml-100k_e10_tt-0.2_sparse-user_task-7_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_00.25.40_si_ml-100k_e10_tt-0.2_sparse-user_task-6_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_00.46.27_si_ml-100k_e10_tt-0.2_sparse-user_task-2_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_00.48.39_si_ml-100k_e10_tt-0.2_sparse-user_task-7_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_01.27.00_si_ml-100k_e10_tt-0.2_sparse-user_task-8_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_01.35.29_si_ml-100k_e10_tt-0.2_sparse-user_task-8_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_01.56.07_si_ml-100k_e10_tt-0.2_sparse-user_task-9_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.06.35_si_ml-100k_e10_tt-0.2_sparse-user_task-3_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.29.55_si_ml-100k_e10_tt-0.2_sparse-user_task-4_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.45.53_si_ml-100k_e10_tt-0.2_sparse-user_task-9_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.46.07_si_ml-100k_e10_tt-0.2_sparse-user_task-5_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_02.54.54_si_ml-100k_e10_tt-0.2_sparse-user_task-6_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_03.01.41_si_ml-100k_e10_tt-0.2_sparse-user_task-10_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_03.16.57_si_ml-100k_e10_tt-0.2_sparse-user_task-10_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_03.26.55_si_ml-100k_e10_tt-0.2_sparse-user_task-7_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_03.30.49_si_ml-100k_e10_tt-0.2_sparse-user_task-11_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_03.38.44_si_ml-100k_e10_tt-0.2_sparse-user_task-11_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_04.30.25_si_ml-100k_e10_tt-0.2_sparse-user_task-8_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_04.58.46_si_ml-100k_e10_tt-0.2_sparse-user_task-9_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_06.19.12_si_ml-100k_e10_tt-0.2_sparse-user_task-10_movie-metrics.csv',
    '../metrics/mpcf-si/2016-06-14_07.21.21_si_ml-100k_e10_tt-0.2_sparse-user_task-11_movie-metrics.csv',
]

In [21]:
# Collapse every sparse-user run's movie-metrics CSV to one row of column means,
# then display the runs ordered from highest to lowest AUC.
df_results = get_df_results(movie_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.839444,738.38738,75.319489,../metrics/mpcf-si/2016-06-14_03.38.44_si_ml-100k_e10_tt-0.2_sparse-user_task-11_movie-metrics.csv
0,0.83659,738.38738,75.319489,../metrics/mpcf-si/2016-06-13_22.39.47_si_ml-100k_e10_tt-0.2_sparse-user_task-2_movie-metrics.csv
0,0.836364,738.38738,75.319489,../metrics/mpcf-si/2016-06-13_20.21.14_si_ml-100k_e20_tt-0.2_sparse-user_task-8_movie-metrics.csv
0,0.835786,738.38738,75.319489,../metrics/mpcf-si/2016-06-13_22.25.31_si_ml-100k_e10_tt-0.2_sparse-user_task-0_movie-metrics.csv
0,0.833453,738.38738,75.319489,../metrics/mpcf-si/2016-06-13_22.43.48_si_ml-100k_e10_tt-0.2_sparse-user_task-2_movie-metrics.csv
0,0.833076,738.38738,75.319489,../metrics/mpcf-si/2016-06-14_02.06.35_si_ml-100k_e10_tt-0.2_sparse-user_task-3_movie-metrics.csv
0,0.832251,738.38738,75.319489,../metrics/mpcf-si/2016-06-14_00.46.27_si_ml-100k_e10_tt-0.2_sparse-user_task-2_movie-metrics.csv
0,0.831414,738.38738,75.319489,../metrics/mpcf-si/2016-06-14_00.09.50_si_ml-100k_e10_tt-0.2_sparse-user_task-7_movie-metrics.csv
0,0.831208,738.38738,75.319489,../metrics/mpcf-si/2016-06-14_01.56.07_si_ml-100k_e10_tt-0.2_sparse-user_task-9_movie-metrics.csv
0,0.831143,738.38738,75.319489,../metrics/mpcf-si/2016-06-13_23.48.50_si_ml-100k_e10_tt-0.2_sparse-user_task-6_movie-metrics.csv


## ML-100k - si model fixed

In [9]:
# Per-user metric CSVs compared in this section (tt-0.7, one file per run).
user_metrics = [
    # runs whose experiment name carries the "si-model-fixed" tag
    '../metrics/mpcf-si/2016-06-16_20.58.47_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_20.59.31_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_21.00.26_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_21.20.03_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_21.20.43_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_21.21.08_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_21.23.14_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_21.42.08_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_22.03.34_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_22.04.05_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_22.04.09_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_22.06.19_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_22.46.37_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_22.47.10_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_22.47.36_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_22.49.00_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_23.30.30_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_23.30.47_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_23.30.56_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-16_23.31.52_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_03.59.22_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_04.26.46_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_04.32.42_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_04.45.57_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_05.10.30_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_05.29.18_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_05.44.46_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_05.47.33_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_06.27.36_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_07.10.52_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_13.28.45_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_13.36.04_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_14.24.04_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_18.57.00_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_20.25.55_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_21.12.08_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_21.29.49_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_21.59.14_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_22.25.38_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_22.53.07_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_23.28.55_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_23.47.46_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv',
    # runs whose experiment name has no "si-model-fixed" tag
    '../metrics/mpcf-si/2016-06-17_17.22.26_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_17.31.24_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_17.32.57_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_18.33.28_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_18.36.01_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_22.48.39_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_20.57.37_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_21.44.44_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
    '../metrics/mpcf-si/2016-06-17_19.46.10_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
]

In [10]:
# Collapse every listed run's metrics CSV to one row of column means,
# then display the runs ordered from highest to lowest AUC.
df_results = get_df_results(user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.932099,0.293101,0.260691,0.715741,0.260127,0.397083,0.616977,0.192213,../metrics/mpcf-si/2016-06-17_17.31.24_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv
0,0.929869,0.281919,0.253909,0.715404,0.253818,0.384166,0.600488,0.190516,../metrics/mpcf-si/2016-06-17_22.48.39_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv
0,0.929007,0.330196,0.278368,0.750448,0.2772,0.423419,0.687988,0.295415,../metrics/mpcf-si/2016-06-17_04.45.57_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv
0,0.928428,0.27585,0.248214,0.714798,0.246554,0.381962,0.605774,0.188654,../metrics/mpcf-si/2016-06-17_21.44.44_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.925944,0.350761,0.291791,0.748474,0.289661,0.444284,0.706503,0.288529,../metrics/mpcf-si/2016-06-16_21.42.08_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv
0,0.925738,0.321486,0.27187,0.748266,0.269618,0.415969,0.684938,0.288145,../metrics/mpcf-si/2016-06-16_23.30.56_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_user-metrics.csv
0,0.92569,0.313731,0.266208,0.747893,0.265058,0.406862,0.669079,0.286548,../metrics/mpcf-si/2016-06-17_04.26.46_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv
0,0.925622,0.303604,0.260526,0.748372,0.260339,0.396552,0.655528,0.288647,../metrics/mpcf-si/2016-06-16_22.06.19_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv
0,0.925371,0.312091,0.26892,0.748364,0.267815,0.409312,0.666577,0.287951,../metrics/mpcf-si/2016-06-17_06.27.36_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv
0,0.92513,0.317378,0.271458,0.745447,0.270467,0.413618,0.666109,0.280561,../metrics/mpcf-si/2016-06-16_22.47.10_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv


In [13]:
# Same ranking, but get_df_results is asked to keep only rows with
# nb_train_ratings <= 10 before averaging each run.
df_max_train = get_df_results(user_metrics, max_train_ratings=10)
df_max_train.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.972753,0.20253,0.168091,0.581151,0.1125,0.333333,0.533597,-0.100549,../metrics/mpcf-si/2016-06-17_21.44.44_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.970727,0.213451,0.20584,0.599527,0.1375,0.410714,0.556757,-0.054627,../metrics/mpcf-si/2016-06-17_04.26.46_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_user-metrics.csv
0,0.97032,0.196493,0.149573,0.586706,0.1,0.297619,0.544048,-0.104187,../metrics/mpcf-si/2016-06-17_17.31.24_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv
0,0.970094,0.275633,0.286356,0.650107,0.1875,0.64881,0.446429,0.124151,../metrics/mpcf-si/2016-06-17_18.36.01_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.968794,0.207523,0.224359,0.627274,0.15,0.446429,0.543067,0.039396,../metrics/mpcf-si/2016-06-16_23.30.56_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_user-metrics.csv
0,0.968634,0.185518,0.149573,0.583501,0.1,0.297619,0.550926,-0.094013,../metrics/mpcf-si/2016-06-17_22.48.39_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv
0,0.968511,0.198213,0.187322,0.592521,0.125,0.375,0.543527,-0.086485,../metrics/mpcf-si/2016-06-17_06.27.36_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_user-metrics.csv
0,0.967168,0.206718,0.18661,0.565507,0.125,0.369048,0.557813,-0.135946,../metrics/mpcf-si/2016-06-17_04.45.57_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv
0,0.965913,0.253865,0.209061,0.660287,0.1375,0.458333,0.471491,0.124806,../metrics/mpcf-si/2016-06-17_21.12.08_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv
0,0.965313,0.192592,0.168803,0.579396,0.1125,0.339286,0.550595,-0.117177,../metrics/mpcf-si/2016-06-16_22.06.19_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_user-metrics.csv


In [11]:
# Best config: saved settings of the run ranked first by AUC in the unfiltered table above.
with open('../models/mpcf-si/2016-06-17_17.31.24_si_ml-100k_e20_tt-0.7_task-0_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'eval_in_parallel': True,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-0',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 1,
    u'pool_size': 24,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.03,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_item_cosine_lambda': 0.1,
    u'si_item_d2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'si_item_lambda_d_item_f': 0.01,
    u'si_item_lr': 0.01,
    u'si_item_lr_decay': 0.02,
    u'si_item_model': True,
    u'si_item_nn': [128, 50],
    u'si_item_nn_hidden':

In [12]:
# Second-best config: saved settings of the run ranked second by AUC in the unfiltered table above.
with open('../models/mpcf-si/2016-06-17_22.48.39_si_ml-100k_e20_tt-0.7_task-2_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'eval_in_parallel': True,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-2',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.04,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_epochs': 20,
    u'nb_latent_f': 64,
    u'nb_user_pref': 4,
    u'pool_size': 24,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.03,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_item_cosine_lambda': 1,
    u'si_item_d2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'si_item_lambda_d_item_f': 0.05,
    u'si_item_lr': 0.001,
    u'si_item_lr_decay': 0.02,
    u'si_item_model': True,
    u'si_item_nn': [64, 50],
    u'si_item_nn_hidden': []

In [14]:
# Paths of the movie-metrics CSV files that are summarised and ranked by AUC
# in the following cells. Going by the file names, these are ml-100k runs with
# 20 epochs ("e20") and a 0.7 split ("tt-0.7"); the exact meaning of the tags
# is defined by the training scripts, not here.
movie_metrics = [
# Runs whose experiment name carries the "si-model-fixed" tag.
'../metrics/mpcf-si/2016-06-16_20.58.47_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_20.59.31_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_21.00.26_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_21.20.03_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_21.20.43_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_21.21.08_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_21.23.14_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_21.42.08_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_22.03.34_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_22.04.05_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_22.04.09_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_22.06.19_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_22.46.37_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_22.47.10_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_22.47.36_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_22.49.00_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_23.30.30_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_23.30.47_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_23.30.56_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-16_23.31.52_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_03.59.22_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_04.26.46_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_04.32.42_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_04.45.57_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_05.10.30_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_05.29.18_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_05.44.46_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_05.47.33_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_06.27.36_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_07.10.52_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_13.28.45_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_13.36.04_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_14.24.04_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_18.57.00_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_20.25.55_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_21.12.08_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_21.29.49_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_21.59.14_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_22.25.38_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_22.53.07_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_23.28.55_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_23.47.46_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics.csv',
# Runs without the "si-model-fixed" tag in their experiment name.
'../metrics/mpcf-si/2016-06-17_17.22.26_si_ml-100k_e20_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_17.31.24_si_ml-100k_e20_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_17.32.57_si_ml-100k_e20_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_18.33.28_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_18.36.01_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_19.46.10_si_ml-100k_e20_tt-0.7_task-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_20.57.37_si_ml-100k_e20_tt-0.7_task-3_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_21.44.44_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-17_22.48.39_si_ml-100k_e20_tt-0.7_task-2_movie-metrics.csv',

]

In [15]:
# One averaged row per movie-metrics file (built by get_df_results),
# displayed with the highest AUC first.
df_results = get_df_results(movie_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.90692,708.548841,78.062914,../metrics/mpcf-si/2016-06-17_17.31.24_si_ml-100k_e20_tt-0.7_task-0_movie-metrics.csv
0,0.903361,708.548841,78.062914,../metrics/mpcf-si/2016-06-17_22.48.39_si_ml-100k_e20_tt-0.7_task-2_movie-metrics.csv
0,0.901118,708.548841,78.062914,../metrics/mpcf-si/2016-06-17_21.44.44_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv
0,0.899424,708.548841,78.062914,../metrics/mpcf-si/2016-06-17_04.45.57_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics...
0,0.896213,708.548841,78.062914,../metrics/mpcf-si/2016-06-17_04.26.46_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics...
0,0.894513,708.548841,78.062914,../metrics/mpcf-si/2016-06-17_06.27.36_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics...
0,0.894455,708.548841,78.062914,../metrics/mpcf-si/2016-06-16_22.06.19_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics...
0,0.893973,708.548841,78.056291,../metrics/mpcf-si/2016-06-17_21.59.14_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics...
0,0.893393,708.548841,78.062914,../metrics/mpcf-si/2016-06-16_21.42.08_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics...
0,0.892939,708.548841,78.062086,../metrics/mpcf-si/2016-06-16_23.30.56_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_movie-metrics...


In [17]:
# Same summary, but get_df_results only keeps rows whose
# 'nb_train_ratings' is at most 5 before averaging; shown best AUC first.
df_max_train = get_df_results(movie_metrics, max_train_ratings=5)
df_max_train.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.901644,1294.073333,0.106667,../metrics/mpcf-si/2016-06-17_21.44.44_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv
0,0.898959,1294.073333,0.1,../metrics/mpcf-si/2016-06-17_17.31.24_si_ml-100k_e20_tt-0.7_task-0_movie-metrics.csv
0,0.898822,1294.073333,0.066667,../metrics/mpcf-si/2016-06-17_22.48.39_si_ml-100k_e20_tt-0.7_task-2_movie-metrics.csv
0,0.895724,1294.073333,0.133333,../metrics/mpcf-si/2016-06-17_04.26.46_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics...
0,0.891069,1294.073333,0.34,../metrics/mpcf-si/2016-06-16_23.30.56_si_ml-100k_e20_tt-0.7_si-model-fixed_task-3_movie-metrics...
0,0.890478,1294.073333,0.133333,../metrics/mpcf-si/2016-06-17_04.45.57_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics...
0,0.889884,1294.073333,0.14,../metrics/mpcf-si/2016-06-17_21.59.14_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics...
0,0.887428,1294.073333,0.086667,../metrics/mpcf-si/2016-06-16_22.06.19_si_ml-100k_e20_tt-0.7_si-model-fixed_task-1_movie-metrics...
0,0.887353,1294.073333,0.426667,../metrics/mpcf-si/2016-06-17_13.28.45_si_ml-100k_e20_tt-0.7_si-model-fixed_task-0_movie-metrics...
0,0.885759,1294.073333,0.18,../metrics/mpcf-si/2016-06-17_06.27.36_si_ml-100k_e20_tt-0.7_si-model-fixed_task-2_movie-metrics...


## ML-100k - 20e - 0.56 train/0.14 val/0.3 test

In [2]:
# Paths of the user-metrics CSV files ranked by mean AUC in the next cell.
# Going by the file names, these are ml-100k runs with 20 epochs ("e20") and
# the "tt-0.7" split; each path carries its task id in the file name.
results = [
'../metrics/mpcf-si/2016-05-10_03.31.59_si_ml-100k_e20_tt-0.7_task-46_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.32.41_si_ml-100k_e20_tt-0.7_task-16_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.32.41_si_ml-100k_e20_tt-0.7_task-22_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.34.14_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.40.49_si_ml-100k_e20_tt-0.7_task-30_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.44.28_si_ml-100k_e20_tt-0.7_task-28_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.44.49_si_ml-100k_e20_tt-0.7_task-12_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.44.54_si_ml-100k_e20_tt-0.7_task-38_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.45.34_si_ml-100k_e20_tt-0.7_task-10_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.47.02_si_ml-100k_e20_tt-0.7_task-44_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.49.52_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.50.32_si_ml-100k_e20_tt-0.7_task-42_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.50.49_si_ml-100k_e20_tt-0.7_task-20_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.56.47_si_ml-100k_e20_tt-0.7_task-14_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_03.57.44_si_ml-100k_e20_tt-0.7_task-26_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.00.07_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.02.02_si_ml-100k_e20_tt-0.7_task-8_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.04.43_si_ml-100k_e20_tt-0.7_task-6_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.06.25_si_ml-100k_e20_tt-0.7_task-40_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.07.31_si_ml-100k_e20_tt-0.7_task-32_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.08.18_si_ml-100k_e20_tt-0.7_task-36_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.09.12_si_ml-100k_e20_tt-0.7_task-17_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.12.01_si_ml-100k_e20_tt-0.7_task-24_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.12.26_si_ml-100k_e20_tt-0.7_task-23_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.12.44_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.13.32_si_ml-100k_e20_tt-0.7_task-21_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.17.48_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.24.01_si_ml-100k_e20_tt-0.7_task-18_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.25.31_si_ml-100k_e20_tt-0.7_task-29_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.25.39_si_ml-100k_e20_tt-0.7_task-27_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.27.41_si_ml-100k_e20_tt-0.7_task-9_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.27.57_si_ml-100k_e20_tt-0.7_task-13_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.32.14_si_ml-100k_e20_tt-0.7_task-34_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.38.29_si_ml-100k_e20_tt-0.7_task-31_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.45.11_si_ml-100k_e20_tt-0.7_task-39_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.46.52_si_ml-100k_e20_tt-0.7_task-45_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.47.40_si_ml-100k_e20_tt-0.7_task-19_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.55.19_si_ml-100k_e20_tt-0.7_task-54_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.56.52_si_ml-100k_e20_tt-0.7_task-52_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_04.57.45_si_ml-100k_e20_tt-0.7_task-15_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.00.21_si_ml-100k_e20_tt-0.7_task-56_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.00.55_si_ml-100k_e20_tt-0.7_task-64_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.03.08_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.10.52_si_ml-100k_e20_tt-0.7_task-11_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.11.54_si_ml-100k_e20_tt-0.7_task-47_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.15.59_si_ml-100k_e20_tt-0.7_task-43_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.21.10_si_ml-100k_e20_tt-0.7_task-55_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.23.55_si_ml-100k_e20_tt-0.7_task-33_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.24.05_si_ml-100k_e20_tt-0.7_task-7_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.26.33_si_ml-100k_e20_tt-0.7_task-58_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.28.35_si_ml-100k_e20_tt-0.7_task-37_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.29.22_si_ml-100k_e20_tt-0.7_task-48_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.29.23_si_ml-100k_e20_tt-0.7_task-70_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.29.45_si_ml-100k_e20_tt-0.7_task-35_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.30.46_si_ml-100k_e20_tt-0.7_task-41_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.31.10_si_ml-100k_e20_tt-0.7_task-25_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.37.05_si_ml-100k_e20_tt-0.7_task-57_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.40.37_si_ml-100k_e20_tt-0.7_task-68_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.44.15_si_ml-100k_e20_tt-0.7_task-72_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.48.05_si_ml-100k_e20_tt-0.7_task-80_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.49.04_si_ml-100k_e20_tt-0.7_task-62_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.53.28_si_ml-100k_e20_tt-0.7_task-60_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.53.32_si_ml-100k_e20_tt-0.7_task-65_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.55.38_si_ml-100k_e20_tt-0.7_task-50_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.57.10_si_ml-100k_e20_tt-0.7_task-53_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.57.58_si_ml-100k_e20_tt-0.7_task-74_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_05.59.46_si_ml-100k_e20_tt-0.7_task-82_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.05.17_si_ml-100k_e20_tt-0.7_task-66_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.05.34_si_ml-100k_e20_tt-0.7_task-86_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.11.41_si_ml-100k_e20_tt-0.7_task-78_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.15.46_si_ml-100k_e20_tt-0.7_task-63_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.15.59_si_ml-100k_e20_tt-0.7_task-69_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.19.17_si_ml-100k_e20_tt-0.7_task-84_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.19.30_si_ml-100k_e20_tt-0.7_task-61_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.21.40_si_ml-100k_e20_tt-0.7_task-92_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.24.23_si_ml-100k_e20_tt-0.7_task-59_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.30.02_si_ml-100k_e20_tt-0.7_task-71_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.31.46_si_ml-100k_e20_tt-0.7_task-76_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.32.05_si_ml-100k_e20_tt-0.7_task-51_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.32.43_si_ml-100k_e20_tt-0.7_task-94_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.32.51_si_ml-100k_e20_tt-0.7_task-49_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.33.36_si_ml-100k_e20_tt-0.7_task-83_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.34.02_si_ml-100k_e20_tt-0.7_task-98_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.36.11_si_ml-100k_e20_tt-0.7_task-73_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.43.06_si_ml-100k_e20_tt-0.7_task-88_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.45.09_si_ml-100k_e20_tt-0.7_task-90_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.49.43_si_ml-100k_e20_tt-0.7_task-79_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.51.20_si_ml-100k_e20_tt-0.7_task-85_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_06.57.30_si_ml-100k_e20_tt-0.7_task-96_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_07.00.38_si_ml-100k_e20_tt-0.7_task-75_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_07.02.30_si_ml-100k_e20_tt-0.7_task-93_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_07.05.37_si_ml-100k_e20_tt-0.7_task-81_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_07.07.48_si_ml-100k_e20_tt-0.7_task-77_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_07.09.41_si_ml-100k_e20_tt-0.7_task-99_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_07.12.36_si_ml-100k_e20_tt-0.7_task-95_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_07.22.50_si_ml-100k_e20_tt-0.7_task-89_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_07.27.23_si_ml-100k_e20_tt-0.7_task-67_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_07.35.17_si_ml-100k_e20_tt-0.7_task-97_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_07.41.40_si_ml-100k_e20_tt-0.7_task-87_user-metrics.csv',
'../metrics/mpcf-si/2016-05-10_07.43.27_si_ml-100k_e20_tt-0.7_task-91_user-metrics.csv',
]

In [3]:
# Pair every metrics file with the mean of its 'auc' column, then display
# the (path, mean AUC) pairs from best to worst.
result_map = list(
    (csv_path, pd.read_csv(csv_path)['auc'].mean()) for csv_path in results
)
sorted(result_map, key=lambda pair: pair[1], reverse=True)

[('../metrics/mpcf-si/2016-05-10_06.36.11_si_ml-100k_e20_tt-0.7_task-73_user-metrics.csv',
  0.9086596254760263),
 ('../metrics/mpcf-si/2016-05-10_07.12.36_si_ml-100k_e20_tt-0.7_task-95_user-metrics.csv',
  0.9029804869782388),
 ('../metrics/mpcf-si/2016-05-10_06.43.06_si_ml-100k_e20_tt-0.7_task-88_user-metrics.csv',
  0.9027816483420635),
 ('../metrics/mpcf-si/2016-05-10_07.09.41_si_ml-100k_e20_tt-0.7_task-99_user-metrics.csv',
  0.9023023530508396),
 ('../metrics/mpcf-si/2016-05-10_03.34.14_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
  0.9010245550849715),
 ('../metrics/mpcf-si/2016-05-10_07.35.17_si_ml-100k_e20_tt-0.7_task-97_user-metrics.csv',
  0.9010092429222203),
 ('../metrics/mpcf-si/2016-05-10_04.04.43_si_ml-100k_e20_tt-0.7_task-6_user-metrics.csv',
  0.896626946996296),
 ('../metrics/mpcf-si/2016-05-10_04.08.18_si_ml-100k_e20_tt-0.7_task-36_user-metrics.csv',
  0.8964513910576768),
 ('../metrics/mpcf-si/2016-05-10_03.44.28_si_ml-100k_e20_tt-0.7_task-28_user-metrics.csv',


In [5]:
# Pretty-print the JSON config stored for the ml-100k task-73 run.
with open('../models/mpcf-si/2016-05-10_06.34.11_si_ml-100k_e20_tt-0.7_task-73_config.json') as config_file:
    run_config = json.load(config_file)
    pp.pprint(run_config)

{   u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-73',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.02,
    u'lr_delta_qi': 0.001,
    u'lr_delta_qi_decay': 0.03,
    u'lr_si': 0.003,
    u'lr_si_decay': 0.02,
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 96,
    u'nb_user_pref': 2,
    u'pool_size': 2,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.003,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'save_on_epoch_end': False,
    u'si_model': True,
    u'si_nn': [96, 200, 100, 50],
    u'si_reg_lambda': 0.003,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/split

In [6]:
# Pretty-print the JSON config stored for the ml-100k task-95 run.
with open('../models/mpcf-si/2016-05-10_07.10.38_si_ml-100k_e20_tt-0.7_task-95_config.json') as config_file:
    run_config = json.load(config_file)
    pp.pprint(run_config)

{   u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-95',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.003,
    u'lr_decay': 0.0005,
    u'lr_delta_qi': 0.001,
    u'lr_delta_qi_decay': 0.03,
    u'lr_si': 0.0003,
    u'lr_si_decay': 0.02,
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 2,
    u'pool_size': 2,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.003,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'save_on_epoch_end': False,
    u'si_model': True,
    u'si_nn': [128, 160, 50],
    u'si_reg_lambda': 0.001,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/spli

In [7]:
# Pretty-print the JSON config stored for the ml-100k task-88 run.
with open('../models/mpcf-si/2016-05-10_06.40.35_si_ml-100k_e20_tt-0.7_task-88_config.json') as config_file:
    run_config = json.load(config_file)
    pp.pprint(run_config)

{   u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-88',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.03,
    u'lr_delta_qi': 0.001,
    u'lr_delta_qi_decay': 0.0005,
    u'lr_si': 0.001,
    u'lr_si_decay': 0.0005,
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 96,
    u'nb_user_pref': 8,
    u'pool_size': 2,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.003,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'save_on_epoch_end': False,
    u'si_model': True,
    u'si_nn': [96, 200, 100, 50],
    u'si_reg_lambda': 0.003,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/s

## ML-1m - 20e - 0.2 train/0.8 test

In [6]:
# Paths of the user-metrics CSV files ranked by mean AUC in the next cell.
# Going by the file names, these are ml-1m runs with 20 epochs ("e20") and
# the "tt-0.2" split; each path carries its task id in the file name.
results = [
'../metrics/mpcf-si/2016-05-12_21.42.09_si_ml-1m_e20_tt-0.2_task-10_user-metrics.csv',
'../metrics/mpcf-si/2016-05-12_21.54.06_si_ml-1m_e20_tt-0.2_task-22_user-metrics.csv',
'../metrics/mpcf-si/2016-05-12_22.09.07_si_ml-1m_e20_tt-0.2_task-15_user-metrics.csv',
'../metrics/mpcf-si/2016-05-12_22.17.12_si_ml-1m_e20_tt-0.2_task-8_user-metrics.csv',
'../metrics/mpcf-si/2016-05-12_22.31.54_si_ml-1m_e20_tt-0.2_task-4_user-metrics.csv',
'../metrics/mpcf-si/2016-05-12_22.56.26_si_ml-1m_e20_tt-0.2_task-13_user-metrics.csv',
'../metrics/mpcf-si/2016-05-12_23.16.03_si_ml-1m_e20_tt-0.2_task-5_user-metrics.csv',
'../metrics/mpcf-si/2016-05-12_23.19.54_si_ml-1m_e20_tt-0.2_task-6_user-metrics.csv',
'../metrics/mpcf-si/2016-05-12_23.20.22_si_ml-1m_e20_tt-0.2_task-11_user-metrics.csv',
'../metrics/mpcf-si/2016-05-12_23.59.46_si_ml-1m_e20_tt-0.2_task-21_user-metrics.csv',
'../metrics/mpcf-si/2016-05-13_04.29.41_si_ml-1m_e20_tt-0.2_task-14_user-metrics.csv',
'../metrics/mpcf-si/2016-05-13_05.28.39_si_ml-1m_e20_tt-0.2_task-16_user-metrics.csv',
'../metrics/mpcf-si/2016-05-13_07.09.47_si_ml-1m_e20_tt-0.2_task-7_user-metrics.csv',
'../metrics/mpcf-si/2016-05-13_09.02.12_si_ml-1m_e20_tt-0.2_task-3_user-metrics.csv',
'../metrics/mpcf-si/2016-05-13_09.27.22_si_ml-1m_e20_tt-0.2_task-20_user-metrics.csv',
'../metrics/mpcf-si/2016-05-13_11.20.41_si_ml-1m_e20_tt-0.2_task-17_user-metrics.csv',
'../metrics/mpcf-si/2016-05-13_14.21.53_si_ml-1m_e20_tt-0.2_task-18_user-metrics.csv',
'../metrics/mpcf-si/2016-05-14_02.27.18_si_ml-1m_e20_tt-0.2_task-19_user-metrics.csv',
'../metrics/mpcf-si/2016-05-14_13.38.01_si_ml-1m_e20_tt-0.2_task-9_user-metrics.csv',
'../metrics/mpcf-si/2016-05-14_15.50.34_si_ml-1m_e20_tt-0.2_task-23_user-metrics.csv',
'../metrics/mpcf-si/2016-05-14_18.08.25_si_ml-1m_e20_tt-0.2_task-12_user-metrics.csv',
'../metrics/mpcf-si/2016-05-14_18.36.59_si_ml-1m_e20_tt-0.2_task-1_user-metrics.csv',
]

In [7]:
# Pair every metrics file with the mean of its 'auc' column, then display
# the (path, mean AUC) pairs from best to worst.
result_map = list(
    (csv_path, pd.read_csv(csv_path)['auc'].mean()) for csv_path in results
)
sorted(result_map, key=lambda pair: pair[1], reverse=True)

[('../metrics/mpcf-si/2016-05-13_05.28.39_si_ml-1m_e20_tt-0.2_task-16_user-metrics.csv',
  0.8859463617462386),
 ('../metrics/mpcf-si/2016-05-13_14.21.53_si_ml-1m_e20_tt-0.2_task-18_user-metrics.csv',
  0.8855081599169033),
 ('../metrics/mpcf-si/2016-05-13_09.02.12_si_ml-1m_e20_tt-0.2_task-3_user-metrics.csv',
  0.885143206566869),
 ('../metrics/mpcf-si/2016-05-12_22.17.12_si_ml-1m_e20_tt-0.2_task-8_user-metrics.csv',
  0.8838837398920015),
 ('../metrics/mpcf-si/2016-05-14_18.08.25_si_ml-1m_e20_tt-0.2_task-12_user-metrics.csv',
  0.8820469989389469),
 ('../metrics/mpcf-si/2016-05-14_18.36.59_si_ml-1m_e20_tt-0.2_task-1_user-metrics.csv',
  0.8794071815128031),
 ('../metrics/mpcf-si/2016-05-12_22.31.54_si_ml-1m_e20_tt-0.2_task-4_user-metrics.csv',
  0.8759978512668563),
 ('../metrics/mpcf-si/2016-05-13_07.09.47_si_ml-1m_e20_tt-0.2_task-7_user-metrics.csv',
  0.8753356131585126),
 ('../metrics/mpcf-si/2016-05-12_21.54.06_si_ml-1m_e20_tt-0.2_task-22_user-metrics.csv',
  0.8749777720108439)

In [10]:
# Pretty-print the JSON config stored for the ml-1m task-16 run.
with open('../models/mpcf-si/2016-05-13_05.28.39_si_ml-1m_e20_tt-0.2_task-16_config.json') as config_file:
    run_config = json.load(config_file)
    pp.pprint(run_config)

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-1m_e20_tt-0.2_task-16',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 4,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.003,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'si_lambda_delta_qi': 0.0003,
    u'si_lr': 0.003,
    u'si_lr_decay': 0.0005,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.001,
    u'sparse_item': True,
    u'test': True,
    u'te

In [11]:
# Pretty-print the JSON config stored for the ml-1m task-18 run.
with open('../models/mpcf-si/2016-05-13_14.21.53_si_ml-1m_e20_tt-0.2_task-18_config.json') as config_file:
    run_config = json.load(config_file)
    pp.pprint(run_config)

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-1m_e20_tt-0.2_task-18',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 8,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.003,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'si_lambda_delta_qi': 0.0003,
    u'si_lr': 0.01,
    u'si_lr_decay': 0.0005,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.003,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-1m/sparse-item/0.2-test.csv',
    u'top_n_predict

In [12]:
# Pretty-print the JSON config stored for the ml-1m task-3 run.
with open('../models/mpcf-si/2016-05-13_09.02.12_si_ml-1m_e20_tt-0.2_task-3_config.json') as config_file:
    run_config = json.load(config_file)
    pp.pprint(run_config)

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-1m_e20_tt-0.2_task-3',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 96,
    u'nb_user_pref': 4,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.003,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'si_lambda_delta_qi': 0.0003,
    u'si_lr': 0.01,
    u'si_lr_decay': 0.02,
    u'si_model': True,
    u'si_nn': [96, 50],
    u'si_reg_lambda': 0.003,
    u'sparse_item': True,
    u'test': True,
    u'test_p

In [16]:
# Pretty-print the JSON config stored for the ml-1m task-8 run.
with open('../models/mpcf-si/2016-05-12_22.17.12_si_ml-1m_e20_tt-0.2_task-8_config.json') as config_file:
    run_config = json.load(config_file)
    pp.pprint(run_config)

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-1m_e20_tt-0.2_task-8',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 8,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.001,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'si_lambda_delta_qi': 0.0003,
    u'si_lr': 0.001,
    u'si_lr_decay': 0.0005,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.001,
    u'sparse_item': True,
    u'test': True,
    u't

## ML-1m - 50e - 0.2 train/0.8 test - no sparse item

In [2]:
# Paths of the user-metrics CSV files ranked by mean AUC in the next cell.
# Going by the file names, these are ml-1m runs with 50 epochs ("e50"), the
# "tt-0.2" split and the "no-sparse" tag; each path carries its task id.
results = [
'../metrics/mpcf-si/2016-05-19_19.53.34_si_ml-1m_e50_tt-0.2_no-sparse_task-3_user-metrics.csv',
'../metrics/mpcf-si/2016-05-19_20.35.04_si_ml-1m_e50_tt-0.2_no-sparse_task-6_user-metrics.csv',
'../metrics/mpcf-si/2016-05-19_20.47.11_si_ml-1m_e50_tt-0.2_no-sparse_task-13_user-metrics.csv',
'../metrics/mpcf-si/2016-05-19_20.50.36_si_ml-1m_e50_tt-0.2_no-sparse_task-10_user-metrics.csv',
'../metrics/mpcf-si/2016-05-20_00.10.45_si_ml-1m_e50_tt-0.2_no-sparse_task-22_user-metrics.csv',
'../metrics/mpcf-si/2016-05-20_01.07.39_si_ml-1m_e50_tt-0.2_no-sparse_task-15_user-metrics.csv',
'../metrics/mpcf-si/2016-05-20_04.20.39_si_ml-1m_e50_tt-0.2_no-sparse_task-18_user-metrics.csv',
'../metrics/mpcf-si/2016-05-20_04.48.10_si_ml-1m_e50_tt-0.2_no-sparse_task-20_user-metrics.csv',
'../metrics/mpcf-si/2016-05-20_06.16.52_si_ml-1m_e50_tt-0.2_no-sparse_task-9_user-metrics.csv',
'../metrics/mpcf-si/2016-05-20_07.21.43_si_ml-1m_e50_tt-0.2_no-sparse_task-14_user-metrics.csv',
'../metrics/mpcf-si/2016-05-20_11.03.20_si_ml-1m_e50_tt-0.2_no-sparse_task-16_user-metrics.csv',
]

In [12]:
# Rank the runs by the mean of their 'auc' column, best first, and keep the
# ranked (path, mean AUC) list in `result_map` so it can be indexed afterwards.
result_map = sorted(
    ((csv_path, pd.read_csv(csv_path)['auc'].mean()) for csv_path in results),
    key=lambda pair: pair[1],
    reverse=True
)
result_map

[('../metrics/mpcf-si/2016-05-19_20.35.04_si_ml-1m_e50_tt-0.2_no-sparse_task-6_user-metrics.csv',
  0.892831727593386),
 ('../metrics/mpcf-si/2016-05-20_01.07.39_si_ml-1m_e50_tt-0.2_no-sparse_task-15_user-metrics.csv',
  0.8782814017069732),
 ('../metrics/mpcf-si/2016-05-20_04.20.39_si_ml-1m_e50_tt-0.2_no-sparse_task-18_user-metrics.csv',
  0.8766955190961875),
 ('../metrics/mpcf-si/2016-05-20_04.48.10_si_ml-1m_e50_tt-0.2_no-sparse_task-20_user-metrics.csv',
  0.8734907857465698),
 ('../metrics/mpcf-si/2016-05-19_20.50.36_si_ml-1m_e50_tt-0.2_no-sparse_task-10_user-metrics.csv',
  0.8718088345344018),
 ('../metrics/mpcf-si/2016-05-20_06.16.52_si_ml-1m_e50_tt-0.2_no-sparse_task-9_user-metrics.csv',
  0.8599464081662715),
 ('../metrics/mpcf-si/2016-05-20_00.10.45_si_ml-1m_e50_tt-0.2_no-sparse_task-22_user-metrics.csv',
  0.8430412718955667),
 ('../metrics/mpcf-si/2016-05-20_07.21.43_si_ml-1m_e50_tt-0.2_no-sparse_task-14_user-metrics.csv',
  0.842536772476636),
 ('../metrics/mpcf-si/2016-0

In [10]:
# Show the mean AUC of the first entry in result_map, followed by the
# JSON config stored for the no-sparse task-6 run.
print("AUC %s" % result_map[0][1])
with open('../models/mpcf-si/2016-05-19_20.35.04_si_ml-1m_e50_tt-0.2_no-sparse_task-6_config.json') as config_file:
    run_config = json.load(config_file)
    pp.pprint(run_config)

AUC 0.892831727593
{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-1m_e50_tt-0.2_no-sparse_task-6',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 50,
    u'nb_latent_f': 128,
    u'nb_user_pref': 4,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'si_lambda_delta_qi': 0.0003,
    u'si_lr': 0.001,
    u'si_lr_decay': 0.02,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.001,
    u'sparse_item': False,

In [11]:
# Show the mean AUC of the second entry in result_map, followed by the
# JSON config stored for the no-sparse task-15 run.
print("AUC %s" % result_map[1][1])
with open('../models/mpcf-si/2016-05-20_01.07.39_si_ml-1m_e50_tt-0.2_no-sparse_task-15_config.json') as config_file:
    run_config = json.load(config_file)
    pp.pprint(run_config)

AUC 0.878281401707
{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-1m_e50_tt-0.2_no-sparse_task-15',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.003,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 50,
    u'nb_latent_f': 96,
    u'nb_user_pref': 2,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.001,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'si_lambda_delta_qi': 0.0003,
    u'si_lr': 0.0003,
    u'si_lr_decay': 0.02,
    u'si_model': True,
    u'si_nn': [96, 160, 50],
    u'si_reg_lambda': 0.001,
    u'sparse_item':