In [1]:
import numpy as np
import pandas as pd
import json
import pprint
import json
# Shared pretty-printer (4-space indent) used by the cells that dump JSON configs.
pp = pprint.PrettyPrinter(indent=4)
# Show up to 100 characters per cell -- presumably so the long `path` values stay readable.
pd.options.display.max_colwidth = 100

In [2]:
def df_mean(path, apply_to_df):
    """Summarise one metrics CSV as a single row of column means.

    Parameters
    ----------
    path : str
        Location of the CSV file; handed to ``pd.read_csv``.
    apply_to_df : callable
        Called with the loaded frame and must return the frame to average
        (typically a filtered view). It runs *before* the bookkeeping
        columns are dropped, so it may still refer to them.

    Returns
    -------
    pandas.DataFrame
        A one-row frame (index 0) holding the mean of every remaining
        numeric column, plus a ``path`` column recording the source file.
    """
    # Counters / identifiers that must not be averaged with the metrics.
    bookkeeping = ['nb_movies_not_in_train', 'nb_test_ratings',
                   'nb_train_ratings', 'user_id']

    metrics = apply_to_df(pd.read_csv(path))
    # errors='ignore' skips columns that are absent, so one call replaces
    # the per-column membership checks.
    metrics = metrics.drop(bookkeeping, axis=1, errors='ignore')

    # numeric_only=True keeps a stray text column from raising TypeError on
    # recent pandas; older versions silently skipped such columns anyway.
    summary = metrics.mean(0, numeric_only=True).to_frame().T
    summary['path'] = path

    return summary
    

def get_df_results(results, max_train_ratings=None):
    """Build a table with one row of mean metrics per result file.

    Parameters
    ----------
    results : iterable of str
        Paths of metrics CSVs; each one is summarised with ``df_mean``.
    max_train_ratings : number, optional
        When given, only rows with ``nb_train_ratings <= max_train_ratings``
        are averaged. ``None`` (the default) disables this filter; ``0`` is
        treated as a real limit, not as "no limit".

    Returns
    -------
    pandas.DataFrame
        The per-file rows stacked in input order. Indices are not reset, so
        every row keeps the index 0 it received from ``df_mean``.

    Raises
    ------
    ValueError
        If ``results`` is empty (there is nothing to concatenate).
    """
    def keep_rows(df):
        # Rows without any test rating are always excluded.
        mask = df['nb_test_ratings'] > 0
        if max_train_ratings is not None:
            mask = mask & (df['nb_train_ratings'] <= max_train_ratings)
        return df[mask]

    rows = [df_mean(path, keep_rows) for path in results]
    # A single concat replaces reduce + DataFrame.append: `reduce` is not a
    # builtin on Python 3 and `append` no longer exists in pandas 2.x.
    return pd.concat(rows)

## BEST ML-100k - 0.7 train / 0.3 test - sparse item

In [3]:
# Per-run user-metrics CSVs for ml-100k (tt-0.7): the task-1 run plus train-2 ... train-5.
user_metrics = [
'../metrics/mpcf-si/2016-06-27_08.23.04_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_15.54.03_si_ml-100k_e20_tt-0.7_train-2_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_15.38.42_si_ml-100k_e20_tt-0.7_train-3_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_15.40.07_si_ml-100k_e20_tt-0.7_train-4_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_15.40.38_si_ml-100k_e20_tt-0.7_train-5_user-metrics.csv',
]

In [4]:
# One row of mean user metrics per run; rows without test ratings are excluded.
df_cv_user_metrics = get_df_results(user_metrics)
df_cv_user_metrics

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.937411,0.333537,0.284084,0.718398,0.278791,0.440227,0.669648,0.200229,../metrics/mpcf-si/2016-06-27_08.23.04_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.937531,0.342135,0.281319,0.72032,0.277466,0.437622,0.687535,0.211969,../metrics/mpcf-si/2016-06-27_15.54.03_si_ml-100k_e20_tt-0.7_train-2_user-metrics.csv
0,0.937996,0.343926,0.291175,0.718098,0.285737,0.448687,0.683606,0.206335,../metrics/mpcf-si/2016-06-27_15.38.42_si_ml-100k_e20_tt-0.7_train-3_user-metrics.csv
0,0.93659,0.33824,0.288532,0.711397,0.282927,0.446143,0.698782,0.194453,../metrics/mpcf-si/2016-06-27_15.40.07_si_ml-100k_e20_tt-0.7_train-4_user-metrics.csv
0,0.938654,0.339878,0.28666,0.7126,0.282874,0.440936,0.693267,0.189542,../metrics/mpcf-si/2016-06-27_15.40.38_si_ml-100k_e20_tt-0.7_train-5_user-metrics.csv


In [5]:
# Average the per-run means; numeric_only skips the text `path` column,
# which would otherwise raise TypeError on recent pandas.
df_cv_user_metrics.mean(0, numeric_only=True)

auc                   0.937636
avg_precision         0.339543
f1                    0.286354
fcp                   0.716163
precision_at_20       0.281559
recall_at_20          0.442723
reciprocal_rank       0.686568
spearman_rank_corr    0.200506
dtype: float64

In [6]:
# Same runs, restricted to rows whose nb_train_ratings is at most 20.
df_max_train = get_df_results(user_metrics, max_train_ratings=20)
# numeric_only skips the text `path` column (TypeError on recent pandas otherwise).
df_max_train.mean(0, numeric_only=True)

auc                   0.955823
avg_precision         0.291424
f1                    0.185653
fcp                   0.694692
precision_at_20       0.117177
recall_at_20          0.556097
reciprocal_rank       0.522466
spearman_rank_corr    0.127012
dtype: float64

In [7]:
# Pretty-print the JSON configuration saved for the task-1 run.
with open('../models/mpcf-si/2016-06-27_08.23.04_si_ml-100k_e20_tt-0.7_task-1_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'eval_in_parallel': True,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-1',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 4,
    u'pool_size': 4,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_item_cosine_lambda': 0.7,
    u'si_item_d2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'si_item_lambda_d_item_f': 0.001,
    u'si_item_lr': 0.03,
    u'si_item_lr_decay': 0.02,
    u'si_item_model': True,
    u'si_item_nn': [128, 50],
    u'si_item_nn_hidden':

In [8]:
# Per-run movie-metrics CSVs for ml-100k (tt-0.7): the task-1 run plus train-2 ... train-5.
movie_metrics = [
'../metrics/mpcf-si/2016-06-27_08.23.04_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-27_15.54.03_si_ml-100k_e20_tt-0.7_train-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-27_15.38.42_si_ml-100k_e20_tt-0.7_train-3_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-27_15.40.07_si_ml-100k_e20_tt-0.7_train-4_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-27_15.40.38_si_ml-100k_e20_tt-0.7_train-5_movie-metrics.csv',]


In [9]:
# One row of mean movie metrics per run; rows without test ratings are excluded.
df_cv_movie_metrics = get_df_results(movie_metrics)
df_cv_movie_metrics

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.911637,708.548841,78.057119,../metrics/mpcf-si/2016-06-27_08.23.04_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv
0,0.909158,708.548841,78.05298,../metrics/mpcf-si/2016-06-27_15.54.03_si_ml-100k_e20_tt-0.7_train-2_movie-metrics.csv
0,0.906159,708.548841,78.053808,../metrics/mpcf-si/2016-06-27_15.38.42_si_ml-100k_e20_tt-0.7_train-3_movie-metrics.csv
0,0.905151,708.548841,78.057947,../metrics/mpcf-si/2016-06-27_15.40.07_si_ml-100k_e20_tt-0.7_train-4_movie-metrics.csv
0,0.909571,708.548841,78.053808,../metrics/mpcf-si/2016-06-27_15.40.38_si_ml-100k_e20_tt-0.7_train-5_movie-metrics.csv


In [10]:
# Average the per-run means; numeric_only skips the text `path` column,
# which would otherwise raise TypeError on recent pandas.
df_cv_movie_metrics.mean(0, numeric_only=True)

auc                                0.908335
movie_id                         708.548841
nb_times_in_top_n_predictions     78.055132
dtype: float64

In [11]:
# Same runs, restricted to rows whose nb_train_ratings is at most 5.
df_max_train = get_df_results(movie_metrics, max_train_ratings=5)
# numeric_only skips the text `path` column (TypeError on recent pandas otherwise).
df_max_train.mean(0, numeric_only=True)

auc                                 0.883668
movie_id                         1294.073333
nb_times_in_top_n_predictions       1.020000
dtype: float64

## BEST ML-1m - 0.7 train / 0.3 test - sparse item

In [12]:
# Per-run user-metrics CSVs for ml-1m (tt-0.7): the task-0 run plus train-2 ... train-5.
user_metrics = [
'../metrics/mpcf-si/2016-06-29_13.23.36_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-30_07.38.02_si_ml-1m_e20_tt-0.7_train-2_user-metrics.csv',
'../metrics/mpcf-si/2016-06-30_07.32.45_si_ml-1m_e20_tt-0.7_train-3_user-metrics.csv',
'../metrics/mpcf-si/2016-06-30_07.36.22_si_ml-1m_e20_tt-0.7_train-4_user-metrics.csv',
'../metrics/mpcf-si/2016-06-30_07.35.21_si_ml-1m_e20_tt-0.7_train-5_user-metrics.csv',
]

In [13]:
# One row of mean user metrics per run; rows without test ratings are excluded.
df_cv_user_metrics = get_df_results(user_metrics)
df_cv_user_metrics

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.928988,0.300825,0.264203,0.728434,0.320969,0.328111,0.696645,0.22523,../metrics/mpcf-si/2016-06-29_13.23.36_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.928455,0.300784,0.264845,0.729459,0.321631,0.329507,0.692621,0.22969,../metrics/mpcf-si/2016-06-30_07.38.02_si_ml-1m_e20_tt-0.7_train-2_user-metrics.csv
0,0.929049,0.302119,0.265117,0.730205,0.321531,0.328364,0.69459,0.229941,../metrics/mpcf-si/2016-06-30_07.32.45_si_ml-1m_e20_tt-0.7_train-3_user-metrics.csv
0,0.929466,0.300946,0.263662,0.728314,0.320662,0.327169,0.697964,0.222816,../metrics/mpcf-si/2016-06-30_07.36.22_si_ml-1m_e20_tt-0.7_train-4_user-metrics.csv
0,0.92805,0.297046,0.263256,0.727035,0.320795,0.325333,0.688788,0.222144,../metrics/mpcf-si/2016-06-30_07.35.21_si_ml-1m_e20_tt-0.7_train-5_user-metrics.csv


In [14]:
# Average the per-run means; numeric_only skips the text `path` column,
# which would otherwise raise TypeError on recent pandas.
df_cv_user_metrics.mean(0, numeric_only=True)

auc                   0.928802
avg_precision         0.300344
f1                    0.264216
fcp                   0.728689
precision_at_20       0.321118
recall_at_20          0.327697
reciprocal_rank       0.694122
spearman_rank_corr    0.225964
dtype: float64

In [15]:
# Same runs, restricted to rows whose nb_train_ratings is at most 20.
df_max_train = get_df_results(user_metrics, max_train_ratings=20)
# numeric_only skips the text `path` column (TypeError on recent pandas otherwise).
df_max_train.mean(0, numeric_only=True)

auc                   0.900662
avg_precision         0.223898
f1                    0.168515
fcp                   0.728353
precision_at_20       0.110221
recall_at_20          0.428287
reciprocal_rank       0.472053
spearman_rank_corr    0.218878
dtype: float64

In [16]:
# Pretty-print the JSON configuration saved for the task-0 run.
with open('../models/mpcf-si/2016-06-29_13.23.36_si_ml-1m_e20_tt-0.7_task-0_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'eval_in_parallel': True,
    u'experiment_name': u'si_ml-1m_e20_tt-0.7_task-0',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_epochs': 20,
    u'nb_latent_f': 96,
    u'nb_user_pref': 2,
    u'pool_size': 4,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_item_cosine_lambda': 2,
    u'si_item_d2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'si_item_lambda_d_item_f': 0.0003,
    u'si_item_lr': 0.03,
    u'si_item_lr_decay': 0.02,
    u'si_item_model': True,
    u'si_item_nn': [96, 50],
    u'si_item_nn_hidden': [],
  

In [17]:
# Per-run movie-metrics CSVs for ml-1m (tt-0.7): the task-0 run plus train-2 ... train-5.
movie_metrics = [
'../metrics/mpcf-si/2016-06-29_13.23.36_si_ml-1m_e20_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-30_07.38.02_si_ml-1m_e20_tt-0.7_train-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-30_07.32.45_si_ml-1m_e20_tt-0.7_train-3_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-30_07.36.22_si_ml-1m_e20_tt-0.7_train-4_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-30_07.35.21_si_ml-1m_e20_tt-0.7_train-5_movie-metrics.csv',
]


In [18]:
# One row of mean movie metrics per run; rows without test ratings are excluded.
df_cv_movie_metrics = get_df_results(movie_metrics)
df_cv_movie_metrics

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.92237,1999.3961,203.093477,../metrics/mpcf-si/2016-06-29_13.23.36_si_ml-1m_e20_tt-0.7_task-0_movie-metrics.csv
0,0.919478,1999.3961,203.093477,../metrics/mpcf-si/2016-06-30_07.38.02_si_ml-1m_e20_tt-0.7_train-2_movie-metrics.csv
0,0.92243,1999.3961,203.093477,../metrics/mpcf-si/2016-06-30_07.32.45_si_ml-1m_e20_tt-0.7_train-3_movie-metrics.csv
0,0.921699,1999.3961,203.093477,../metrics/mpcf-si/2016-06-30_07.36.22_si_ml-1m_e20_tt-0.7_train-4_movie-metrics.csv
0,0.921523,1999.3961,203.093477,../metrics/mpcf-si/2016-06-30_07.35.21_si_ml-1m_e20_tt-0.7_train-5_movie-metrics.csv


In [19]:
# Average the per-run means; numeric_only skips the text `path` column,
# which would otherwise raise TypeError on recent pandas.
df_cv_movie_metrics.mean(0, numeric_only=True)

auc                                 0.921500
movie_id                         1999.396100
nb_times_in_top_n_predictions     203.093477
dtype: float64

In [20]:
# Same runs, restricted to rows whose nb_train_ratings is at most 10.
df_max_train = get_df_results(movie_metrics, max_train_ratings=10)
# numeric_only skips the text `path` column (TypeError on recent pandas otherwise).
df_max_train.mean(0, numeric_only=True)

auc                                 0.880475
movie_id                         2113.639785
nb_times_in_top_n_predictions       0.003226
dtype: float64

# Gridsearch

## ML-100k - 0.7 train / 0.3 test - sparse item

In [21]:
# User-metrics CSVs of the ml-100k grid-search runs (tt-0.7, task-0 ... task-5, four files each).
user_metrics = [
'../metrics/mpcf-si/2016-06-27_07.00.04_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_07.01.21_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_07.09.35_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_07.12.39_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_08.11.03_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_08.11.53_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_08.23.04_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_08.37.57_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_09.22.13_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_09.23.28_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_09.32.27_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_10.04.17_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_10.32.14_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_10.35.13_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_10.42.18_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_11.21.21_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_11.42.52_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_11.46.08_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_11.56.11_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_12.39.54_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_12.57.29_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_13.03.00_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_13.10.07_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_14.05.13_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
]

In [22]:
# One row of mean metrics per grid-search file, ranked by mean AUC (best first).
df_user_metrics = get_df_results(user_metrics)
df_user_metrics.sort_values('auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.937411,0.333537,0.284084,0.718398,0.278791,0.440227,0.669648,0.200229,../metrics/mpcf-si/2016-06-27_08.23.04_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.936784,0.332068,0.280691,0.719122,0.276352,0.434633,0.67188,0.200899,../metrics/mpcf-si/2016-06-27_11.42.52_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv
0,0.936597,0.331384,0.282344,0.717785,0.277147,0.437757,0.662893,0.200089,../metrics/mpcf-si/2016-06-27_09.32.27_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv
0,0.936581,0.331488,0.281928,0.719348,0.2772,0.437891,0.666113,0.203792,../metrics/mpcf-si/2016-06-27_07.01.21_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv
0,0.93654,0.334586,0.281521,0.718083,0.276776,0.435515,0.675921,0.20116,../metrics/mpcf-si/2016-06-27_07.12.39_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv
0,0.936377,0.327789,0.282175,0.718408,0.27789,0.435825,0.654842,0.199957,../metrics/mpcf-si/2016-06-27_08.37.57_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.936251,0.330704,0.280782,0.717558,0.276034,0.435067,0.665047,0.196737,../metrics/mpcf-si/2016-06-27_11.56.11_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv
0,0.936186,0.334857,0.28299,0.719346,0.278791,0.438555,0.669115,0.203463,../metrics/mpcf-si/2016-06-27_12.39.54_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv
0,0.936041,0.330959,0.281101,0.71848,0.276511,0.434104,0.672422,0.200834,../metrics/mpcf-si/2016-06-27_11.21.21_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv
0,0.935651,0.332576,0.279852,0.717486,0.27508,0.431947,0.665538,0.198269,../metrics/mpcf-si/2016-06-27_08.11.53_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv


In [23]:
# Best config: pretty-print the JSON configuration saved for that run.
with open('../models/mpcf-si/2016-06-27_08.23.04_si_ml-100k_e20_tt-0.7_task-1_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'eval_in_parallel': True,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-1',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.01,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 4,
    u'pool_size': 4,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_item_cosine_lambda': 0.7,
    u'si_item_d2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'si_item_lambda_d_item_f': 0.001,
    u'si_item_lr': 0.03,
    u'si_item_lr_decay': 0.02,
    u'si_item_model': True,
    u'si_item_nn': [128, 50],
    u'si_item_nn_hidden':

## ML-1M - 0.7 train / 0.3 test - sparse item

In [24]:
# User-metrics CSVs of the ml-1m grid-search runs (tt-0.7, all labelled task-0).
user_metrics = [
'../metrics/mpcf-si/2016-06-28_23.02.21_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-28_23.08.21_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-28_23.12.00_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-28_23.14.30_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-28_23.16.48_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-28_23.19.34_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-28_23.20.06_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-28_23.20.44_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-28_23.25.09_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-28_23.26.23_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-28_23.27.46_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-28_23.34.21_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-29_13.10.54_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-29_13.23.36_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-29_13.27.13_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-29_13.28.17_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-29_14.06.44_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-29_14.10.27_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
]

In [25]:
# One row of mean metrics per grid-search file, ranked by mean AUC (best first).
df_user_metrics = get_df_results(user_metrics)
df_user_metrics.sort_values('auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.928988,0.300825,0.264203,0.728434,0.320969,0.328111,0.696645,0.22523,../metrics/mpcf-si/2016-06-29_13.23.36_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.928735,0.309179,0.271141,0.728191,0.328816,0.336959,0.704874,0.224742,../metrics/mpcf-si/2016-06-29_13.10.54_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.92867,0.309782,0.271534,0.728656,0.328535,0.338058,0.707534,0.226035,../metrics/mpcf-si/2016-06-29_13.28.17_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.928578,0.309499,0.270668,0.72827,0.32822,0.336193,0.710218,0.224839,../metrics/mpcf-si/2016-06-29_13.27.13_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.927725,0.312491,0.272789,0.726539,0.329685,0.340367,0.715861,0.219253,../metrics/mpcf-si/2016-06-28_23.20.44_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.927675,0.317031,0.275792,0.727723,0.333891,0.342405,0.718635,0.223161,../metrics/mpcf-si/2016-06-29_14.06.44_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.927415,0.309615,0.27051,0.72888,0.328013,0.33515,0.714659,0.226767,../metrics/mpcf-si/2016-06-29_14.10.27_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.926854,0.311607,0.27208,0.727673,0.328758,0.338695,0.715992,0.223045,../metrics/mpcf-si/2016-06-28_23.20.06_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.926507,0.313363,0.273233,0.727455,0.330025,0.340378,0.720202,0.222292,../metrics/mpcf-si/2016-06-28_23.26.23_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv
0,0.925499,0.305424,0.268311,0.72804,0.325008,0.333416,0.707661,0.22431,../metrics/mpcf-si/2016-06-28_23.02.21_si_ml-1m_e20_tt-0.7_task-0_user-metrics.csv


In [26]:
# Pretty-print the JSON configuration saved for the task-0 run.
with open('../models/mpcf-si/2016-06-29_13.23.36_si_ml-1m_e20_tt-0.7_task-0_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'eval_in_parallel': True,
    u'experiment_name': u'si_ml-1m_e20_tt-0.7_task-0',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_epochs': 20,
    u'nb_latent_f': 96,
    u'nb_user_pref': 2,
    u'pool_size': 4,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_item_cosine_lambda': 2,
    u'si_item_d2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'si_item_lambda_d_item_f': 0.0003,
    u'si_item_lr': 0.03,
    u'si_item_lr_decay': 0.02,
    u'si_item_model': True,
    u'si_item_nn': [96, 50],
    u'si_item_nn_hidden': [],
  

### Compare number of epochs

In [27]:
# User-metrics CSVs of the ml-100k "best" runs at 5, 10 and 15 epochs (e5 / e10 / e15).
user_metrics = [
'../metrics/mpcf-si/2016-06-27_14.53.58_si_ml-100k_e5_tt-0.7_best_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_15.12.16_si_ml-100k_e10_tt-0.7_best_user-metrics.csv',
'../metrics/mpcf-si/2016-06-27_15.30.19_si_ml-100k_e15_tt-0.7_best_user-metrics.csv',
]

In [28]:
# One row of mean metrics per epoch setting, ranked by mean AUC (best first).
df_user_metrics = get_df_results(user_metrics)
df_user_metrics.sort_values('auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.936146,0.325539,0.277592,0.717452,0.273754,0.429009,0.661242,0.198913,../metrics/mpcf-si/2016-06-27_15.30.19_si_ml-100k_e15_tt-0.7_best_user-metrics.csv
0,0.935003,0.308787,0.269772,0.718055,0.266119,0.415944,0.640883,0.199785,../metrics/mpcf-si/2016-06-27_15.12.16_si_ml-100k_e10_tt-0.7_best_user-metrics.csv
0,0.930361,0.283011,0.252367,0.714867,0.249788,0.39241,0.613172,0.189408,../metrics/mpcf-si/2016-06-27_14.53.58_si_ml-100k_e5_tt-0.7_best_user-metrics.csv


## ML-100k - 0.7 train / 0.3 test - only 1 or 2 user pref - no binarize - sparse item

In [29]:
# User-metrics CSVs of the earlier (2016-06-13/14) ml-100k tt-0.7 runs.
user_metrics = [
'../metrics/mpcf-si/2016-06-13_20.32.40_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mpcf-si/2016-06-14_02.13.50_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mpcf-si/2016-06-14_02.42.34_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mpcf-si/2016-06-14_02.42.36_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-14_05.12.16_si_ml-100k_e20_tt-0.7_task-7_user-metrics.csv',
'../metrics/mpcf-si/2016-06-14_05.16.08_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/mpcf-si/2016-06-14_07.47.57_si_ml-100k_e20_tt-0.7_task-8_user-metrics.csv',
'../metrics/mpcf-si/2016-06-14_08.23.27_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/mpcf-si/2016-06-14_08.54.26_si_ml-100k_e20_tt-0.7_task-10_user-metrics.csv',
]

In [30]:
# One row of mean user metrics per file, ranked by mean AUC (best first).
df_results = get_df_results(user_metrics)
df_results.sort_values('auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.923356,0.34837,0.289676,0.75005,0.28632,0.447321,0.700376,0.292892,../metrics/mpcf-si/2016-06-14_08.23.27_si_ml-100k_e20_tt-0.7_task-5_user-metrics.csv
0,0.916473,0.352538,0.290847,0.744711,0.288494,0.444731,0.711323,0.277453,../metrics/mpcf-si/2016-06-14_08.54.26_si_ml-100k_e20_tt-0.7_task-10_user-metrics.csv
0,0.909906,0.374007,0.306835,0.739,0.303287,0.468608,0.728469,0.260242,../metrics/mpcf-si/2016-06-14_05.16.08_si_ml-100k_e20_tt-0.7_task-4_user-metrics.csv
0,0.90938,0.249534,0.226138,0.73803,0.226564,0.343813,0.585988,0.258501,../metrics/mpcf-si/2016-06-14_02.42.36_si_ml-100k_e20_tt-0.7_task-0_user-metrics.csv
0,0.904945,0.236814,0.214204,0.735245,0.216649,0.321243,0.568623,0.250564,../metrics/mpcf-si/2016-06-14_02.13.50_si_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.904,0.23855,0.215654,0.734036,0.217922,0.323383,0.573838,0.246161,../metrics/mpcf-si/2016-06-14_02.42.34_si_ml-100k_e20_tt-0.7_task-3_user-metrics.csv
0,0.843826,0.147665,0.141233,0.724592,0.14597,0.206045,0.439783,0.217316,../metrics/mpcf-si/2016-06-13_20.32.40_si_ml-100k_e20_tt-0.7_task-2_user-metrics.csv
0,0.832356,0.140739,0.135059,0.73112,0.140774,0.193298,0.425854,0.236552,../metrics/mpcf-si/2016-06-14_05.12.16_si_ml-100k_e20_tt-0.7_task-7_user-metrics.csv
0,0.830544,0.140658,0.134967,0.732705,0.140774,0.194362,0.425417,0.241006,../metrics/mpcf-si/2016-06-14_07.47.57_si_ml-100k_e20_tt-0.7_task-8_user-metrics.csv


In [31]:
# Best config: pretty-print the JSON configuration saved for the task-5 run.
with open('../models/mpcf-si/2016-06-14_08.23.27_si_ml-100k_e20_tt-0.7_task-5_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-5',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 2,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_lambda_delta_qi': 0.01,
    u'si_lr': 0.001,
    u'si_lr_decay': 0.02,
    u'si_model': True,
    u'si_nn': [128, 160, 50],
    u'si_reg_lambda': 0.03,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_predi

In [32]:
# Second-best config: pretty-print the JSON configuration saved for the task-10 run.
with open('../models/mpcf-si/2016-06-14_08.54.26_si_ml-100k_e20_tt-0.7_task-10_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.7_task-10',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 1,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_lambda_delta_qi': 0.001,
    u'si_lr': 0.003,
    u'si_lr_decay': 0.0005,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.003,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_pre

In [33]:
# Movie-metrics CSVs of the earlier (2016-06-13/14) ml-100k tt-0.7 runs.
movie_metrics = [
'../metrics/mpcf-si/2016-06-13_20.32.40_si_ml-100k_e20_tt-0.7_task-2_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-14_02.13.50_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-14_02.42.34_si_ml-100k_e20_tt-0.7_task-3_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-14_02.42.36_si_ml-100k_e20_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-14_05.12.16_si_ml-100k_e20_tt-0.7_task-7_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-14_05.16.08_si_ml-100k_e20_tt-0.7_task-4_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-14_07.47.57_si_ml-100k_e20_tt-0.7_task-8_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-14_08.23.27_si_ml-100k_e20_tt-0.7_task-5_movie-metrics.csv',
'../metrics/mpcf-si/2016-06-14_08.54.26_si_ml-100k_e20_tt-0.7_task-10_movie-metrics.csv',
]

In [34]:
# One row of mean movie metrics per file, ranked by mean AUC (best first).
df_results = get_df_results(movie_metrics)
df_results.sort_values('auc', ascending=False)

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.881204,708.548841,78.061258,../metrics/mpcf-si/2016-06-14_08.23.27_si_ml-100k_e20_tt-0.7_task-5_movie-metrics.csv
0,0.875104,708.548841,78.061258,../metrics/mpcf-si/2016-06-14_08.54.26_si_ml-100k_e20_tt-0.7_task-10_movie-metrics.csv
0,0.874621,708.548841,78.062914,../metrics/mpcf-si/2016-06-14_02.42.36_si_ml-100k_e20_tt-0.7_task-0_movie-metrics.csv
0,0.867298,708.548841,78.062914,../metrics/mpcf-si/2016-06-14_02.13.50_si_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv
0,0.866641,708.548841,78.062914,../metrics/mpcf-si/2016-06-14_02.42.34_si_ml-100k_e20_tt-0.7_task-3_movie-metrics.csv
0,0.861273,708.548841,78.04553,../metrics/mpcf-si/2016-06-14_05.16.08_si_ml-100k_e20_tt-0.7_task-4_movie-metrics.csv
0,0.797379,708.548841,78.062914,../metrics/mpcf-si/2016-06-13_20.32.40_si_ml-100k_e20_tt-0.7_task-2_movie-metrics.csv
0,0.776498,708.548841,78.062914,../metrics/mpcf-si/2016-06-14_05.12.16_si_ml-100k_e20_tt-0.7_task-7_movie-metrics.csv
0,0.770206,708.548841,78.062914,../metrics/mpcf-si/2016-06-14_07.47.57_si_ml-100k_e20_tt-0.7_task-8_movie-metrics.csv


## ML-100k - 0.2 train / 0.8 test - only 1 or 2 user pref - no binarize - sparse item

In [35]:
# User-metrics CSVs of the ml-100k tt-0.2 runs (task-0 ... task-11).
user_metrics = [
'../metrics/mpcf-si/2016-06-13_18.33.31_si_ml-100k_e20_tt-0.2_task-2_user-metrics.csv',
'../metrics/mpcf-si/2016-06-13_18.41.59_si_ml-100k_e20_tt-0.2_task-1_user-metrics.csv',
'../metrics/mpcf-si/2016-06-13_19.03.06_si_ml-100k_e20_tt-0.2_task-0_user-metrics.csv',
'../metrics/mpcf-si/2016-06-13_19.16.52_si_ml-100k_e20_tt-0.2_task-4_user-metrics.csv',
'../metrics/mpcf-si/2016-06-13_19.59.10_si_ml-100k_e20_tt-0.2_task-7_user-metrics.csv',
'../metrics/mpcf-si/2016-06-13_20.06.33_si_ml-100k_e20_tt-0.2_task-3_user-metrics.csv',
'../metrics/mpcf-si/2016-06-13_20.27.26_si_ml-100k_e20_tt-0.2_task-8_user-metrics.csv',
'../metrics/mpcf-si/2016-06-13_21.05.59_si_ml-100k_e20_tt-0.2_task-10_user-metrics.csv',
'../metrics/mpcf-si/2016-06-13_21.19.05_si_ml-100k_e20_tt-0.2_task-5_user-metrics.csv',
'../metrics/mpcf-si/2016-06-13_21.27.40_si_ml-100k_e20_tt-0.2_task-11_user-metrics.csv',
'../metrics/mpcf-si/2016-06-13_21.41.54_si_ml-100k_e20_tt-0.2_task-6_user-metrics.csv',
'../metrics/mpcf-si/2016-06-13_22.19.28_si_ml-100k_e20_tt-0.2_task-9_user-metrics.csv',
]

In [36]:
# One row of mean user metrics per file, ranked by mean AUC (best first).
df_results = get_df_results(user_metrics)
df_results.sort_values('auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.872824,0.319498,0.26117,0.720026,0.391039,0.260872,0.739518,0.213639,../metrics/mpcf-si/2016-06-13_19.59.10_si_ml-100k_e20_tt-0.2_task-7_user-metrics.csv
0,0.866678,0.311412,0.255594,0.716262,0.384464,0.253879,0.729688,0.202168,../metrics/mpcf-si/2016-06-13_18.41.59_si_ml-100k_e20_tt-0.2_task-1_user-metrics.csv
0,0.864149,0.261213,0.216963,0.725548,0.337222,0.212278,0.67014,0.230277,../metrics/mpcf-si/2016-06-13_18.33.31_si_ml-100k_e20_tt-0.2_task-2_user-metrics.csv
0,0.86209,0.257834,0.215673,0.72492,0.33351,0.211794,0.658596,0.228361,../metrics/mpcf-si/2016-06-13_21.19.05_si_ml-100k_e20_tt-0.2_task-5_user-metrics.csv
0,0.857224,0.314416,0.262846,0.712433,0.394115,0.262637,0.752623,0.191662,../metrics/mpcf-si/2016-06-13_20.27.26_si_ml-100k_e20_tt-0.2_task-8_user-metrics.csv
0,0.856939,0.249848,0.201621,0.725404,0.316384,0.197186,0.655211,0.2297,../metrics/mpcf-si/2016-06-13_22.19.28_si_ml-100k_e20_tt-0.2_task-9_user-metrics.csv
0,0.843233,0.295209,0.248227,0.712952,0.373065,0.247121,0.711294,0.192518,../metrics/mpcf-si/2016-06-13_20.06.33_si_ml-100k_e20_tt-0.2_task-3_user-metrics.csv
0,0.834434,0.216676,0.173069,0.726642,0.269247,0.169993,0.624945,0.233195,../metrics/mpcf-si/2016-06-13_21.41.54_si_ml-100k_e20_tt-0.2_task-6_user-metrics.csv
0,0.830866,0.28558,0.242142,0.708156,0.364687,0.240928,0.710368,0.178069,../metrics/mpcf-si/2016-06-13_21.05.59_si_ml-100k_e20_tt-0.2_task-10_user-metrics.csv
0,0.826472,0.304804,0.259735,0.707838,0.389555,0.258879,0.728161,0.177421,../metrics/mpcf-si/2016-06-13_21.27.40_si_ml-100k_e20_tt-0.2_task-11_user-metrics.csv


In [37]:
# Best config: pretty-print the JSON configuration saved for the task-7 run.
with open('../models/mpcf-si/2016-06-13_19.59.10_si_ml-100k_e20_tt-0.2_task-7_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.2_task-7',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 2,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.04,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_lambda_delta_qi': 0.001,
    u'si_lr': 0.003,
    u'si_lr_decay': 0.02,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.06,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.2-test.csv',
    u'top_n_predict

In [38]:
# Per-movie metric files of the twelve tt-0.2 runs (task-0 .. task-11), oldest first.
# Every file sits in the same directory and carries the same suffix, so only the
# "<timestamp>_<experiment name>" stem of each run is spelled out.
movie_metrics = [
    '../metrics/mpcf-si/' + run_stem + '_movie-metrics.csv'
    for run_stem in (
        '2016-06-13_18.33.31_si_ml-100k_e20_tt-0.2_task-2',
        '2016-06-13_18.41.59_si_ml-100k_e20_tt-0.2_task-1',
        '2016-06-13_19.03.06_si_ml-100k_e20_tt-0.2_task-0',
        '2016-06-13_19.16.52_si_ml-100k_e20_tt-0.2_task-4',
        '2016-06-13_19.59.10_si_ml-100k_e20_tt-0.2_task-7',
        '2016-06-13_20.06.33_si_ml-100k_e20_tt-0.2_task-3',
        '2016-06-13_20.27.26_si_ml-100k_e20_tt-0.2_task-8',
        '2016-06-13_21.05.59_si_ml-100k_e20_tt-0.2_task-10',
        '2016-06-13_21.19.05_si_ml-100k_e20_tt-0.2_task-5',
        '2016-06-13_21.27.40_si_ml-100k_e20_tt-0.2_task-11',
        '2016-06-13_21.41.54_si_ml-100k_e20_tt-0.2_task-6',
        '2016-06-13_22.19.28_si_ml-100k_e20_tt-0.2_task-9',
    )
]

In [39]:
# Collapse each run's movie-metrics file into a single row of column means,
# then show the runs ordered from highest to lowest AUC.
df_results = get_df_results(results=movie_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.81981,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_18.33.31_si_ml-100k_e20_tt-0.2_task-2_movie-metrics.csv
0,0.819287,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_19.59.10_si_ml-100k_e20_tt-0.2_task-7_movie-metrics.csv
0,0.817092,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_21.19.05_si_ml-100k_e20_tt-0.2_task-5_movie-metrics.csv
0,0.813141,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_18.41.59_si_ml-100k_e20_tt-0.2_task-1_movie-metrics.csv
0,0.81228,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_22.19.28_si_ml-100k_e20_tt-0.2_task-9_movie-metrics.csv
0,0.809432,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_20.27.26_si_ml-100k_e20_tt-0.2_task-8_movie-metrics.csv
0,0.788353,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_21.41.54_si_ml-100k_e20_tt-0.2_task-6_movie-metrics.csv
0,0.783707,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_20.06.33_si_ml-100k_e20_tt-0.2_task-3_movie-metrics.csv
0,0.775064,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_21.05.59_si_ml-100k_e20_tt-0.2_task-10_movie-metrics.csv
0,0.769275,743.317209,74.781919,../metrics/mpcf-si/2016-06-13_21.27.40_si_ml-100k_e20_tt-0.2_task-11_movie-metrics.csv


## ML-100k - 0.2 train / 0.8 test - only 1 or 2 user pref - no binarize - sparse user

In [40]:
# Per-user metric files of the 48 sparse-user tt-0.2 runs, oldest first
# (four runs for each of task-0 .. task-11, tagged e20 or e10 in the file name).
# Every file sits in the same directory and carries the same suffix, so only the
# "<timestamp>_<experiment name>" stem of each run is spelled out.
user_metrics = [
    '../metrics/mpcf-si/' + run_stem + '_user-metrics.csv'
    for run_stem in (
        '2016-06-13_19.32.32_si_ml-100k_e20_tt-0.2_sparse-user_task-0',
        '2016-06-13_19.40.56_si_ml-100k_e20_tt-0.2_sparse-user_task-3',
        '2016-06-13_19.52.33_si_ml-100k_e20_tt-0.2_sparse-user_task-2',
        '2016-06-13_19.54.07_si_ml-100k_e20_tt-0.2_sparse-user_task-4',
        '2016-06-13_20.03.15_si_ml-100k_e20_tt-0.2_sparse-user_task-5',
        '2016-06-13_20.18.12_si_ml-100k_e20_tt-0.2_sparse-user_task-6',
        '2016-06-13_20.21.14_si_ml-100k_e20_tt-0.2_sparse-user_task-8',
        '2016-06-13_21.02.36_si_ml-100k_e20_tt-0.2_sparse-user_task-9',
        '2016-06-13_21.29.26_si_ml-100k_e20_tt-0.2_sparse-user_task-11',
        '2016-06-13_21.50.13_si_ml-100k_e20_tt-0.2_sparse-user_task-1',
        '2016-06-13_22.03.25_si_ml-100k_e10_tt-0.2_sparse-user_task-0',
        '2016-06-13_22.15.23_si_ml-100k_e20_tt-0.2_sparse-user_task-7',
        '2016-06-13_22.20.42_si_ml-100k_e10_tt-0.2_sparse-user_task-1',
        '2016-06-13_22.25.31_si_ml-100k_e10_tt-0.2_sparse-user_task-0',
        '2016-06-13_22.29.32_si_ml-100k_e10_tt-0.2_sparse-user_task-0',
        '2016-06-13_22.34.01_si_ml-100k_e10_tt-0.2_sparse-user_task-1',
        '2016-06-13_22.35.36_si_ml-100k_e20_tt-0.2_sparse-user_task-10',
        '2016-06-13_22.39.47_si_ml-100k_e10_tt-0.2_sparse-user_task-2',
        '2016-06-13_22.43.48_si_ml-100k_e10_tt-0.2_sparse-user_task-2',
        '2016-06-13_23.03.12_si_ml-100k_e10_tt-0.2_sparse-user_task-3',
        '2016-06-13_23.16.27_si_ml-100k_e10_tt-0.2_sparse-user_task-4',
        '2016-06-13_23.28.40_si_ml-100k_e10_tt-0.2_sparse-user_task-1',
        '2016-06-13_23.34.04_si_ml-100k_e10_tt-0.2_sparse-user_task-3',
        '2016-06-13_23.39.44_si_ml-100k_e10_tt-0.2_sparse-user_task-5',
        '2016-06-13_23.48.50_si_ml-100k_e10_tt-0.2_sparse-user_task-6',
        '2016-06-13_23.56.41_si_ml-100k_e10_tt-0.2_sparse-user_task-4',
        '2016-06-14_00.05.20_si_ml-100k_e10_tt-0.2_sparse-user_task-5',
        '2016-06-14_00.09.50_si_ml-100k_e10_tt-0.2_sparse-user_task-7',
        '2016-06-14_00.25.40_si_ml-100k_e10_tt-0.2_sparse-user_task-6',
        '2016-06-14_00.46.27_si_ml-100k_e10_tt-0.2_sparse-user_task-2',
        '2016-06-14_00.48.39_si_ml-100k_e10_tt-0.2_sparse-user_task-7',
        '2016-06-14_01.27.00_si_ml-100k_e10_tt-0.2_sparse-user_task-8',
        '2016-06-14_01.35.29_si_ml-100k_e10_tt-0.2_sparse-user_task-8',
        '2016-06-14_01.56.07_si_ml-100k_e10_tt-0.2_sparse-user_task-9',
        '2016-06-14_02.06.35_si_ml-100k_e10_tt-0.2_sparse-user_task-3',
        '2016-06-14_02.29.55_si_ml-100k_e10_tt-0.2_sparse-user_task-4',
        '2016-06-14_02.45.53_si_ml-100k_e10_tt-0.2_sparse-user_task-9',
        '2016-06-14_02.46.07_si_ml-100k_e10_tt-0.2_sparse-user_task-5',
        '2016-06-14_02.54.54_si_ml-100k_e10_tt-0.2_sparse-user_task-6',
        '2016-06-14_03.01.41_si_ml-100k_e10_tt-0.2_sparse-user_task-10',
        '2016-06-14_03.16.57_si_ml-100k_e10_tt-0.2_sparse-user_task-10',
        '2016-06-14_03.26.55_si_ml-100k_e10_tt-0.2_sparse-user_task-7',
        '2016-06-14_03.30.49_si_ml-100k_e10_tt-0.2_sparse-user_task-11',
        '2016-06-14_03.38.44_si_ml-100k_e10_tt-0.2_sparse-user_task-11',
        '2016-06-14_04.30.25_si_ml-100k_e10_tt-0.2_sparse-user_task-8',
        '2016-06-14_04.58.46_si_ml-100k_e10_tt-0.2_sparse-user_task-9',
        '2016-06-14_06.19.12_si_ml-100k_e10_tt-0.2_sparse-user_task-10',
        '2016-06-14_07.21.21_si_ml-100k_e10_tt-0.2_sparse-user_task-11',
    )
]

In [41]:
# Collapse each run's user-metrics file into a single row of column means,
# then show the runs ordered from highest to lowest AUC.
df_results = get_df_results(results=user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.882308,0.324922,0.26265,0.722217,0.396235,0.259784,0.74039,0.21985,../metrics/mpcf-si/2016-06-13_20.21.14_si_ml-100k_e20_tt-0.2_sparse-user_task-8_user-metrics.csv
0,0.881285,0.296906,0.237904,0.72205,0.365005,0.23419,0.715851,0.219108,../metrics/mpcf-si/2016-06-13_22.39.47_si_ml-100k_e10_tt-0.2_sparse-user_task-2_user-metrics.csv
0,0.878038,0.3034,0.242337,0.722445,0.371633,0.239095,0.722887,0.220123,../metrics/mpcf-si/2016-06-14_00.09.50_si_ml-100k_e10_tt-0.2_sparse-user_task-7_user-metrics.csv
0,0.877196,0.289306,0.229772,0.723696,0.354984,0.225494,0.70951,0.224007,../metrics/mpcf-si/2016-06-14_03.38.44_si_ml-100k_e10_tt-0.2_sparse-user_task-11_user-metrics.csv
0,0.875995,0.307702,0.247421,0.720545,0.375822,0.244117,0.723192,0.214048,../metrics/mpcf-si/2016-06-14_04.58.46_si_ml-100k_e10_tt-0.2_sparse-user_task-9_user-metrics.csv
0,0.87484,0.306267,0.247463,0.720888,0.375027,0.244528,0.716555,0.215736,../metrics/mpcf-si/2016-06-13_23.56.41_si_ml-100k_e10_tt-0.2_sparse-user_task-4_user-metrics.csv
0,0.873602,0.281935,0.226889,0.7216,0.352333,0.22127,0.686478,0.217184,../metrics/mpcf-si/2016-06-14_02.06.35_si_ml-100k_e10_tt-0.2_sparse-user_task-3_user-metrics.csv
0,0.873054,0.304556,0.247482,0.723713,0.375133,0.244815,0.701319,0.22442,../metrics/mpcf-si/2016-06-14_03.30.49_si_ml-100k_e10_tt-0.2_sparse-user_task-11_user-metrics.csv
0,0.872528,0.291758,0.232064,0.727084,0.358961,0.227407,0.716471,0.234221,../metrics/mpcf-si/2016-06-13_23.48.50_si_ml-100k_e10_tt-0.2_sparse-user_task-6_user-metrics.csv
0,0.872245,0.286836,0.22917,0.725506,0.355037,0.224327,0.713765,0.229821,../metrics/mpcf-si/2016-06-13_22.43.48_si_ml-100k_e10_tt-0.2_sparse-user_task-2_user-metrics.csv


In [42]:
# Hyper-parameters of the run that tops the AUC-sorted table above (sparse-user task-8)
with open('../models/mpcf-si/2016-06-13_20.21.14_si_ml-100k_e20_tt-0.2_sparse-user_task-8_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': False,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'si_ml-100k_e20_tt-0.2_sparse-user_task-8',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.0005,
    u'metrics_save_dir': u'metrics/mpcf-si',
    u'model_save_dir': u'models/mpcf-si',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_user_pref': 1,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.06,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'si_lambda_delta_qi': 0.0003,
    u'si_lr': 0.0003,
    u'si_lr_decay': 0.02,
    u'si_model': True,
    u'si_nn': [128, 50],
    u'si_reg_lambda': 0.06,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/no-sparse-item/0.2-test.csv',
  

In [43]:
# Per-movie metric files of the 48 sparse-user tt-0.2 runs, oldest first
# (four runs for each of task-0 .. task-11, tagged e20 or e10 in the file name).
# Every file sits in the same directory and carries the same suffix, so only the
# "<timestamp>_<experiment name>" stem of each run is spelled out.
movie_metrics = [
    '../metrics/mpcf-si/' + run_stem + '_movie-metrics.csv'
    for run_stem in (
        '2016-06-13_19.32.32_si_ml-100k_e20_tt-0.2_sparse-user_task-0',
        '2016-06-13_19.40.56_si_ml-100k_e20_tt-0.2_sparse-user_task-3',
        '2016-06-13_19.52.33_si_ml-100k_e20_tt-0.2_sparse-user_task-2',
        '2016-06-13_19.54.07_si_ml-100k_e20_tt-0.2_sparse-user_task-4',
        '2016-06-13_20.03.15_si_ml-100k_e20_tt-0.2_sparse-user_task-5',
        '2016-06-13_20.18.12_si_ml-100k_e20_tt-0.2_sparse-user_task-6',
        '2016-06-13_20.21.14_si_ml-100k_e20_tt-0.2_sparse-user_task-8',
        '2016-06-13_21.02.36_si_ml-100k_e20_tt-0.2_sparse-user_task-9',
        '2016-06-13_21.29.26_si_ml-100k_e20_tt-0.2_sparse-user_task-11',
        '2016-06-13_21.50.13_si_ml-100k_e20_tt-0.2_sparse-user_task-1',
        '2016-06-13_22.03.25_si_ml-100k_e10_tt-0.2_sparse-user_task-0',
        '2016-06-13_22.15.23_si_ml-100k_e20_tt-0.2_sparse-user_task-7',
        '2016-06-13_22.20.42_si_ml-100k_e10_tt-0.2_sparse-user_task-1',
        '2016-06-13_22.25.31_si_ml-100k_e10_tt-0.2_sparse-user_task-0',
        '2016-06-13_22.29.32_si_ml-100k_e10_tt-0.2_sparse-user_task-0',
        '2016-06-13_22.34.01_si_ml-100k_e10_tt-0.2_sparse-user_task-1',
        '2016-06-13_22.35.36_si_ml-100k_e20_tt-0.2_sparse-user_task-10',
        '2016-06-13_22.39.47_si_ml-100k_e10_tt-0.2_sparse-user_task-2',
        '2016-06-13_22.43.48_si_ml-100k_e10_tt-0.2_sparse-user_task-2',
        '2016-06-13_23.03.12_si_ml-100k_e10_tt-0.2_sparse-user_task-3',
        '2016-06-13_23.16.27_si_ml-100k_e10_tt-0.2_sparse-user_task-4',
        '2016-06-13_23.28.40_si_ml-100k_e10_tt-0.2_sparse-user_task-1',
        '2016-06-13_23.34.04_si_ml-100k_e10_tt-0.2_sparse-user_task-3',
        '2016-06-13_23.39.44_si_ml-100k_e10_tt-0.2_sparse-user_task-5',
        '2016-06-13_23.48.50_si_ml-100k_e10_tt-0.2_sparse-user_task-6',
        '2016-06-13_23.56.41_si_ml-100k_e10_tt-0.2_sparse-user_task-4',
        '2016-06-14_00.05.20_si_ml-100k_e10_tt-0.2_sparse-user_task-5',
        '2016-06-14_00.09.50_si_ml-100k_e10_tt-0.2_sparse-user_task-7',
        '2016-06-14_00.25.40_si_ml-100k_e10_tt-0.2_sparse-user_task-6',
        '2016-06-14_00.46.27_si_ml-100k_e10_tt-0.2_sparse-user_task-2',
        '2016-06-14_00.48.39_si_ml-100k_e10_tt-0.2_sparse-user_task-7',
        '2016-06-14_01.27.00_si_ml-100k_e10_tt-0.2_sparse-user_task-8',
        '2016-06-14_01.35.29_si_ml-100k_e10_tt-0.2_sparse-user_task-8',
        '2016-06-14_01.56.07_si_ml-100k_e10_tt-0.2_sparse-user_task-9',
        '2016-06-14_02.06.35_si_ml-100k_e10_tt-0.2_sparse-user_task-3',
        '2016-06-14_02.29.55_si_ml-100k_e10_tt-0.2_sparse-user_task-4',
        '2016-06-14_02.45.53_si_ml-100k_e10_tt-0.2_sparse-user_task-9',
        '2016-06-14_02.46.07_si_ml-100k_e10_tt-0.2_sparse-user_task-5',
        '2016-06-14_02.54.54_si_ml-100k_e10_tt-0.2_sparse-user_task-6',
        '2016-06-14_03.01.41_si_ml-100k_e10_tt-0.2_sparse-user_task-10',
        '2016-06-14_03.16.57_si_ml-100k_e10_tt-0.2_sparse-user_task-10',
        '2016-06-14_03.26.55_si_ml-100k_e10_tt-0.2_sparse-user_task-7',
        '2016-06-14_03.30.49_si_ml-100k_e10_tt-0.2_sparse-user_task-11',
        '2016-06-14_03.38.44_si_ml-100k_e10_tt-0.2_sparse-user_task-11',
        '2016-06-14_04.30.25_si_ml-100k_e10_tt-0.2_sparse-user_task-8',
        '2016-06-14_04.58.46_si_ml-100k_e10_tt-0.2_sparse-user_task-9',
        '2016-06-14_06.19.12_si_ml-100k_e10_tt-0.2_sparse-user_task-10',
        '2016-06-14_07.21.21_si_ml-100k_e10_tt-0.2_sparse-user_task-11',
    )
]

In [44]:
# Collapse each run's movie-metrics file into a single row of column means,
# then show the runs ordered from highest to lowest AUC.
df_results = get_df_results(results=movie_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.839444,738.38738,75.319489,../metrics/mpcf-si/2016-06-14_03.38.44_si_ml-100k_e10_tt-0.2_sparse-user_task-11_movie-metrics.csv
0,0.83659,738.38738,75.319489,../metrics/mpcf-si/2016-06-13_22.39.47_si_ml-100k_e10_tt-0.2_sparse-user_task-2_movie-metrics.csv
0,0.836364,738.38738,75.319489,../metrics/mpcf-si/2016-06-13_20.21.14_si_ml-100k_e20_tt-0.2_sparse-user_task-8_movie-metrics.csv
0,0.835786,738.38738,75.319489,../metrics/mpcf-si/2016-06-13_22.25.31_si_ml-100k_e10_tt-0.2_sparse-user_task-0_movie-metrics.csv
0,0.833453,738.38738,75.319489,../metrics/mpcf-si/2016-06-13_22.43.48_si_ml-100k_e10_tt-0.2_sparse-user_task-2_movie-metrics.csv
0,0.833076,738.38738,75.319489,../metrics/mpcf-si/2016-06-14_02.06.35_si_ml-100k_e10_tt-0.2_sparse-user_task-3_movie-metrics.csv
0,0.832251,738.38738,75.319489,../metrics/mpcf-si/2016-06-14_00.46.27_si_ml-100k_e10_tt-0.2_sparse-user_task-2_movie-metrics.csv
0,0.831414,738.38738,75.319489,../metrics/mpcf-si/2016-06-14_00.09.50_si_ml-100k_e10_tt-0.2_sparse-user_task-7_movie-metrics.csv
0,0.831208,738.38738,75.319489,../metrics/mpcf-si/2016-06-14_01.56.07_si_ml-100k_e10_tt-0.2_sparse-user_task-9_movie-metrics.csv
0,0.831143,738.38738,75.319489,../metrics/mpcf-si/2016-06-13_23.48.50_si_ml-100k_e10_tt-0.2_sparse-user_task-6_movie-metrics.csv
