In [4]:
import numpy as np
import pandas as pd
import json
import pprint
import json
pp = pprint.PrettyPrinter(indent=4)
pd.options.display.max_colwidth = 100

In [5]:
# Columns removed before averaging. Judging by their names these are
# counts / identifiers rather than evaluation metrics.
_DF_MEAN_DROP_COLUMNS = (
    'nb_movies_not_in_train',
    'nb_test_ratings',
    'nb_train_ratings',
    'user_id',
)


def df_mean(path, apply_to_df, drop_columns=_DF_MEAN_DROP_COLUMNS):
    """Summarise one metrics CSV as a single-row DataFrame of column means.

    Parameters
    ----------
    path : str
        Location of the CSV file to read with ``pd.read_csv``.
    apply_to_df : callable
        Called with the freshly loaded DataFrame and must return the
        DataFrame to summarise (typically a row filter). It runs *before*
        ``drop_columns`` are removed, so it may still reference them.
    drop_columns : iterable of str, optional
        Column names to exclude from the summary when present. Missing
        names are ignored. Defaults to the four bookkeeping columns this
        function has always dropped; pass e.g. ``[..., 'movie_id']`` to
        avoid averaging an identifier column.

    Returns
    -------
    pandas.DataFrame
        One row (index 0) holding the mean of every remaining numeric
        column, plus a ``'path'`` column set to ``path``.
    """
    metrics = apply_to_df(pd.read_csv(path))

    present = [col for col in drop_columns if col in metrics.columns]
    if present:
        metrics = metrics.drop(present, axis=1)

    # numeric_only=True keeps the historical result (non-numeric columns are
    # left out) without relying on pandas silently discarding them, which
    # raises TypeError on pandas >= 2.0.
    summary = metrics.mean(axis=0, numeric_only=True).to_frame().T
    summary['path'] = path

    return summary
    

def get_df_results(results, max_train_ratings=None):
    """Build a table with one summary row per metrics CSV.

    Each path in ``results`` is passed to ``df_mean`` together with a row
    filter, and the resulting single-row frames are stacked.

    Parameters
    ----------
    results : iterable of str
        Paths of the metrics CSV files to summarise.
    max_train_ratings : int or None, optional
        When ``None`` (default) only rows with ``nb_test_ratings > 0`` are
        kept. Otherwise rows must additionally satisfy
        ``nb_train_ratings <= max_train_ratings``. A cap of ``0`` is
        honoured (it is no longer mistaken for "no cap").

    Returns
    -------
    pandas.DataFrame
        The per-file summary rows in input order; every row keeps the
        index label 0 produced by ``df_mean``.

    Raises
    ------
    ValueError
        If ``results`` is empty.
    """
    if max_train_ratings is None:
        def apply_to_df(df):
            return df[df['nb_test_ratings'] > 0]
    else:
        def apply_to_df(df):
            return df[(df['nb_test_ratings'] > 0)
                      & (df['nb_train_ratings'] <= max_train_ratings)]

    frames = [df_mean(path, apply_to_df) for path in results]
    if not frames:
        raise ValueError('get_df_results() needs at least one metrics file path')

    # A single concat replaces reduce + DataFrame.append: reduce is not a
    # builtin on Python 3 and DataFrame.append was removed in pandas 2.0.
    return pd.concat(frames)

## BEST ML-100k - 0.7 train / 0.3 test - sparse item

In [19]:
# Per-user metrics CSVs for five ML-100k runs (filenames: task-13 and train-2 .. train-5).
user_metrics = [
'../metrics/mfnn/2016-07-06_00.04.50_mfnn_ml-100k_e20_tt-0.7_task-13_user-metrics.csv',
'../metrics/mfnn/2016-07-06_15.38.20_mfnn_ml-100k_e20_tt-0.7_train-2_user-metrics.csv',
'../metrics/mfnn/2016-07-06_15.40.55_mfnn_ml-100k_e20_tt-0.7_train-3_user-metrics.csv',
'../metrics/mfnn/2016-07-06_15.40.31_mfnn_ml-100k_e20_tt-0.7_train-4_user-metrics.csv',
'../metrics/mfnn/2016-07-06_15.42.05_mfnn_ml-100k_e20_tt-0.7_train-5_user-metrics.csv',
]

In [20]:
# One summary row per run: get_df_results averages each CSV's metric columns
# over the rows with nb_test_ratings > 0.
df_cv_user_metrics = get_df_results(user_metrics)
df_cv_user_metrics

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.93405,0.332305,0.284883,0.716626,0.281548,0.437622,0.659702,0.194061,../metrics/mfnn/2016-07-06_00.04.50_mfnn_ml-100k_e20_tt-0.7_task-13_user-metrics.csv
0,0.933799,0.335543,0.283013,0.717485,0.280806,0.435161,0.667004,0.204534,../metrics/mfnn/2016-07-06_15.38.20_mfnn_ml-100k_e20_tt-0.7_train-2_user-metrics.csv
0,0.93482,0.339741,0.293865,0.714156,0.290297,0.446345,0.677803,0.197429,../metrics/mfnn/2016-07-06_15.40.55_mfnn_ml-100k_e20_tt-0.7_train-3_user-metrics.csv
0,0.933849,0.33727,0.287112,0.711714,0.283033,0.437738,0.688017,0.191617,../metrics/mfnn/2016-07-06_15.40.31_mfnn_ml-100k_e20_tt-0.7_train-4_user-metrics.csv
0,0.935867,0.333826,0.289336,0.706866,0.28632,0.441209,0.678025,0.174695,../metrics/mfnn/2016-07-06_15.42.05_mfnn_ml-100k_e20_tt-0.7_train-5_user-metrics.csv


In [21]:
# Mean across runs. numeric_only=True skips the string 'path' column, which
# pandas >= 2.0 no longer drops silently (it raises TypeError instead).
df_cv_user_metrics.mean(axis=0, numeric_only=True)

auc                   0.934477
avg_precision         0.335737
f1                    0.287642
fcp                   0.713369
precision_at_20       0.284401
recall_at_20          0.439615
reciprocal_rank       0.674110
spearman_rank_corr    0.192467
dtype: float64

In [22]:
# Same summary restricted to rows (users) with at most 20 training ratings.
# numeric_only=True skips the string 'path' column, which pandas >= 2.0 no
# longer drops silently.
df_max_train = get_df_results(user_metrics, max_train_ratings=20)
df_max_train.mean(axis=0, numeric_only=True)

auc                   0.950182
avg_precision         0.268419
f1                    0.178278
fcp                   0.689655
precision_at_20       0.112414
recall_at_20          0.537082
reciprocal_rank       0.491522
spearman_rank_corr    0.113468
dtype: float64

In [23]:
# Pretty-print the saved configuration of the best run (task-13).
with open('../models/mfnn/2016-07-06_00.04.50_mfnn_ml-100k_e20_tt-0.7_task-13_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'mfnn_ml-100k_e20_tt-0.7_task-13',
    u'hit_threshold': 4,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 96,
    u'nb_movies': 1261,
    u'nb_users': 943,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'theano': True,
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-100k/sparse-item/0.7-train.c

In [24]:
# Per-movie metrics CSVs for five ML-100k runs (filenames: task-13 and train-2 .. train-5).
movie_metrics = [
'../metrics/mfnn/2016-07-06_00.04.50_mfnn_ml-100k_e20_tt-0.7_task-13_movie-metrics.csv',
'../metrics/mfnn/2016-07-06_15.38.20_mfnn_ml-100k_e20_tt-0.7_train-2_movie-metrics.csv',
'../metrics/mfnn/2016-07-06_15.40.55_mfnn_ml-100k_e20_tt-0.7_train-3_movie-metrics.csv',
'../metrics/mfnn/2016-07-06_15.40.31_mfnn_ml-100k_e20_tt-0.7_train-4_movie-metrics.csv',
'../metrics/mfnn/2016-07-06_15.42.05_mfnn_ml-100k_e20_tt-0.7_train-5_movie-metrics.csv',
]


In [25]:
# One summary row per run, averaged over the rows with nb_test_ratings > 0.
# NOTE(review): movie_id is averaged along with the metrics because df_mean
# does not drop it; that column's mean is presumably not meaningful.
df_cv_movie_metrics = get_df_results(movie_metrics)
df_cv_movie_metrics

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.89835,708.548841,78.062914,../metrics/mfnn/2016-07-06_00.04.50_mfnn_ml-100k_e20_tt-0.7_task-13_movie-metrics.csv
0,0.894273,708.548841,78.062914,../metrics/mfnn/2016-07-06_15.38.20_mfnn_ml-100k_e20_tt-0.7_train-2_movie-metrics.csv
0,0.900379,708.548841,78.062914,../metrics/mfnn/2016-07-06_15.40.55_mfnn_ml-100k_e20_tt-0.7_train-3_movie-metrics.csv
0,0.897425,708.548841,78.062914,../metrics/mfnn/2016-07-06_15.40.31_mfnn_ml-100k_e20_tt-0.7_train-4_movie-metrics.csv
0,0.896677,708.548841,78.062914,../metrics/mfnn/2016-07-06_15.42.05_mfnn_ml-100k_e20_tt-0.7_train-5_movie-metrics.csv


In [26]:
# Mean across runs. numeric_only=True skips the string 'path' column, which
# pandas >= 2.0 no longer drops silently (it raises TypeError instead).
df_cv_movie_metrics.mean(axis=0, numeric_only=True)

auc                                0.897421
movie_id                         708.548841
nb_times_in_top_n_predictions     78.062914
dtype: float64

In [27]:
# Same summary restricted to rows (movies) with at most 10 training ratings.
# numeric_only=True skips the string 'path' column, which pandas >= 2.0 no
# longer drops silently.
df_max_train = get_df_results(movie_metrics, max_train_ratings=10)
df_max_train.mean(axis=0, numeric_only=True)

auc                                 0.861550
movie_id                         1155.646643
nb_times_in_top_n_predictions       1.090459
dtype: float64

## BEST ML-1m - 0.7 train / 0.3 test - sparse item

In [6]:
# Per-user metrics CSVs for five ML-1m runs (filenames: task-0 and train-2 .. train-5).
user_metrics = [
'../metrics/mfnn/2016-07-06_10.04.44_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-07-07_19.50.15_mfnn_ml-1m_e10_tt-0.7_train-2_user-metrics.csv',
'../metrics/mfnn/2016-07-07_19.49.55_mfnn_ml-1m_e10_tt-0.7_train-3_user-metrics.csv',
'../metrics/mfnn/2016-07-07_19.51.28_mfnn_ml-1m_e10_tt-0.7_train-4_user-metrics.csv',
'../metrics/mfnn/2016-07-07_19.51.20_mfnn_ml-1m_e10_tt-0.7_train-5_user-metrics.csv',
]

In [7]:
# One summary row per run: get_df_results averages each CSV's metric columns
# over the rows with nb_test_ratings > 0.
df_cv_user_metrics = get_df_results(user_metrics)
df_cv_user_metrics

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.929338,0.271757,0.244432,0.732583,0.303394,0.295808,0.65163,0.23745,../metrics/mfnn/2016-07-06_10.04.44_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.928718,0.272953,0.245223,0.732098,0.304561,0.296719,0.650126,0.237843,../metrics/mfnn/2016-07-07_19.50.15_mfnn_ml-1m_e10_tt-0.7_train-2_user-metrics.csv
0,0.928753,0.272738,0.244487,0.733011,0.303295,0.295926,0.64516,0.23834,../metrics/mfnn/2016-07-07_19.49.55_mfnn_ml-1m_e10_tt-0.7_train-3_user-metrics.csv
0,0.929293,0.272179,0.244331,0.730356,0.303866,0.295081,0.648647,0.229045,../metrics/mfnn/2016-07-07_19.51.28_mfnn_ml-1m_e10_tt-0.7_train-4_user-metrics.csv
0,0.928353,0.269001,0.243164,0.730705,0.303096,0.293558,0.644943,0.232687,../metrics/mfnn/2016-07-07_19.51.20_mfnn_ml-1m_e10_tt-0.7_train-5_user-metrics.csv


In [8]:
# Mean across runs. numeric_only=True skips the string 'path' column, which
# pandas >= 2.0 no longer drops silently (it raises TypeError instead).
df_cv_user_metrics.mean(axis=0, numeric_only=True)

auc                   0.928891
avg_precision         0.271725
f1                    0.244328
fcp                   0.731751
precision_at_20       0.303642
recall_at_20          0.295419
reciprocal_rank       0.648101
spearman_rank_corr    0.235073
dtype: float64

In [12]:
# Pretty-print the saved configuration of the best run (2016-07-06_10.04.44, task-0).
with open('../models/mfnn/2016-07-06_10.04.44_mfnn_ml-1m_e10_tt-0.7_task-0_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': True,
    u'experiment_name': u'mfnn_ml-1m_e10_tt-0.7_task-0',
    u'hit_threshold': 4,
    u'lr': 0.06,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 10,
    u'nb_latent_f': 96,
    u'nb_movies': 3005,
    u'nb_users': 6040,
    u'pool_size': 24,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-1m/sparse-item/0.7-test.csv',
    u'theano': True,
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-1m/sparse-ite

In [9]:
# Per-movie metrics CSVs for five ML-1m runs (filenames: task-0 and train-2 .. train-5).
movie_metrics = [
'../metrics/mfnn/2016-07-06_10.04.44_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-07-07_19.50.15_mfnn_ml-1m_e10_tt-0.7_train-2_movie-metrics.csv',
'../metrics/mfnn/2016-07-07_19.49.55_mfnn_ml-1m_e10_tt-0.7_train-3_movie-metrics.csv',
'../metrics/mfnn/2016-07-07_19.51.28_mfnn_ml-1m_e10_tt-0.7_train-4_movie-metrics.csv',
'../metrics/mfnn/2016-07-07_19.51.20_mfnn_ml-1m_e10_tt-0.7_train-5_movie-metrics.csv',
]

In [10]:
# One summary row per run, averaged over the rows with nb_test_ratings > 0.
# NOTE(review): movie_id is averaged along with the metrics because df_mean
# does not drop it; that column's mean is presumably not meaningful.
df_cv_movie_metrics = get_df_results(movie_metrics)
df_cv_movie_metrics

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.917252,1999.3961,203.093477,../metrics/mfnn/2016-07-06_10.04.44_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.917288,1999.3961,203.093477,../metrics/mfnn/2016-07-07_19.50.15_mfnn_ml-1m_e10_tt-0.7_train-2_movie-metrics.csv
0,0.917347,1999.3961,203.093477,../metrics/mfnn/2016-07-07_19.49.55_mfnn_ml-1m_e10_tt-0.7_train-3_movie-metrics.csv
0,0.91742,1999.3961,203.093477,../metrics/mfnn/2016-07-07_19.51.28_mfnn_ml-1m_e10_tt-0.7_train-4_movie-metrics.csv
0,0.917144,1999.3961,203.093477,../metrics/mfnn/2016-07-07_19.51.20_mfnn_ml-1m_e10_tt-0.7_train-5_movie-metrics.csv


In [11]:
# Mean across runs. numeric_only=True skips the string 'path' column, which
# pandas >= 2.0 no longer drops silently (it raises TypeError instead).
df_cv_movie_metrics.mean(axis=0, numeric_only=True)

auc                                 0.917290
movie_id                         1999.396100
nb_times_in_top_n_predictions     203.093477
dtype: float64

# Grid search

## ML-100k - 0.7 train / 0.3 test - sparse item

In [13]:
# Per-user metrics CSVs for sixteen ML-100k runs (filenames: task-0 .. task-15,
# timestamped 2016-07-05 / 2016-07-06).
user_metrics = [
'../metrics/mfnn/2016-07-05_16.57.02_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-07-05_17.02.41_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-07-05_17.16.00_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-07-05_17.18.21_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-07-05_18.20.48_mfnn_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/mfnn/2016-07-05_18.35.20_mfnn_ml-100k_e20_tt-0.7_task-6_user-metrics.csv',
'../metrics/mfnn/2016-07-05_19.28.49_mfnn_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/mfnn/2016-07-05_20.48.59_mfnn_ml-100k_e20_tt-0.7_task-10_user-metrics.csv',
'../metrics/mfnn/2016-07-05_20.50.44_mfnn_ml-100k_e20_tt-0.7_task-8_user-metrics.csv',
'../metrics/mfnn/2016-07-05_20.54.31_mfnn_ml-100k_e20_tt-0.7_task-7_user-metrics.csv',
'../metrics/mfnn/2016-07-05_21.08.21_mfnn_ml-100k_e20_tt-0.7_task-9_user-metrics.csv',
'../metrics/mfnn/2016-07-05_22.58.11_mfnn_ml-100k_e20_tt-0.7_task-14_user-metrics.csv',
'../metrics/mfnn/2016-07-05_23.52.36_mfnn_ml-100k_e20_tt-0.7_task-11_user-metrics.csv',
'../metrics/mfnn/2016-07-06_00.04.50_mfnn_ml-100k_e20_tt-0.7_task-13_user-metrics.csv',
'../metrics/mfnn/2016-07-06_00.29.46_mfnn_ml-100k_e20_tt-0.7_task-12_user-metrics.csv',
'../metrics/mfnn/2016-07-06_01.55.06_mfnn_ml-100k_e20_tt-0.7_task-15_user-metrics.csv',
]

In [14]:
# Summarise every run, then rank the runs by mean AUC (highest first).
df_results = get_df_results(user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.93405,0.332305,0.284883,0.716626,0.281548,0.437622,0.659702,0.194061,../metrics/mfnn/2016-07-06_00.04.50_mfnn_ml-100k_e20_tt-0.7_task-13_user-metrics.csv
0,0.933666,0.342638,0.291039,0.716174,0.287858,0.446479,0.676071,0.194128,../metrics/mfnn/2016-07-06_00.29.46_mfnn_ml-100k_e20_tt-0.7_task-12_user-metrics.csv
0,0.933206,0.332708,0.286671,0.715359,0.283775,0.437332,0.664948,0.192243,../metrics/mfnn/2016-07-05_17.18.21_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv
0,0.933087,0.339991,0.291412,0.715403,0.288176,0.445011,0.66616,0.190885,../metrics/mfnn/2016-07-05_20.54.31_mfnn_ml-100k_e20_tt-0.7_task-7_user-metrics.csv
0,0.933012,0.332868,0.287196,0.716969,0.283881,0.440897,0.655651,0.194351,../metrics/mfnn/2016-07-05_20.50.44_mfnn_ml-100k_e20_tt-0.7_task-8_user-metrics.csv
0,0.919859,0.341438,0.288997,0.714731,0.283669,0.444444,0.670635,0.189288,../metrics/mfnn/2016-07-05_18.35.20_mfnn_ml-100k_e20_tt-0.7_task-6_user-metrics.csv
0,0.919534,0.25224,0.232634,0.712204,0.234464,0.350292,0.572658,0.181582,../metrics/mfnn/2016-07-06_01.55.06_mfnn_ml-100k_e20_tt-0.7_task-15_user-metrics.csv
0,0.919505,0.252018,0.23069,0.712758,0.23245,0.348769,0.580826,0.183242,../metrics/mfnn/2016-07-05_21.08.21_mfnn_ml-100k_e20_tt-0.7_task-9_user-metrics.csv
0,0.919364,0.251027,0.231538,0.712373,0.233351,0.348951,0.57162,0.181634,../metrics/mfnn/2016-07-05_22.58.11_mfnn_ml-100k_e20_tt-0.7_task-14_user-metrics.csv
0,0.919208,0.251542,0.230912,0.71335,0.232556,0.348146,0.575251,0.18459,../metrics/mfnn/2016-07-05_17.16.00_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv


In [15]:
# Pretty-print the saved configuration of the top-AUC run (task-13).
with open('../models/mfnn/2016-07-06_00.04.50_mfnn_ml-100k_e20_tt-0.7_task-13_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'mfnn_ml-100k_e20_tt-0.7_task-13',
    u'hit_threshold': 4,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 96,
    u'nb_movies': 1261,
    u'nb_users': 943,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'theano': True,
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-100k/sparse-item/0.7-train.c

## ML-1m - 0.7 train / 0.3 test - sparse item

In [16]:
# Per-user metrics CSVs for nine ML-1m runs. Every filename says task-0, so the
# runs are distinguished only by their timestamps.
user_metrics = [
'../metrics/mfnn/2016-07-06_08.29.14_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-07-06_08.48.33_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-07-06_08.48.51_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-07-06_09.12.06_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-07-06_09.45.50_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-07-06_10.04.44_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-07-06_10.09.37_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-07-06_10.14.08_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-07-06_11.54.32_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
]

In [17]:
# Summarise every run, then rank the runs by mean AUC (highest first).
df_results = get_df_results(user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.929338,0.271757,0.244432,0.732583,0.303394,0.295808,0.65163,0.23745,../metrics/mfnn/2016-07-06_10.04.44_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.929032,0.275323,0.247104,0.732554,0.305464,0.300504,0.650843,0.237371,../metrics/mfnn/2016-07-06_10.14.08_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.928277,0.265896,0.238762,0.73249,0.298146,0.286945,0.644833,0.236775,../metrics/mfnn/2016-07-06_11.54.32_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.927319,0.262127,0.237676,0.73222,0.296656,0.285755,0.636657,0.236064,../metrics/mfnn/2016-07-06_09.45.50_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.926007,0.255197,0.23161,0.73211,0.29043,0.276659,0.628022,0.235795,../metrics/mfnn/2016-07-06_08.48.51_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.924262,0.254463,0.232176,0.73237,0.290919,0.278418,0.628875,0.236306,../metrics/mfnn/2016-07-06_09.12.06_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.923523,0.246574,0.224618,0.732761,0.28375,0.26638,0.617143,0.237526,../metrics/mfnn/2016-07-06_08.48.33_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.922061,0.273624,0.245193,0.727336,0.300323,0.301344,0.65959,0.221656,../metrics/mfnn/2016-07-06_08.29.14_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.896484,0.181763,0.16899,0.735098,0.229089,0.187601,0.531755,0.243689,../metrics/mfnn/2016-07-06_10.09.37_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv


In [18]:
# Pretty-print the saved configuration of the top-AUC run (2016-07-06_10.04.44).
with open('../models/mfnn/2016-07-06_10.04.44_mfnn_ml-1m_e10_tt-0.7_task-0_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': True,
    u'experiment_name': u'mfnn_ml-1m_e10_tt-0.7_task-0',
    u'hit_threshold': 4,
    u'lr': 0.06,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 10,
    u'nb_latent_f': 96,
    u'nb_movies': 3005,
    u'nb_users': 6040,
    u'pool_size': 24,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-1m/sparse-item/0.7-test.csv',
    u'theano': True,
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-1m/sparse-ite

## Old versions of MFNN (error modeling)

#### ML-100k - 0.7 train / 0.3 test - sparse item

In [13]:
# Per-user metrics CSVs for sixteen ML-100k runs (filenames: task-0 .. task-15,
# timestamped 2016-06-29 / 2016-06-30).
user_metrics = [
'../metrics/mfnn/2016-06-29_17.42.15_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-29_17.43.53_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-06-29_18.49.44_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-29_19.10.09_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-29_19.46.38_mfnn_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/mfnn/2016-06-29_20.46.44_mfnn_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/mfnn/2016-06-29_21.06.33_mfnn_ml-100k_e20_tt-0.7_task-7_user-metrics.csv',
'../metrics/mfnn/2016-06-29_21.36.34_mfnn_ml-100k_e20_tt-0.7_task-8_user-metrics.csv',
'../metrics/mfnn/2016-06-29_22.27.40_mfnn_ml-100k_e20_tt-0.7_task-6_user-metrics.csv',
'../metrics/mfnn/2016-06-29_23.10.39_mfnn_ml-100k_e20_tt-0.7_task-10_user-metrics.csv',
'../metrics/mfnn/2016-06-29_23.57.20_mfnn_ml-100k_e20_tt-0.7_task-9_user-metrics.csv',
'../metrics/mfnn/2016-06-30_00.54.10_mfnn_ml-100k_e20_tt-0.7_task-12_user-metrics.csv',
'../metrics/mfnn/2016-06-30_01.10.42_mfnn_ml-100k_e20_tt-0.7_task-11_user-metrics.csv',
'../metrics/mfnn/2016-06-30_02.34.35_mfnn_ml-100k_e20_tt-0.7_task-14_user-metrics.csv',
'../metrics/mfnn/2016-06-30_02.46.06_mfnn_ml-100k_e20_tt-0.7_task-13_user-metrics.csv',
'../metrics/mfnn/2016-06-30_03.11.58_mfnn_ml-100k_e20_tt-0.7_task-15_user-metrics.csv',
]

In [14]:
# Summarise every run, then rank the runs by mean AUC (highest first).
df_results = get_df_results(user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.935373,0.335996,0.288141,0.71576,0.285684,0.441399,0.661673,0.191578,../metrics/mfnn/2016-06-29_19.46.38_mfnn_ml-100k_e20_tt-0.7_task-5_user-metrics.csv
0,0.935215,0.329899,0.285087,0.716271,0.282238,0.43703,0.66549,0.194307,../metrics/mfnn/2016-06-30_03.11.58_mfnn_ml-100k_e20_tt-0.7_task-15_user-metrics.csv
0,0.935047,0.314763,0.275493,0.716495,0.273224,0.422368,0.640682,0.193775,../metrics/mfnn/2016-06-30_02.34.35_mfnn_ml-100k_e20_tt-0.7_task-14_user-metrics.csv
0,0.935007,0.313464,0.272358,0.71595,0.271103,0.415016,0.64258,0.192799,../metrics/mfnn/2016-06-29_20.46.44_mfnn_ml-100k_e20_tt-0.7_task-4_user-metrics.csv
0,0.934681,0.329256,0.282835,0.717006,0.280011,0.432944,0.659066,0.196333,../metrics/mfnn/2016-06-29_21.06.33_mfnn_ml-100k_e20_tt-0.7_task-7_user-metrics.csv
0,0.934641,0.341208,0.290752,0.715236,0.287434,0.445538,0.672096,0.191137,../metrics/mfnn/2016-06-29_23.10.39_mfnn_ml-100k_e20_tt-0.7_task-10_user-metrics.csv
0,0.934382,0.32992,0.285179,0.716457,0.283192,0.43387,0.656426,0.194125,../metrics/mfnn/2016-06-29_23.57.20_mfnn_ml-100k_e20_tt-0.7_task-9_user-metrics.csv
0,0.934166,0.328526,0.282746,0.715376,0.280223,0.430582,0.658262,0.191583,../metrics/mfnn/2016-06-30_00.54.10_mfnn_ml-100k_e20_tt-0.7_task-12_user-metrics.csv
0,0.934165,0.3337,0.284385,0.717744,0.282078,0.433518,0.665096,0.1978,../metrics/mfnn/2016-06-29_19.10.09_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv
0,0.934074,0.342575,0.290737,0.716585,0.287646,0.445631,0.664009,0.195383,../metrics/mfnn/2016-06-29_18.49.44_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv


In [15]:
# Pretty-print the saved configuration of the top-AUC run (task-5).
with open('../models/mfnn/2016-06-29_19.46.38_mfnn_ml-100k_e20_tt-0.7_task-5_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'mfnn_ml-100k_e20_tt-0.7_task-5',
    u'hit_threshold': 4,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_movies': 1261,
    u'nb_users': 943,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-100k/sparse-item/0.7-train.csv',
    u'train_test

#### ML-1m - 0.7 train / 0.3 test - sparse item

In [53]:
# Per-user metrics CSVs for ten ML-1m runs. Every filename says task-0, so the
# runs are distinguished only by their timestamps (2016-06-24 .. 2016-06-27).
user_metrics = [
'../metrics/mfnn/2016-06-24_19.32.18_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-24_19.32.34_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-24_20.20.25_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-24_22.34.07_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-26_00.27.34_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-26_08.08.43_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-26_22.30.57_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-26_22.39.12_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-27_00.50.04_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
]

In [54]:
# Summarise every run, then rank the runs by mean AUC (highest first).
df_results = get_df_results(user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.930207,0.275614,0.247795,0.73166,0.305745,0.301779,0.652707,0.234691,../metrics/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.928134,0.26224,0.235928,0.732212,0.293949,0.283973,0.63351,0.236068,../metrics/mfnn/2016-06-24_22.34.07_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.928056,0.261269,0.236727,0.731971,0.294901,0.285401,0.627566,0.235839,../metrics/mfnn/2016-06-26_00.27.34_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.927645,0.26147,0.236771,0.731512,0.29399,0.286007,0.628579,0.233787,../metrics/mfnn/2016-06-24_19.32.18_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.924041,0.24533,0.223964,0.731849,0.28178,0.266547,0.610945,0.235134,../metrics/mfnn/2016-06-24_20.20.25_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.923749,0.244848,0.223052,0.732099,0.281175,0.264956,0.614683,0.235651,../metrics/mfnn/2016-06-26_22.30.57_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.923499,0.243704,0.22209,0.732006,0.280199,0.263121,0.609093,0.235507,../metrics/mfnn/2016-06-26_22.39.12_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.912936,0.215241,0.199477,0.733152,0.25731,0.2309,0.576534,0.238848,../metrics/mfnn/2016-06-26_08.08.43_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.899805,0.188665,0.177465,0.733476,0.232657,0.202566,0.546767,0.23899,../metrics/mfnn/2016-06-24_19.32.34_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.899771,0.187774,0.176116,0.732703,0.230853,0.201047,0.546865,0.237,../metrics/mfnn/2016-06-27_00.50.04_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv


In [55]:
# Pretty-print the saved configuration of the top-AUC run (2016-06-25_22.58.04).
with open('../models/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_config.json') as f:
    pp.pprint(json.load(f))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': True,
    u'experiment_name': u'mfnn_ml-1m_e10_tt-0.7_task-0',
    u'hit_threshold': 4,
    u'lr': 0.06,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 10,
    u'nb_latent_f': 128,
    u'nb_movies': 3005,
    u'nb_users': 6040,
    u'pool_size': 8,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-1m/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-1m/sparse-item/0.7-train.csv',
   

In [56]:
# Per-movie metrics CSVs for ten ML-1m runs. Every filename says task-0, so the
# runs are distinguished only by their timestamps (2016-06-24 .. 2016-06-27).
movie_metrics = [
'../metrics/mfnn/2016-06-24_19.32.18_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-24_19.32.34_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-24_20.20.25_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-24_22.34.07_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_00.27.34_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_08.08.43_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_22.30.57_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_22.39.12_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-27_00.50.04_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
]

In [57]:
# One summary row per run, averaged over the rows with nb_test_ratings > 0.
# NOTE(review): movie_id is averaged along with the metrics because df_mean
# does not drop it; that column's mean is presumably not meaningful.
df_cv_movie_metrics = get_df_results(movie_metrics)
df_cv_movie_metrics

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.918073,1999.3961,203.093477,../metrics/mfnn/2016-06-24_19.32.18_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.890926,1999.3961,203.093477,../metrics/mfnn/2016-06-24_19.32.34_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.915559,1999.3961,203.093477,../metrics/mfnn/2016-06-24_20.20.25_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.91795,1999.3961,203.093477,../metrics/mfnn/2016-06-24_22.34.07_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.920625,1999.3961,203.093477,../metrics/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.918416,1999.3961,203.093477,../metrics/mfnn/2016-06-26_00.27.34_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.905064,1999.3961,203.093477,../metrics/mfnn/2016-06-26_08.08.43_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.914662,1999.3961,203.093477,../metrics/mfnn/2016-06-26_22.30.57_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.914609,1999.3961,203.093477,../metrics/mfnn/2016-06-26_22.39.12_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.891066,1999.3961,203.093477,../metrics/mfnn/2016-06-27_00.50.04_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
