In [7]:
# Standard library
import json
import pprint

# Third-party
import numpy as np
import pandas as pd

# Shared pretty-printer used by the config dumps further down
pp = pprint.PrettyPrinter(indent=4)
# Allow up to 100 characters per cell so the long metric file paths stay readable
pd.set_option('display.max_colwidth', 100)

In [8]:
def df_mean(path, apply_to_df):
    """Load one metrics CSV and collapse it to a single row of column means.

    Parameters
    ----------
    path : str
        Location of the metrics CSV; handed unchanged to ``pd.read_csv``.
    apply_to_df : callable
        Called with the freshly loaded DataFrame and expected to return the
        DataFrame that should be averaged (typically a row filter). It runs
        *before* the bookkeeping columns are dropped, so it may filter on them.

    Returns
    -------
    pandas.DataFrame
        A one-row frame (index ``0``) holding the mean of every remaining
        numeric column, plus a ``path`` column recording the source file.
    """
    # Columns that describe the row rather than measure quality; they must
    # not show up as "metrics" in the summary.
    bookkeeping_columns = [
        'nb_movies_not_in_train',
        'nb_test_ratings',
        'nb_train_ratings',
        'user_id',
    ]

    metrics = apply_to_df(pd.read_csv(path))
    # errors='ignore' skips whichever of these columns a given file lacks.
    metrics = metrics.drop(bookkeeping_columns, axis=1, errors='ignore')

    # numeric_only=True keeps the historical behaviour of silently skipping
    # non-numeric columns; without it recent pandas raises on such columns.
    # NOTE(review): other identifier-like columns (e.g. movie_id in the
    # movie-metrics files) are still averaged; that value carries no meaning.
    summary = metrics.mean(axis=0, numeric_only=True).to_frame().T
    summary['path'] = path

    return summary
    

def get_df_results(results, max_train_ratings=None):
    """Build a table with one row of averaged metrics per metrics file.

    Parameters
    ----------
    results : iterable of str
        Paths of the metrics CSV files to summarise.
    max_train_ratings : number, optional
        When given, only rows whose ``nb_train_ratings`` is at most this value
        contribute to the averages. ``None`` (the default) disables the cap;
        ``0`` is a valid cap and is honoured.

    Returns
    -------
    pandas.DataFrame
        The per-file rows produced by ``df_mean`` stacked in input order, each
        keeping its original index label. An empty DataFrame is returned when
        ``results`` is empty.
    """
    def keep_rows(df):
        # Rows without any test rating are always excluded.
        mask = df['nb_test_ratings'] > 0
        # Compare against None explicitly so that a cap of 0 is not mistaken
        # for "no cap".
        if max_train_ratings is not None:
            mask = mask & (df['nb_train_ratings'] <= max_train_ratings)
        return df[mask]

    summaries = [df_mean(path, keep_rows) for path in results]
    if not summaries:
        return pd.DataFrame()
    # A single concat replaces the reduce/DataFrame.append chain: `reduce` is
    # not a builtin on Python 3 and DataFrame.append no longer exists in
    # recent pandas.
    return pd.concat(summaries)

## BEST ML-100k - 0.7 train / 0.3 test - sparse item

In [17]:
# Per-user metric CSVs for this section: the task-5 run followed by the
# train-2 ... train-5 runs (presumably the remaining cross-validation
# splits -- TODO confirm).
user_metrics = [
'../metrics/mfnn/2016-06-29_19.46.38_mfnn_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/mfnn/2016-06-30_14.38.51_mfnn_ml-100k_e20_tt-0.7_train-2_user-metrics.csv',
'../metrics/mfnn/2016-06-30_14.35.08_mfnn_ml-100k_e20_tt-0.7_train-3_user-metrics.csv',
'../metrics/mfnn/2016-06-30_14.34.04_mfnn_ml-100k_e20_tt-0.7_train-4_user-metrics.csv',
'../metrics/mfnn/2016-06-30_14.35.45_mfnn_ml-100k_e20_tt-0.7_train-5_user-metrics.csv',
]

In [18]:
# One summary row per run: the mean of each per-user metric over the rows
# that have at least one test rating.
df_cv_user_metrics = get_df_results(user_metrics)
df_cv_user_metrics

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.935373,0.335996,0.288141,0.71576,0.285684,0.441399,0.661673,0.191578,../metrics/mfnn/2016-06-29_19.46.38_mfnn_ml-100k_e20_tt-0.7_task-5_user-metrics.csv
0,0.935007,0.344234,0.28813,0.718641,0.286426,0.443253,0.69114,0.205383,../metrics/mfnn/2016-06-30_14.38.51_mfnn_ml-100k_e20_tt-0.7_train-2_user-metrics.csv
0,0.935899,0.346139,0.297077,0.714599,0.293531,0.452712,0.688312,0.197811,../metrics/mfnn/2016-06-30_14.35.08_mfnn_ml-100k_e20_tt-0.7_train-3_user-metrics.csv
0,0.93434,0.345099,0.290593,0.710893,0.287275,0.44049,0.696522,0.191103,../metrics/mfnn/2016-06-30_14.34.04_mfnn_ml-100k_e20_tt-0.7_train-4_user-metrics.csv
0,0.93675,0.340563,0.293501,0.708338,0.291145,0.444469,0.687291,0.17741,../metrics/mfnn/2016-06-30_14.35.45_mfnn_ml-100k_e20_tt-0.7_train-5_user-metrics.csv


In [19]:
# Average every metric across the runs. numeric_only=True skips the
# string-valued 'path' column, which recent pandas refuses to average.
df_cv_user_metrics.mean(axis=0, numeric_only=True)

auc                   0.935474
avg_precision         0.342406
f1                    0.291488
fcp                   0.713646
precision_at_20       0.288812
recall_at_20          0.444465
reciprocal_rank       0.684988
spearman_rank_corr    0.192657
dtype: float64

In [22]:
# Same summary, restricted to rows with at most 20 training ratings.
df_max_train = get_df_results(user_metrics, max_train_ratings=20)
# numeric_only=True skips the string-valued 'path' column, which recent
# pandas refuses to average.
df_max_train.mean(axis=0, numeric_only=True)

auc                   0.949840
avg_precision         0.271185
f1                    0.181447
fcp                   0.690871
precision_at_20       0.114469
recall_at_20          0.545316
reciprocal_rank       0.495516
spearman_rank_corr    0.115965
dtype: float64

In [21]:
# Hyper-parameters saved with the best run of this section (task-5)
with open('../models/mfnn/2016-06-29_19.46.38_mfnn_ml-100k_e20_tt-0.7_task-5_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'mfnn_ml-100k_e20_tt-0.7_task-5',
    u'hit_threshold': 4,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_movies': 1261,
    u'nb_users': 943,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-100k/sparse-item/0.7-train.csv',
    u'train_test

In [11]:
# Per-movie metric CSVs: the task-1 run (2016-06-21) followed by the
# train-2 ... train-5 runs from 2016-06-26.
# NOTE(review): these timestamps/tasks differ from the user-metric files of
# this section (task-5, 2016-06-29/30) -- confirm the two lists are meant to
# come from different runs.
movie_metrics = [
'../metrics/mfnn/2016-06-21_19.30.24_mfnn_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_05.41.47_mfnn_ml-100k_e20_tt-0.7_train-2_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_05.50.41_mfnn_ml-100k_e20_tt-0.7_train-3_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_05.41.05_mfnn_ml-100k_e20_tt-0.7_train-4_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_05.36.23_mfnn_ml-100k_e20_tt-0.7_train-5_movie-metrics.csv',
]


In [48]:
# One summary row per run: the mean of each per-movie column over the rows
# that have at least one test rating.
df_cv_movie_metrics = get_df_results(movie_metrics)
df_cv_movie_metrics

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.897062,708.548841,78.062914,../metrics/mfnn/2016-06-21_19.30.24_mfnn_ml-100k_e20_tt-0.7_task-1_movie-metrics.csv
0,0.892402,708.548841,78.062914,../metrics/mfnn/2016-06-26_05.41.47_mfnn_ml-100k_e20_tt-0.7_train-2_movie-metrics.csv
0,0.895192,708.548841,78.062914,../metrics/mfnn/2016-06-26_05.50.41_mfnn_ml-100k_e20_tt-0.7_train-3_movie-metrics.csv
0,0.893445,708.548841,78.062914,../metrics/mfnn/2016-06-26_05.41.05_mfnn_ml-100k_e20_tt-0.7_train-4_movie-metrics.csv
0,0.897041,708.548841,78.062914,../metrics/mfnn/2016-06-26_05.36.23_mfnn_ml-100k_e20_tt-0.7_train-5_movie-metrics.csv


In [49]:
# Average every column across the runs. numeric_only=True skips the
# string-valued 'path' column, which recent pandas refuses to average.
df_cv_movie_metrics.mean(axis=0, numeric_only=True)

auc                                0.895029
movie_id                         708.548841
nb_times_in_top_n_predictions     78.062914
dtype: float64

In [12]:
# Same summary, restricted to rows with at most 10 training ratings.
df_max_train = get_df_results(movie_metrics, max_train_ratings=10)
# numeric_only=True skips the string-valued 'path' column, which recent
# pandas refuses to average.
df_max_train.mean(axis=0, numeric_only=True)

auc                                 0.851882
movie_id                         1155.646643
nb_times_in_top_n_predictions       1.269965
dtype: float64

## BEST ML-1m - 0.7 train / 0.3 test - sparse item

In [5]:
# Per-user metric CSVs for ML-1m: the task-0 run followed by the train-3,
# train-4 and train-5 runs.
# NOTE(review): no train-2 file is listed here, unlike the ML-100k section --
# confirm whether that run is missing on purpose.
user_metrics = [
'../metrics/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-27_15.53.43_mfnn_ml-1m_e10_tt-0.7_train-3_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-27_15.03.52_mfnn_ml-1m_e10_tt-0.7_train-4_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-27_15.03.45_mfnn_ml-1m_e10_tt-0.7_train-5_task-0_user-metrics.csv',

]

In [6]:
# One summary row per ML-1m run: the mean of each per-user metric over the
# rows that have at least one test rating.
df_cv_user_metrics = get_df_results(user_metrics)
df_cv_user_metrics

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.930207,0.275614,0.247795,0.73166,0.305745,0.301779,0.652707,0.234691,../metrics/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.929762,0.277756,0.247114,0.732566,0.304272,0.300872,0.652439,0.23698,../metrics/mfnn/2016-06-27_15.53.43_mfnn_ml-1m_e10_tt-0.7_train-3_task-0_user-metrics.csv
0,0.930535,0.275805,0.246374,0.730024,0.304437,0.299467,0.654577,0.228405,../metrics/mfnn/2016-06-27_15.03.52_mfnn_ml-1m_e10_tt-0.7_train-4_task-0_user-metrics.csv
0,0.929838,0.273262,0.245625,0.730126,0.303709,0.298119,0.646272,0.230853,../metrics/mfnn/2016-06-27_15.03.45_mfnn_ml-1m_e10_tt-0.7_train-5_task-0_user-metrics.csv


In [None]:
# Average every metric across the runs. numeric_only=True skips the
# string-valued 'path' column, which recent pandas refuses to average.
df_cv_user_metrics.mean(axis=0, numeric_only=True)

In [16]:
# Hyper-parameters saved with the best ML-1m run (task-0, 2016-06-25)
with open('../models/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': True,
    u'experiment_name': u'mfnn_ml-1m_e10_tt-0.7_task-0',
    u'hit_threshold': 4,
    u'lr': 0.06,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 10,
    u'nb_latent_f': 128,
    u'nb_movies': 3005,
    u'nb_users': 6040,
    u'pool_size': 8,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-1m/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-1m/sparse-item/0.7-train.csv',
   

In [None]:
# Per-movie metric CSV of the single ML-1m task-0 run from 2016-06-25.
movie_metrics = [
'../metrics/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv'
]


In [None]:
# One summary row per listed file: the mean of each per-movie column over the
# rows that have at least one test rating.
df_cv_movie_metrics = get_df_results(movie_metrics)
df_cv_movie_metrics

In [None]:
# Average every column across the listed files. numeric_only=True skips the
# string-valued 'path' column, which recent pandas refuses to average.
df_cv_movie_metrics.mean(axis=0, numeric_only=True)

# Grid search

In [13]:
# Per-user metric CSVs for tasks 0-15 (one file per task id), listed in
# timestamp order.
user_metrics = [
'../metrics/mfnn/2016-06-29_17.42.15_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-29_17.43.53_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-06-29_18.49.44_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-29_19.10.09_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-29_19.46.38_mfnn_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/mfnn/2016-06-29_20.46.44_mfnn_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/mfnn/2016-06-29_21.06.33_mfnn_ml-100k_e20_tt-0.7_task-7_user-metrics.csv',
'../metrics/mfnn/2016-06-29_21.36.34_mfnn_ml-100k_e20_tt-0.7_task-8_user-metrics.csv',
'../metrics/mfnn/2016-06-29_22.27.40_mfnn_ml-100k_e20_tt-0.7_task-6_user-metrics.csv',
'../metrics/mfnn/2016-06-29_23.10.39_mfnn_ml-100k_e20_tt-0.7_task-10_user-metrics.csv',
'../metrics/mfnn/2016-06-29_23.57.20_mfnn_ml-100k_e20_tt-0.7_task-9_user-metrics.csv',
'../metrics/mfnn/2016-06-30_00.54.10_mfnn_ml-100k_e20_tt-0.7_task-12_user-metrics.csv',
'../metrics/mfnn/2016-06-30_01.10.42_mfnn_ml-100k_e20_tt-0.7_task-11_user-metrics.csv',
'../metrics/mfnn/2016-06-30_02.34.35_mfnn_ml-100k_e20_tt-0.7_task-14_user-metrics.csv',
'../metrics/mfnn/2016-06-30_02.46.06_mfnn_ml-100k_e20_tt-0.7_task-13_user-metrics.csv',
'../metrics/mfnn/2016-06-30_03.11.58_mfnn_ml-100k_e20_tt-0.7_task-15_user-metrics.csv',
]

In [14]:
# Summarise every listed run, then rank the runs by mean AUC (best first).
df_results = get_df_results(user_metrics)
df_results.sort_values('auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.935373,0.335996,0.288141,0.71576,0.285684,0.441399,0.661673,0.191578,../metrics/mfnn/2016-06-29_19.46.38_mfnn_ml-100k_e20_tt-0.7_task-5_user-metrics.csv
0,0.935215,0.329899,0.285087,0.716271,0.282238,0.43703,0.66549,0.194307,../metrics/mfnn/2016-06-30_03.11.58_mfnn_ml-100k_e20_tt-0.7_task-15_user-metrics.csv
0,0.935047,0.314763,0.275493,0.716495,0.273224,0.422368,0.640682,0.193775,../metrics/mfnn/2016-06-30_02.34.35_mfnn_ml-100k_e20_tt-0.7_task-14_user-metrics.csv
0,0.935007,0.313464,0.272358,0.71595,0.271103,0.415016,0.64258,0.192799,../metrics/mfnn/2016-06-29_20.46.44_mfnn_ml-100k_e20_tt-0.7_task-4_user-metrics.csv
0,0.934681,0.329256,0.282835,0.717006,0.280011,0.432944,0.659066,0.196333,../metrics/mfnn/2016-06-29_21.06.33_mfnn_ml-100k_e20_tt-0.7_task-7_user-metrics.csv
0,0.934641,0.341208,0.290752,0.715236,0.287434,0.445538,0.672096,0.191137,../metrics/mfnn/2016-06-29_23.10.39_mfnn_ml-100k_e20_tt-0.7_task-10_user-metrics.csv
0,0.934382,0.32992,0.285179,0.716457,0.283192,0.43387,0.656426,0.194125,../metrics/mfnn/2016-06-29_23.57.20_mfnn_ml-100k_e20_tt-0.7_task-9_user-metrics.csv
0,0.934166,0.328526,0.282746,0.715376,0.280223,0.430582,0.658262,0.191583,../metrics/mfnn/2016-06-30_00.54.10_mfnn_ml-100k_e20_tt-0.7_task-12_user-metrics.csv
0,0.934165,0.3337,0.284385,0.717744,0.282078,0.433518,0.665096,0.1978,../metrics/mfnn/2016-06-29_19.10.09_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv
0,0.934074,0.342575,0.290737,0.716585,0.287646,0.445631,0.664009,0.195383,../metrics/mfnn/2016-06-29_18.49.44_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv


In [15]:
# Hyper-parameters saved with the top-ranked run above (task-5)
with open('../models/mfnn/2016-06-29_19.46.38_mfnn_ml-100k_e20_tt-0.7_task-5_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': False,
    u'experiment_name': u'mfnn_ml-100k_e20_tt-0.7_task-5',
    u'hit_threshold': 4,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_movies': 1261,
    u'nb_users': 943,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-100k/sparse-item/0.7-train.csv',
    u'train_test

## ML-100k - 0.7 train / 0.3 test - sparse item

In [41]:
# Per-user metric CSVs from the ML-100k runs of 2016-06-20 ... 2016-06-23,
# listed in timestamp order; task ids 0-3 recur across runs.
user_metrics = [
'../metrics/mfnn/2016-06-20_14.41.10_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-20_15.23.33_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-20_15.27.24_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-20_15.35.53_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-20_15.57.48_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-20_16.59.18_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-20_17.56.21_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-20_18.03.09_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-20_19.02.32_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-20_20.07.00_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-20_20.14.07_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-20_22.08.26_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-06-20_22.35.49_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-06-21_03.33.28_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-21_04.38.48_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-21_06.21.30_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-21_08.17.14_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-21_09.15.54_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-06-21_10.20.59_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-06-21_13.27.13_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-21_15.30.47_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-06-21_19.30.24_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-21_21.21.05_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-22_09.33.59_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-06-22_20.16.34_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-22_20.24.54_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-22_20.33.20_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-22_21.12.49_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-22_22.20.35_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-22_22.37.08_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-22_23.11.42_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-22_23.32.54_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/mfnn/2016-06-23_00.30.44_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-23_00.49.42_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-23_01.22.10_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-23_01.26.51_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/mfnn/2016-06-23_02.26.18_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-06-23_03.17.07_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-06-23_03.52.30_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/mfnn/2016-06-23_04.03.04_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
]

In [42]:
# Summarise every listed run, then rank the runs by mean AUC (best first).
df_results = get_df_results(user_metrics)
df_results.sort_values('auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.933809,0.344144,0.292027,0.716052,0.289343,0.446031,0.675718,0.192061,../metrics/mfnn/2016-06-21_19.30.24_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.933496,0.306996,0.2696,0.715244,0.266702,0.415502,0.637832,0.190412,../metrics/mfnn/2016-06-20_22.08.26_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv
0,0.930352,0.285952,0.254703,0.714188,0.252916,0.390911,0.613219,0.186889,../metrics/mfnn/2016-06-23_00.30.44_mfnn_ml-100k_e20_tt-0.7_task-2_user-metrics.csv
0,0.930178,0.285216,0.252084,0.714016,0.250477,0.387869,0.613027,0.186858,../metrics/mfnn/2016-06-22_20.16.34_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv
0,0.930115,0.28408,0.253074,0.714633,0.25106,0.390257,0.608517,0.18849,../metrics/mfnn/2016-06-23_03.17.07_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv
0,0.930071,0.28447,0.253021,0.714357,0.25175,0.388126,0.61392,0.187627,../metrics/mfnn/2016-06-23_02.26.18_mfnn_ml-100k_e20_tt-0.7_task-3_user-metrics.csv
0,0.930068,0.285446,0.254564,0.715417,0.253022,0.391685,0.615677,0.190508,../metrics/mfnn/2016-06-22_22.37.08_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.929988,0.284546,0.253263,0.713497,0.252015,0.387722,0.613499,0.186005,../metrics/mfnn/2016-06-22_20.24.54_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv
0,0.929979,0.285054,0.253206,0.713157,0.25122,0.389316,0.61661,0.18498,../metrics/mfnn/2016-06-22_22.20.35_mfnn_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.929759,0.283629,0.25143,0.712834,0.250212,0.386136,0.608028,0.183643,../metrics/mfnn/2016-06-22_20.33.20_mfnn_ml-100k_e20_tt-0.7_task-0_user-metrics.csv


In [43]:
# Hyper-parameters saved with the top-ranked run above (task-1, 2016-06-21)
with open('../models/mfnn/2016-06-21_19.30.24_mfnn_ml-100k_e20_tt-0.7_task-1_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': True,
    u'experiment_name': u'mfnn_ml-100k_e20_tt-0.7_task-1',
    u'hit_threshold': 4,
    u'lr': 0.03,
    u'lr_decay': 0.01,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_movies': 1261,
    u'nb_users': 943,
    u'pool_size': 24,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-100k/sparse-item/0.7-train.c

In [36]:
# Hyper-parameters saved with the runner-up run above (task-3, 2016-06-20)
with open('../models/mfnn/2016-06-20_22.08.26_mfnn_ml-100k_e20_tt-0.7_task-3_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': True,
    u'experiment_name': u'mfnn_ml-100k_e20_tt-0.7_task-3',
    u'hit_threshold': 4,
    u'lr': 0.015,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'nb_movies': 1261,
    u'nb_users': 943,
    u'pool_size': 24,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-100k/sparse-item/0.7-train.

## ML-1m - 0.7 train / 0.3 test - sparse item

In [53]:
# Per-user metric CSVs of ten ML-1m task-0 runs (2016-06-24 ... 2016-06-27),
# listed in timestamp order.
user_metrics = [
'../metrics/mfnn/2016-06-24_19.32.18_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-24_19.32.34_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-24_20.20.25_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-24_22.34.07_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-26_00.27.34_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-26_08.08.43_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-26_22.30.57_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-26_22.39.12_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
'../metrics/mfnn/2016-06-27_00.50.04_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv',
]

In [54]:
# Summarise every listed run, then rank the runs by mean AUC (best first).
df_results = get_df_results(user_metrics)
df_results.sort_values('auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.930207,0.275614,0.247795,0.73166,0.305745,0.301779,0.652707,0.234691,../metrics/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.928134,0.26224,0.235928,0.732212,0.293949,0.283973,0.63351,0.236068,../metrics/mfnn/2016-06-24_22.34.07_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.928056,0.261269,0.236727,0.731971,0.294901,0.285401,0.627566,0.235839,../metrics/mfnn/2016-06-26_00.27.34_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.927645,0.26147,0.236771,0.731512,0.29399,0.286007,0.628579,0.233787,../metrics/mfnn/2016-06-24_19.32.18_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.924041,0.24533,0.223964,0.731849,0.28178,0.266547,0.610945,0.235134,../metrics/mfnn/2016-06-24_20.20.25_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.923749,0.244848,0.223052,0.732099,0.281175,0.264956,0.614683,0.235651,../metrics/mfnn/2016-06-26_22.30.57_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.923499,0.243704,0.22209,0.732006,0.280199,0.263121,0.609093,0.235507,../metrics/mfnn/2016-06-26_22.39.12_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.912936,0.215241,0.199477,0.733152,0.25731,0.2309,0.576534,0.238848,../metrics/mfnn/2016-06-26_08.08.43_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.899805,0.188665,0.177465,0.733476,0.232657,0.202566,0.546767,0.23899,../metrics/mfnn/2016-06-24_19.32.34_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv
0,0.899771,0.187774,0.176116,0.732703,0.230853,0.201047,0.546865,0.237,../metrics/mfnn/2016-06-27_00.50.04_mfnn_ml-1m_e10_tt-0.7_task-0_user-metrics.csv


In [55]:
# Hyper-parameters saved with the top-ranked ML-1m run above (2016-06-25)
with open('../models/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_config.json') as config_file:
    pp.pprint(json.load(config_file))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'd2v_model': u'doc2vec-models/2016-04-14_17.36.08_20e_pv-dbow_size50_lr0.025_window8_neg5',
    u'eval_in_parallel': True,
    u'experiment_name': u'mfnn_ml-1m_e10_tt-0.7_task-0',
    u'hit_threshold': 4,
    u'lr': 0.06,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/mfnn',
    u'model_save_dir': u'models/mfnn',
    u'nb_d2v_features': 50,
    u'nb_epochs': 10,
    u'nb_latent_f': 128,
    u'nb_movies': 3005,
    u'nb_users': 6040,
    u'pool_size': 8,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': True,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-1m/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-1m/sparse-item/0.7-train.csv',
   

In [56]:
# Per-movie metric CSVs of ten ML-1m task-0 runs (2016-06-24 ... 2016-06-27),
# listed in timestamp order.
movie_metrics = [
'../metrics/mfnn/2016-06-24_19.32.18_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-24_19.32.34_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-24_20.20.25_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-24_22.34.07_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_00.27.34_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_08.08.43_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_22.30.57_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-26_22.39.12_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
'../metrics/mfnn/2016-06-27_00.50.04_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv',
]

In [57]:
# One summary row per run: the mean of each per-movie column over the rows
# that have at least one test rating.
df_cv_movie_metrics = get_df_results(movie_metrics)
df_cv_movie_metrics

Unnamed: 0,auc,movie_id,nb_times_in_top_n_predictions,path
0,0.918073,1999.3961,203.093477,../metrics/mfnn/2016-06-24_19.32.18_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.890926,1999.3961,203.093477,../metrics/mfnn/2016-06-24_19.32.34_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.915559,1999.3961,203.093477,../metrics/mfnn/2016-06-24_20.20.25_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.91795,1999.3961,203.093477,../metrics/mfnn/2016-06-24_22.34.07_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.920625,1999.3961,203.093477,../metrics/mfnn/2016-06-25_22.58.04_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.918416,1999.3961,203.093477,../metrics/mfnn/2016-06-26_00.27.34_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.905064,1999.3961,203.093477,../metrics/mfnn/2016-06-26_08.08.43_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.914662,1999.3961,203.093477,../metrics/mfnn/2016-06-26_22.30.57_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.914609,1999.3961,203.093477,../metrics/mfnn/2016-06-26_22.39.12_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
0,0.891066,1999.3961,203.093477,../metrics/mfnn/2016-06-27_00.50.04_mfnn_ml-1m_e10_tt-0.7_task-0_movie-metrics.csv
