In [1]:
import numpy as np
import pandas as pd
import json
import pprint
import json
# Shared pretty-printer (4-space indent) for dumping the JSON run configs.
pp = pprint.PrettyPrinter(indent=4)
# Widen text columns to 100 characters; the result tables include long file paths.
pd.set_option('display.max_colwidth', 100)

In [2]:
def df_mean(path, apply_to_df):
    """Summarise one metrics CSV as a single row of column means.

    Parameters
    ----------
    path : str
        Location of the CSV file to read.
    apply_to_df : callable
        Called with the freshly loaded DataFrame; the DataFrame it returns
        is what gets averaged. Callers use it to filter rows.

    Returns
    -------
    pandas.DataFrame
        One row (index 0) holding the mean of every remaining numeric
        column, plus a ``path`` column recording the source file.
    """
    # These columns are dropped before averaging so they never show up in the
    # summary. They are still present when ``apply_to_df`` runs, so the filter
    # may use them. Columns missing from the file are simply ignored.
    bookkeeping = ['nb_movies_not_in_train', 'nb_test_ratings',
                   'nb_train_ratings', 'user_id']

    metrics = apply_to_df(pd.read_csv(path))
    metrics = metrics.drop(bookkeeping, axis=1, errors='ignore')

    # numeric_only=True: recent pandas raises on non-numeric columns rather
    # than silently skipping them as older releases did.
    summary = metrics.mean(axis=0, numeric_only=True).to_frame().T
    summary['path'] = path

    return summary
    

def get_df_results(results, max_train_ratings=None):
    """Build one summary row per metrics CSV and stack the rows into one frame.

    Parameters
    ----------
    results : iterable of str
        Paths of the metrics CSV files to summarise.
    max_train_ratings : number, optional
        When given, only rows with ``nb_train_ratings <= max_train_ratings``
        are averaged. ``None`` (the default) applies no such limit. A value
        of ``0`` is honoured as a real limit.

    Returns
    -------
    pandas.DataFrame
        The rows produced by ``df_mean`` for each path, in input order.
        Each row keeps its original index label.

    Raises
    ------
    ValueError
        From ``pd.concat`` when ``results`` is empty.
    """
    def apply_to_df(df):
        # Rows with no test ratings are always excluded.
        keep = df['nb_test_ratings'] > 0
        # Compare against None explicitly so a limit of 0 is not mistaken
        # for "no limit".
        if max_train_ratings is not None:
            keep = keep & (df['nb_train_ratings'] <= max_train_ratings)
        return df[keep]

    # A single concat replaces the pairwise reduce/append chain, which relied
    # on the Python 2 builtin ``reduce`` and the removed ``DataFrame.append``.
    return pd.concat([df_mean(path, apply_to_df) for path in results])

## ML-100k - 0.7 train / 0.3 test - sparse item

#### User Metrics

In [5]:
# Metrics CSVs of the BPR-MF runs on ML-100k that are compared below.
# File names follow <timestamp>_bprmf_ml-100k_e20_tt-0.7_task-<id>_user-metrics.csv;
# each task id 0-23 appears twice, under two different timestamps.
user_metrics = [
'../metrics/bprmf/2016-05-27_16.07.52_bprmf_ml-100k_e20_tt-0.7_task-22_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.07.53_bprmf_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.08.01_bprmf_ml-100k_e20_tt-0.7_task-11_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.08.02_bprmf_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.08.02_bprmf_ml-100k_e20_tt-0.7_task-6_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.08.27_bprmf_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.08.30_bprmf_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.14.44_bprmf_ml-100k_e20_tt-0.7_task-23_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.14.45_bprmf_ml-100k_e20_tt-0.7_task-7_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.14.47_bprmf_ml-100k_e20_tt-0.7_task-17_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.14.48_bprmf_ml-100k_e20_tt-0.7_task-13_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.14.49_bprmf_ml-100k_e20_tt-0.7_task-19_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.14.51_bprmf_ml-100k_e20_tt-0.7_task-18_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.14.57_bprmf_ml-100k_e20_tt-0.7_task-21_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.14.58_bprmf_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.14.58_bprmf_ml-100k_e20_tt-0.7_task-16_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.14.58_bprmf_ml-100k_e20_tt-0.7_task-9_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.15.01_bprmf_ml-100k_e20_tt-0.7_task-14_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.15.02_bprmf_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.15.42_bprmf_ml-100k_e20_tt-0.7_task-10_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.15.46_bprmf_ml-100k_e20_tt-0.7_task-8_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.15.47_bprmf_ml-100k_e20_tt-0.7_task-15_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.15.55_bprmf_ml-100k_e20_tt-0.7_task-20_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.15.58_bprmf_ml-100k_e20_tt-0.7_task-12_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.35.22_bprmf_ml-100k_e20_tt-0.7_task-18_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.35.24_bprmf_ml-100k_e20_tt-0.7_task-11_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.35.25_bprmf_ml-100k_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.35.25_bprmf_ml-100k_e20_tt-0.7_task-21_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.35.32_bprmf_ml-100k_e20_tt-0.7_task-15_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.35.35_bprmf_ml-100k_e20_tt-0.7_task-20_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.35.55_bprmf_ml-100k_e20_tt-0.7_task-23_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.35.57_bprmf_ml-100k_e20_tt-0.7_task-14_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.35.57_bprmf_ml-100k_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.35.58_bprmf_ml-100k_e20_tt-0.7_task-22_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.36.00_bprmf_ml-100k_e20_tt-0.7_task-12_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.42.10_bprmf_ml-100k_e20_tt-0.7_task-16_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.42.10_bprmf_ml-100k_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.42.10_bprmf_ml-100k_e20_tt-0.7_task-6_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.42.11_bprmf_ml-100k_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.42.11_bprmf_ml-100k_e20_tt-0.7_task-7_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.42.21_bprmf_ml-100k_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.42.21_bprmf_ml-100k_e20_tt-0.7_task-9_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.42.22_bprmf_ml-100k_e20_tt-0.7_task-13_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.42.24_bprmf_ml-100k_e20_tt-0.7_task-19_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.42.24_bprmf_ml-100k_e20_tt-0.7_task-8_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.43.13_bprmf_ml-100k_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.43.15_bprmf_ml-100k_e20_tt-0.7_task-10_user-metrics.csv',
'../metrics/bprmf/2016-05-27_16.43.15_bprmf_ml-100k_e20_tt-0.7_task-17_user-metrics.csv',
]

In [6]:
# One summary row per run, displayed with the highest AUC first.
df_results = get_df_results(user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.922878,0.188243,0.185173,0.707426,0.183616,0.287105,0.370203,0.167195,../metrics/bprmf/2016-05-27_16.15.55_bprmf_ml-100k_e20_tt-0.7_task-20_user-metrics.csv
0,0.92188,0.196939,0.187535,0.707451,0.182927,0.299526,0.420631,0.164061,../metrics/bprmf/2016-05-27_16.42.21_bprmf_ml-100k_e20_tt-0.7_task-1_user-metrics.csv
0,0.921337,0.197371,0.189586,0.70406,0.18685,0.29675,0.420262,0.15374,../metrics/bprmf/2016-05-27_16.42.10_bprmf_ml-100k_e20_tt-0.7_task-2_user-metrics.csv
0,0.921331,0.193502,0.186059,0.7065,0.18176,0.296874,0.386387,0.163833,../metrics/bprmf/2016-05-27_16.15.47_bprmf_ml-100k_e20_tt-0.7_task-15_user-metrics.csv
0,0.921211,0.198277,0.188063,0.707101,0.182503,0.300069,0.413831,0.164617,../metrics/bprmf/2016-05-27_16.43.15_bprmf_ml-100k_e20_tt-0.7_task-10_user-metrics.csv
0,0.921124,0.19067,0.179489,0.703706,0.177996,0.27535,0.399438,0.155327,../metrics/bprmf/2016-05-27_16.08.01_bprmf_ml-100k_e20_tt-0.7_task-11_user-metrics.csv
0,0.920919,0.187165,0.179935,0.703633,0.175186,0.285662,0.378797,0.154488,../metrics/bprmf/2016-05-27_16.43.15_bprmf_ml-100k_e20_tt-0.7_task-17_user-metrics.csv
0,0.920782,0.198243,0.189574,0.705558,0.186161,0.294393,0.429078,0.161578,../metrics/bprmf/2016-05-27_16.14.57_bprmf_ml-100k_e20_tt-0.7_task-21_user-metrics.csv
0,0.920746,0.200915,0.194214,0.707102,0.188176,0.31158,0.415515,0.164097,../metrics/bprmf/2016-05-27_16.15.58_bprmf_ml-100k_e20_tt-0.7_task-12_user-metrics.csv
0,0.92066,0.197039,0.191152,0.706221,0.187275,0.299865,0.425236,0.162187,../metrics/bprmf/2016-05-27_16.14.49_bprmf_ml-100k_e20_tt-0.7_task-19_user-metrics.csv


In [11]:
# best config
with open('../models/bprmf/2016-05-27_16.15.55_bprmf_ml-100k_e20_tt-0.7_task-20_config.json') as f:
    pp.pprint(json.loads(f.read()))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'eval_in_parallel': False,
    u'experiment_name': u'bprmf_ml-100k_e20_tt-0.7_task-20',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/bprmf',
    u'model_save_dir': u'models/bprmf',
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-100k/ratings.csv',
    u'reg_lambda': 0.01,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-100k/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-100k/sparse-item/0.7-train.csv',
    u'train_test_split': 0.7,
    u'triplet_sample_factor': 5,
    u'val': False,
    u'verbose': 0}


## ML-1m - 0.7 train / 0.3 test - sparse item

In [3]:
# Metrics CSVs of the BPR-MF runs on ML-1m that are compared below.
# File names follow <timestamp>_bprmf_ml-1m_e20_tt-0.7_task-<id>_user-metrics.csv;
# each task id 0-23 appears once.
# NOTE(review): this rebinds the `user_metrics` name used for the ML-100k list.
user_metrics = [
'../metrics/bprmf/2016-05-24_16.39.57_bprmf_ml-1m_e20_tt-0.7_task-16_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.40.02_bprmf_ml-1m_e20_tt-0.7_task-12_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.40.04_bprmf_ml-1m_e20_tt-0.7_task-2_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.40.05_bprmf_ml-1m_e20_tt-0.7_task-11_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.40.08_bprmf_ml-1m_e20_tt-0.7_task-17_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.40.12_bprmf_ml-1m_e20_tt-0.7_task-6_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.40.13_bprmf_ml-1m_e20_tt-0.7_task-4_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.41.28_bprmf_ml-1m_e20_tt-0.7_task-15_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.41.32_bprmf_ml-1m_e20_tt-0.7_task-20_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.46.17_bprmf_ml-1m_e20_tt-0.7_task-23_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.46.17_bprmf_ml-1m_e20_tt-0.7_task-7_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.46.21_bprmf_ml-1m_e20_tt-0.7_task-22_user-metrics.csv',
'../metrics/bprmf/2016-05-24_16.46.51_bprmf_ml-1m_e20_tt-0.7_task-3_user-metrics.csv',
'../metrics/bprmf/2016-05-24_17.53.16_bprmf_ml-1m_e20_tt-0.7_task-14_user-metrics.csv',
'../metrics/bprmf/2016-05-24_17.53.45_bprmf_ml-1m_e20_tt-0.7_task-18_user-metrics.csv',
'../metrics/bprmf/2016-05-24_17.53.54_bprmf_ml-1m_e20_tt-0.7_task-5_user-metrics.csv',
'../metrics/bprmf/2016-05-24_17.54.17_bprmf_ml-1m_e20_tt-0.7_task-1_user-metrics.csv',
'../metrics/bprmf/2016-05-24_17.54.24_bprmf_ml-1m_e20_tt-0.7_task-0_user-metrics.csv',
'../metrics/bprmf/2016-05-24_17.55.43_bprmf_ml-1m_e20_tt-0.7_task-21_user-metrics.csv',
'../metrics/bprmf/2016-05-24_17.55.43_bprmf_ml-1m_e20_tt-0.7_task-8_user-metrics.csv',
'../metrics/bprmf/2016-05-24_17.55.59_bprmf_ml-1m_e20_tt-0.7_task-13_user-metrics.csv',
'../metrics/bprmf/2016-05-24_17.56.17_bprmf_ml-1m_e20_tt-0.7_task-9_user-metrics.csv',
'../metrics/bprmf/2016-05-24_18.03.48_bprmf_ml-1m_e20_tt-0.7_task-10_user-metrics.csv',
'../metrics/bprmf/2016-05-24_18.04.12_bprmf_ml-1m_e20_tt-0.7_task-19_user-metrics.csv',
]

In [4]:
# One summary row per run, displayed with the highest AUC first.
df_results = get_df_results(user_metrics)
df_results.sort_values(by='auc', ascending=False)

Unnamed: 0,auc,avg_precision,f1,fcp,precision_at_20,recall_at_20,reciprocal_rank,spearman_rank_corr,path
0,0.91856,0.179407,0.168981,0.720853,0.213907,0.2022,0.443407,0.202925,../metrics/bprmf/2016-05-24_18.04.12_bprmf_ml-1m_e20_tt-0.7_task-19_user-metrics.csv
0,0.918177,0.183335,0.17019,0.724192,0.218593,0.201278,0.450858,0.21241,../metrics/bprmf/2016-05-24_17.55.43_bprmf_ml-1m_e20_tt-0.7_task-21_user-metrics.csv
0,0.917479,0.175583,0.165085,0.720914,0.209801,0.197282,0.440137,0.202883,../metrics/bprmf/2016-05-24_16.46.51_bprmf_ml-1m_e20_tt-0.7_task-3_user-metrics.csv
0,0.915558,0.151962,0.134987,0.716067,0.176614,0.15957,0.372497,0.188303,../metrics/bprmf/2016-05-24_16.46.21_bprmf_ml-1m_e20_tt-0.7_task-22_user-metrics.csv
0,0.915223,0.172154,0.158875,0.721709,0.208725,0.184781,0.440034,0.205155,../metrics/bprmf/2016-05-24_17.53.45_bprmf_ml-1m_e20_tt-0.7_task-18_user-metrics.csv
0,0.913422,0.159973,0.146118,0.7207,0.191316,0.170988,0.400378,0.201375,../metrics/bprmf/2016-05-24_16.41.28_bprmf_ml-1m_e20_tt-0.7_task-15_user-metrics.csv
0,0.911484,0.162271,0.149599,0.72162,0.196217,0.173647,0.413086,0.204408,../metrics/bprmf/2016-05-24_16.40.08_bprmf_ml-1m_e20_tt-0.7_task-17_user-metrics.csv
0,0.905427,0.163419,0.152848,0.721721,0.198435,0.180741,0.449689,0.204893,../metrics/bprmf/2016-05-24_18.03.48_bprmf_ml-1m_e20_tt-0.7_task-10_user-metrics.csv
0,0.898353,0.162375,0.150951,0.725995,0.195745,0.180301,0.466317,0.216685,../metrics/bprmf/2016-05-24_17.53.54_bprmf_ml-1m_e20_tt-0.7_task-5_user-metrics.csv
0,0.895735,0.162838,0.151516,0.727426,0.196192,0.18064,0.472089,0.221207,../metrics/bprmf/2016-05-24_17.54.24_bprmf_ml-1m_e20_tt-0.7_task-0_user-metrics.csv


In [5]:
# best config
with open('../models/bprmf/2016-05-24_18.04.12_bprmf_ml-1m_e20_tt-0.7_task-19_config.json') as f:
    pp.pprint(json.loads(f.read()))

{   u'ada_eps': 1e-06,
    u'adagrad': True,
    u'binarize': True,
    u'binarize_neg': 0,
    u'binarize_pos': 1,
    u'binarize_threshold': 1,
    u'eval_in_parallel': False,
    u'experiment_name': u'bprmf_ml-1m_e20_tt-0.7_task-19',
    u'hit_threshold': 4,
    u'init_params_scale': 0.001,
    u'lr': 0.03,
    u'lr_decay': 0.02,
    u'metrics_save_dir': u'metrics/bprmf',
    u'model_save_dir': u'models/bprmf',
    u'nb_epochs': 20,
    u'nb_latent_f': 128,
    u'precision_recall_at_n': 20,
    u'ratings_path': u'data/splits/ml-1m/ratings.csv',
    u'reg_lambda': 0.001,
    u'run_eval': True,
    u'run_movie_metrics': False,
    u'sparse_item': True,
    u'test': True,
    u'test_path': u'data/splits/ml-1m/sparse-item/0.7-test.csv',
    u'top_n_predictions': 100,
    u'train_path': u'data/splits/ml-1m/sparse-item/0.7-train.csv',
    u'train_test_split': 0.7,
    u'triplet_sample_factor': 5,
    u'val': False,
    u'verbose': 0}
