In [None]:
%%javascript

// Set the notebook output area's auto-scroll threshold to 9999
// (presumably so the long result tables rendered below are shown in full
// instead of inside a scrolling box -- confirm against the Jupyter front end in use).
IPython.OutputArea.auto_scroll_threshold = 9999

In [None]:
# Move the working directory two levels up so that the relative path
# 'bayesian_benchmarks/results/<file>' opened further down resolves
# (presumably this lands in the repository root -- confirm).
# NOTE(review): a relative cd is not idempotent; re-running this cell moves up
# two more directories. Run it exactly once per kernel session.
cd ../../

In [3]:

from matplotlib import pyplot
import numpy as np
import pandas
from matplotlib import pyplot as plt
from scipy.stats import rankdata
from IPython.display import display, HTML

from bayesian_benchmarks.database_utils import Database
from bayesian_benchmarks.data import  _ALL_REGRESSION_DATATSETS
from bayesian_benchmarks.data import  _ALL_CLASSIFICATION_DATATSETS
# Single lookup table covering every dataset; classification entries are
# merged in second, so they take precedence if a key appears in both registries.
ALL_DATATSETS = dict(_ALL_REGRESSION_DATATSETS)
ALL_DATATSETS.update(_ALL_CLASSIFICATION_DATATSETS)
from bayesian_benchmarks.data import regression_datasets
from bayesian_benchmarks.data import classification_datasets


1


In [8]:
def rankarray(A):
    """Rank the entries of each row of ``A`` independently, ignoring NaNs.

    Each row is ranked with ``scipy.stats.rankdata`` (rank 1 = smallest value,
    ties receive their average rank). Entries that are NaN are excluded from
    the ranking and stay NaN in the output, so a missing result neither
    receives a rank nor invalidates the ranks of the other entries in its row.
    This is what allows the caller to aggregate the ranks with ``np.nanmean``.

    Parameters
    ----------
    A : iterable of array-like
        Rows of numeric values (each row must be convertible to float).

    Returns
    -------
    numpy.ndarray
        Array of per-row ranks with NaN wherever the input was NaN.
    """
    ranks = []
    for a in A:
        a = np.asarray(a, dtype=float)
        row_ranks = np.full(a.shape, np.nan)
        valid = ~np.isnan(a)
        # Rank only the observed values; an all-NaN row stays all-NaN.
        if valid.any():
            row_ranks[valid] = rankdata(a[valid])
        ranks.append(row_ranks)
    return np.array(ranks)


def read_regression_classification(fs, models_names, datasets, task,file='results.db'):
    """Read benchmark results from the results database and build summary tables.

    For every metric in ``fs`` a table (a dict of equally long column lists) is
    built with one row per dataset and one column per model. For metrics whose
    name does not contain ``'unnormalized'`` three summary rows are appended:
    the per-model mean (``'avg'``), median (``'median'``) and average rank
    (``'avg rank'``) over the datasets. The summary rows are skipped when
    ``datasets`` is empty, because there is nothing to aggregate.

    Parameters
    ----------
    fs : sequence of str
        Metric (database field) names to read. Values of ``'test_loglik'`` are
        negated before being averaged.
    models_names : sequence of [model, name] pairs
        ``model`` is matched against the ``model`` field in the database;
        ``name`` is the column header used in the tables.
    datasets : iterable of str
        Keys into the module-level ``ALL_DATATSETS`` registry. Names are
        truncated to 10 characters in the ``dataset`` column.
    task : str
        Passed as the first argument to ``db.read``. ``'classification'``
        additionally adds a ``K`` column to the tables.
    file : str, optional
        Database file name inside ``bayesian_benchmarks/results/``.

    Returns
    -------
    results : dict
        ``results[f]['table']`` holds the column lists for metric ``f`` and
        ``results[f]['vals']`` the per-dataset lists of per-model means
        (``nan`` where no result exists or the mean is outside (-1000, 1000)).
    fields : list of str
        Column order to use when rendering the tables.
    """
    model_columns = [m[1] for m in models_names]
    if task == 'classification':
        fields = ['dataset', 'N', 'D', 'K'] + model_columns
    else:
        fields = ['dataset', 'N', 'D'] + model_columns

    results = {}
    for f in fs:
        results[f] = {'table': {col: [] for col in fields}, 'vals': []}

    with Database('bayesian_benchmarks/results/'+file) as db:

        for dataset in datasets:
            # Dataset description columns are identical for every metric table.
            for f in fs:
                table = results[f]['table']
                table['dataset'].append(dataset[:10])
                table['N'].append(ALL_DATATSETS[dataset].N)
                table['D'].append(ALL_DATATSETS[dataset].D)
                if task == 'classification':
                    table['K'].append(ALL_DATATSETS[dataset].K)

            row = {f: [] for f in fs}
            for model, name in models_names:
                res = db.read(task, fs, {'model': model,
                                         'dataset': dataset})
                if len(res) == 0:
                    # No stored result for this model/dataset: blank cell,
                    # NaN in the numeric values used for the summary rows.
                    for f in fs:
                        results[f]['table'][name].append('')
                        row[f].append(np.nan)
                    continue

                for i, f in enumerate(fs):
                    if f == 'test_loglik':
                        # Negate so the table reports negative log-likelihoods.
                        L = [-float(l[i]) for l in res]
                    else:
                        L = [float(l[i]) for l in res]
                    m = np.average(L)
                    # The spread in parentheses is the standard deviation over
                    # the returned rows (not the standard error).
                    std = np.std(L) if len(L) > 1 else np.nan
                    if -1000 < m < 1000:
                        r = '{:.3f}({:.3f})'.format(m, std)
                        row[f].append(m)
                    else:
                        # Means outside (-1000, 1000) are reported as missing.
                        r = 'nan'
                        row[f].append(np.nan)

                    results[f]['table'][name].append(r)

            for f in fs:
                results[f]['vals'].append(row[f])

    for f in fs:
        if 'unnormalized' in f:
            continue
        if len(results[f]['vals']) == 0:
            # Without any dataset rows the reductions below return scalars,
            # which cannot be paired with the model columns; skip the summary.
            continue

        vals = np.array(results[f]['vals'])

        avgs = np.nanmean(vals, 0)
        meds = np.nanmedian(vals, 0)
        rks = np.nanmean(rankarray(vals), 0)

        table = results[f]['table']
        for s, n in [[avgs, 'avg'], [meds, 'median'], [rks, 'avg rank']]:
            table['dataset'].append(n)
            table['N'].append('')
            table['D'].append('')
            if task == 'classification':
                table['K'].append('')
            for ss, name in zip(s, model_columns):
                table[name].append('{:.3f}'.format(ss))

    return results, fields

In [None]:
# [database model identifier, table column label] pairs for the regression tables.
models_names = [
    ['gPoE_100_100_clustering_variance', 'rBCM/gPoE_var'],
    ['bar_100_100_clustering_variance', 'bar_var'],
    ['rBCM_100_100_clustering_diff_entr', 'rbcm_entr'],
    ['gp', 'gp'],
    ['linear', 'linear'],
]

# Metrics to read; summary rows are only produced for the names that do not
# contain 'unnormalized'.
fs = (
    'test_loglik',
    'test_rmse',
    'test_loglik_unnormalized',
    'test_rmse_unnormalized',
)

results, fields = read_regression_classification(
    fs=fs,
    models_names=models_names,
    datasets=regression_datasets,
    task='regression',
)


In [None]:

# Render one HTML table per normalised regression metric.
# 'test_loglik' values are negated when the tables are built, so that table
# shows negative log predictive densities (NLPDs).
for label, metric in [('NLPDs', 'test_loglik'),
                      ('normalised test rmse', 'test_rmse')]:
    print(label)
    display(HTML(pandas.DataFrame(results[metric]['table'], columns=fields).to_html(index=False)))


In [None]:
# [database model identifier, table column label] pairs for the classification tables.
models_names = [
    ['bar_10_500_random_variance', 'bar_var'],
    ['gPoE_10_500_random_variance', 'gpoe_var'],
    ['gPoE_10_500_random_uniform', 'gpoe_unif'],
    ['linear', 'linear'],
]

# Metrics to read for the classification benchmarks.
fs = (
    'top_1_acc',
    'top_2_acc',
    'top_3_acc',
    'test_loglik',
)

results, fields = read_regression_classification(
    fs=fs,
    models_names=models_names,
    datasets=classification_datasets,
    task='classification',
)


In [None]:

# Render one HTML table per classification metric, in the order listed.
for label, metric in [('top 1 accuracy', 'top_1_acc'),
                      ('top 2 accuracy', 'top_2_acc'),
                      ('top 3 accuracy', 'top_3_acc'),
                      ('NLPDs', 'test_loglik')]:
    print(label)
    metric_frame = pandas.DataFrame(results[metric]['table'], columns=fields)
    display(HTML(metric_frame.to_html(index=False)))
