Hierarchical evaluation - Graphs
========

In [None]:
#use full width of screen in Jupyter notebooks
# from IPython.core.display import display, HTML
# display(HTML("<style>.container { width:100% !important; }</style>"))

import numpy as np
import pandas as pd
import glob, os.path, sys, math, re

from techniques import sortedTechniques, sortedDatasets

%matplotlib inline
from plotting.plots import *

from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer, StandardScaler, LabelEncoder

from metric_learn import LMNN, NCA, LFDA, Covariance#, CMAES, FullMatrixTransformer, NeuralNetworkTransformer
from metric_learn import ITML_Supervised, SDML_Supervised, LSML_Supervised, RCA_Supervised

import os
import plotly
# SECURITY: the Plotly user name and API key used to be hard-coded on this line.
# They are now read from the environment; the key that was committed here should
# be treated as leaked and rotated.
# NOTE(review): every figure in this notebook is drawn through `plotly.offline`,
# so the credentials look optional - confirm before relying on that.
_plotlyUsername = os.environ.get('PLOTLY_USERNAME')
_plotlyApiKey = os.environ.get('PLOTLY_API_KEY')
if _plotlyUsername and _plotlyApiKey:
    plotly.tools.set_credentials_file(username=_plotlyUsername, api_key=_plotlyApiKey)
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode()

# Folder names used by this notebook; all are relative paths.
datasetsDirectory = 'datasets'
resultsDirectory = 'results/classification'
dumpsDirectory = 'results/dumps'

# Create the two output folders when they are missing; existing paths are left alone.
for _outputDirectory in (resultsDirectory, dumpsDirectory):
    if not os.path.exists(_outputDirectory):
        os.makedirs(_outputDirectory)

def gfn(filename):
    """Return the output path of the thesis graph called `filename`.

    The graphs folder (with its parents) is created when it does not exist yet.
    """
    graphsDirectory = '../thesis-distance-metric-learning/thesis/graphs/classification'
    folderMissing = not os.path.exists(graphsDirectory)
    if folderMissing:
        os.makedirs(graphsDirectory)
    return '{}/{}'.format(graphsDirectory, filename)
    
# Experiment defaults. None of these four names is read anywhere else in the
# visible part of this notebook.
default_n_jobs = 8
default_random_state = 789
default_n_folds = 10
default_shuffle = True

import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Detach AND close the handlers left over from an earlier run of this cell.
# Resetting `logger.handlers = []` dropped the old FileHandler without closing
# it, so every re-run kept another handle on error.log open.
for _staleHandler in list(logger.handlers):
    logger.removeHandler(_staleHandler)
    _staleHandler.close()
# Everything (DEBUG and above) goes to the log file in the results folder ...
fileHandler = logging.FileHandler("{}/error.log".format(resultsDirectory))
fileHandler.setLevel(logging.DEBUG)
logger.addHandler(fileHandler)
# ... while only errors are echoed to the notebook output.
consoleHandler = logging.StreamHandler(sys.stdout)
consoleHandler.setLevel(logging.ERROR)
logger.addHandler(consoleHandler)

import pickle
def save_object(obj, filename):
    """Pickle `obj` into ``<dumpsDirectory>/dump_<filename>.bin``.

    Parameters
    ----------
    obj : object
        Any picklable object.
    filename : str
        Name placed between ``dump_`` and ``.bin``.
    """
    dumpPath = "{}/dump_{}.bin".format(dumpsDirectory, filename)
    with open(dumpPath, 'wb') as output:
        # The previous message concatenated the two strings without a separator
        # ("saving filefoo"); use logging's own lazy formatting instead.
        logger.info("saving file %s", filename)
        pickle.dump(obj, output, pickle.HIGHEST_PROTOCOL)
        
def load_obj(name, resultsDirectory=resultsDirectory):
    """Unpickle and return the object stored in ``<resultsDirectory>/<name>.pkl``.

    The default folder is the value the module-level ``resultsDirectory`` had
    when this function was defined.
    """
    # NOTE(review): pickle.load can execute arbitrary code - only point this at
    # files produced by your own runs.
    picklePath = '{}/{}.pkl'.format(resultsDirectory, name)
    with open(picklePath, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

Statistics
===========

In [None]:
resultsMask = "{}/*_result.csv".format(resultsDirectory)
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0;
# `read_csv(path, index_col=0, parse_dates=True)` is its documented replacement
# and exists in every pandas version.
resultFrames = [pd.read_csv(x, index_col=0, parse_dates=True) for x in sorted(glob.glob(resultsMask))]
alldatatable = pd.concat(resultFrames)

# BUGFIX - ZERO SCORES
# scores = [ x for x in alldatatable.columns.values if x[:5]=='score']
# alldatatable['mean'] = alldatatable[scores].mean(axis=1)

# Three index levels: dataset, technique and the row label read from the CSV file.
alldata = alldatatable.set_index(['dataset','technique', alldatatable.index])
alldata

In [None]:
# Same rows as `alldata`, with the kNN neighbour count as an additional index level.
knnIndexKeys = ['dataset', 'technique', 'knn__n_neighbors', alldatatable.index]
alldataknn = alldatatable.set_index(knnIndexKeys)
alldataknn

# Best-run success rate per dataset

In [None]:
# Switch for techniques whose display name starts with 'norm': getTableResults
# stores an empty (NaN) cell for them when this is False, and the box-plot
# cells pass the value to plotBox as `doubleColors`.
showNorm = True

def printCell(x, y, best):
    r"""Format one ``value\pm deviation`` LaTeX table cell.

    Parameters
    ----------
    x, y : float
        The value and its deviation; both are rendered with two decimals.
    best : bool
        When true the cell content is wrapped in ``\bm{...}``.

    Returns
    -------
    str
        The cell text, starting with the `` & `` column separator.
    """
    # Raw strings: "\p" is not a valid escape sequence, so the former non-raw
    # literals triggered an invalid-escape warning on Python 3.6 and later.
    template = r" & $\bm{{{:.2f}\pm{:.2f}}}$" if best else r" & ${:.2f}\pm{:.2f}$"
    return template.format(x, y)

def printTime(t, best):
    """Format one LaTeX table cell holding the number `t` with two decimals.

    The content is wrapped in ``\\bm{...}`` when `best` is true.
    """
    template = " & $\\bm{{{:.2f}}}$" if best else " & ${:.2f}$"
    return template.format(t)
    
def printError():
    """Return the LaTeX table cell used for a failed run."""
    errorCell = " & Error "
    return errorCell

def printEmpty():
    """Return the LaTeX table cell used when no value is available."""
    emptyCell = " & Timeout "
    return emptyCell

In [None]:
def expandTuple(x, n):
    """Return `x` unchanged when ``n == 1``, otherwise a list holding `x` `n` times."""
    return x if n == 1 else [x] * n

def getTableResults(calculateCell, tuplesSize=1):
    """Build one table cell per (dataset, technique) from the best run of each pair.

    The best run is the row of `alldata` with the highest ``mean`` among the
    rows selected for that dataset and technique.

    Parameters
    ----------
    calculateCell : callable
        Called with the best row; its return value becomes the cell.
    tuplesSize : int, optional
        Width of the placeholder stored when no cell is computed
        (see `expandTuple`).

    Returns
    -------
    dict
        ``results[dataset][techniqueIndex]`` is the value returned by
        `calculateCell`, a NaN placeholder (no rows, a hidden ``norm``
        technique, or fold scores that sum to zero) or an ``'E'`` placeholder
        (the best row carries an error string).
    """
    # The fold-score columns are the same for every row, so look them up once.
    scores = [x for x in alldata.columns.values if x[:5] == 'score']

    results = {}
    for dataset in sortedDatasets:
        datasetResults = results[dataset] = {}
        for techniqueIndex, technique in sortedTechniques:
            techniqueRows = alldata.loc[dataset].loc[techniqueIndex]
            hiddenNorm = (not showNorm) and technique[:4] == 'norm'
            if techniqueRows.shape[0] == 0 or hiddenNorm:
                datasetResults[techniqueIndex] = expandTuple(float('NaN'), tuplesSize)
                continue

            # Select the best run by *position*. `Series.argmax` returned an
            # index label before pandas 1.0 and a position afterwards, so
            # feeding it to `.iloc` was only correct on some pandas versions.
            bestPosition = int(np.nanargmax(techniqueRows['mean'].values.astype(float)))
            bestRow = techniqueRows.iloc[bestPosition, :]

            if isinstance(bestRow['error'], str):
                print(technique, dataset, bestRow['error'])
                datasetResults[techniqueIndex] = expandTuple('E', tuplesSize)
                continue

            # All-zero fold scores are reported as "no result".
            if sum(bestRow[scores].values) == 0:
                datasetResults[techniqueIndex] = expandTuple(float('NaN'), tuplesSize)
                continue

            datasetResults[techniqueIndex] = calculateCell(bestRow)
    return results

def printErrorTable(datasets, results):
    """Print LaTeX table rows: a header of dataset names, then one row per technique.

    Parameters
    ----------
    datasets : sequence of str
        Datasets to print, in column order.
    results : dict
        ``results[dataset][techniqueIndex]`` is a two-item sequence whose first
        item is the error (a number, NaN or the string ``'E'``) and whose
        second item is its deviation.

    The lowest valid error of each dataset is printed in bold; values are
    compared after rounding to two decimals, so ties are all bold.
    """
    for dataset in datasets:
        print("& \\multicolumn{{1}}{{c}}{{{}}} ".format(dataset), end='')

    print('\\\\ \n\\midrule')

    # Lowest valid error per dataset, computed once instead of once per
    # technique. A dataset without any valid cell maps to None; calling min()
    # on the empty list used to raise ValueError for such a dataset.
    bestErrors = {}
    for dataset in datasets:
        validErrors = [cell[0] for cell in results[dataset].values()
                       if not isinstance(cell[0], str) and not math.isnan(cell[0])]
        bestErrors[dataset] = min(validErrors) if validErrors else None

    for techniqueIndex, technique in sortedTechniques:
        print(technique, end='')

        for dataset in datasets:
            error, deviation = results[dataset][techniqueIndex]

            if error == 'E':
                print(printError(), end='')
            elif math.isnan(error):
                print(printEmpty(), end='')
            else:
                isBest = '{:.2f}'.format(error) == '{:.2f}'.format(bestErrors[dataset])
                print(printCell(error, deviation, best=isBest), end='')
        print(' \\\\')
        
def printTimeTable(datasets, results):
    """Print LaTeX table rows of runtimes: a header, then one row per technique and transformer.

    Parameters
    ----------
    datasets : sequence of str
        Datasets to print, in column order.
    results : dict
        ``results[dataset][techniqueIndex][transformer]`` is a number, NaN or
        the string ``'E'``.

    Only techniques whose id starts with ``'stand'`` are printed, and
    ``'stand+kNN'`` itself is left out. The lowest valid value of a dataset
    (over all techniques and transformers) is printed in bold.
    """
    for dataset in datasets:
        print("& \\multicolumn{{1}}{{c}}{{{}}} ".format(dataset), end='')

    transformerName = {'full': 'full', 'diagonal': 'diag'}

    print('\\\\ \n\\midrule')

    for transformer in ['full', 'diagonal']:
        for position, (techniqueIndex, technique) in enumerate(sortedTechniques):
            isStandardised = techniqueIndex[:5] == 'stand'
            if not isStandardised or techniqueIndex == 'stand+kNN':
                continue

            # NOTE(review): entries at positions 0..9 of sortedTechniques get a
            # single row without a transformer suffix - presumably they have no
            # diagonal variant; confirm against sortedTechniques.
            inFirstTen = position <= 9
            if inFirstTen and transformer == 'diagonal':
                continue

            if inFirstTen:
                rowLabel = "{}".format(technique)
            else:
                rowLabel = "{} ({})".format(technique, transformerName[transformer])
            print(rowLabel, end='')

            for dataset in datasets:
                perTransformer = results[dataset][techniqueIndex]
                # A technique without an entry for this transformer prints no cell at all.
                if transformer not in perTransformer:
                    continue
                runtime = perTransformer[transformer]

                validTimes = [value
                              for timings in results[dataset].values()
                              for value in timings.values()
                              if not isinstance(value, str) and not math.isnan(value)]

                if runtime == 'E':
                    cell = printError()
                elif math.isnan(runtime) or len(validTimes) == 0:
                    cell = printEmpty()
                else:
                    isBest = '{:.2f}'.format(runtime) == '{:.2f}'.format(min(validTimes))
                    cell = printTime(runtime, best=isBest)
                print(cell, end='')
            print(' \\\\')

In [None]:
def calculateErrors(bestRow):
    """Return ``(error, deviation)`` in percent for one result row.

    The error is ``100 * (1 - mean)`` of the row's ``score*`` columns and the
    deviation is ``np.std`` of those columns scaled by 100.
    """
    foldColumns = [column for column in alldata.columns.values if column[:5] == 'score']
    foldScores = bestRow[foldColumns]
    errorPercent = 100*(1.0-np.mean(foldScores))
    deviationPercent = np.std(100*foldScores)
    return (errorPercent, deviationPercent)

# Error table, printed in two parts (first five datasets, then the rest) that
# are separated by a \midrule.
results = getTableResults(calculateErrors, tuplesSize=2)
firstDatasets, remainingDatasets = sortedDatasets[:5], sortedDatasets[5:]
printErrorTable(firstDatasets, results)
print('\\midrule')
printErrorTable(remainingDatasets, results)

In [None]:
runtimesDirectory = 'results/runtimes'

# resultsByDataset[dataset][method][transformer] -> mean of the pickled values.
# File names are expected to look like <dataset>__<method>__<transformer>.pkl.
resultsByDataset = {}
for picklePath in sorted(glob.glob("{}/*.pkl".format(runtimesDirectory))):
    # os.path copes with either path separator and any nesting depth; the
    # former regex split required the path to have exactly three components.
    stem = os.path.splitext(os.path.basename(picklePath))[0]
    datasetName, methodName, transformer = stem.split('__')
    perMethod = resultsByDataset.setdefault(datasetName, {}).setdefault(methodName, {})
    perMethod[transformer] = np.mean(load_obj(stem, runtimesDirectory))

# Printed in two parts (first five datasets, then the rest) separated by a \midrule.
printTimeTable(sortedDatasets[:5], resultsByDataset)
print('\\midrule')
printTimeTable(sortedDatasets[5:], resultsByDataset)

In [None]:
# One line per standardised technique: its best mean success rate on every dataset.
data = []
labels = []
for techniqueIndex, technique in sortedTechniques:
    if techniqueIndex[:5]!="stand":continue
    labels.append(technique)
    means = []
    for dataset in alldata.index.levels[0]:
        techniqueRows = alldata.loc[dataset].loc[techniqueIndex]
        if techniqueRows.shape[0]==0:
            means.append(np.nan)
            continue
        # Only the best mean is needed, so take the maximum directly. The former
        # `argmax()` + `.loc` lookup relied on `Series.argmax` returning an index
        # label, which is no longer true from pandas 1.0 on.
        best = techniqueRows['mean'].max()
        # A best mean of exactly 0 is drawn as a gap.
        means.append(np.nan if best == 0 else best)

    data.append(means)

fig, [ax] = startGraphing('Successrates', size=(8,5))
# Keyword arguments, as in the other plotLines calls of this notebook, so that
# technique names and dataset names cannot be swapped by argument order.
plotLines(ax, data, labels=labels, x_ticks=alldata.index.levels[0], rotateLabels=90)
endGraphing(fig)

In [None]:
# Grid of box plots: one panel per dataset, one box per technique (fold scores of its best run).
# NOTE(review): six axes are requested and four datasets are excluded below, so
# this presumably expects ten datasets - confirm.
fig, axes = startGraphing(None, size=(8,10), cols=2, N=6)
ii=0

showBestParams = False
showNorm = True

# Column groups do not depend on the row, so look them up once.
scores = [ x for x in alldata.columns.values if x[:5]=='score']
train_scores = [ x for x in alldata.columns.values if x[:11]=='train_score']

for dataset in alldata.index.levels[0]:
    if dataset in ['digits6', 'mice-protein', 'iris', 'ionosphere']: continue

    techniques = []
    boxdata = []
    skipped = []  # positions in sortedTechniques that get no box
    for i, (techniqueIndex, technique) in enumerate(sortedTechniques):
        techniqueRows = alldata.loc[dataset].loc[techniqueIndex]
        if techniqueRows.shape[0]==0:
            skipped.append(i)
            continue

        # Select the best run by position; `Series.argmax` + `.iloc` was only
        # correct on pandas versions where argmax is positional.
        bestPosition = int(np.nanargmax(techniqueRows['mean'].values.astype(float)))
        bestRow = techniqueRows.iloc[bestPosition,:]

        if sum(bestRow[scores].values)==0:
            skipped.append(i)
            continue

        techniques.append( technique )
        # Folds with a score of 0 are left out of the box.
        boxdata.append([x for x in bestRow[scores].values if x > 0])

        if showBestParams and techniqueIndex=='CMAES+kNN':
            print("======",dataset,"======",techniqueIndex,"======")
            print(bestRow.dropna().drop(scores+train_scores+['std', 'train_std', 'mean', 'train_mean']+['time1','time2','time3']))
            print()

    title = '`{}` dataset'.format(dataset)
    plotBox(axes[ii], boxdata, techniques, means=None, title=title, doubleColors=showNorm, skipped=skipped)
    ii += 1

endGraphing(fig, filename=gfn('sr'))

# Average success rate across all datasets (from best runs only!)

In [None]:
# valuesPerTechnique[technique] collects, over all datasets, the mean and the
# fold scores of the technique's best run.
scores = [ x for x in alldata.columns.values if x[:5]=='score']
valuesPerTechnique = {}
for dataset in alldata.index.levels[0]:
    for technique, techniqueName in sortedTechniques:
        techniqueRows = alldata.loc[dataset].loc[technique]
        if techniqueRows.shape[0]==0:
            continue

        # Select the best run by position; `Series.argmax` + `.iloc` was only
        # correct on pandas versions where argmax is positional.
        bestPosition = int(np.nanargmax(techniqueRows['mean'].values.astype(float)))
        bestRow = techniqueRows.iloc[bestPosition,:]

        collected = valuesPerTechnique.setdefault(technique, {'means':[], 'scores':[]})

        # SKIP ALL WHERE THERE IS SUCCESSRATE 0 IN ANY FOLD
        if any(x == 0 for x in bestRow[scores].values):
            continue

        collected['means'].append(bestRow['mean'])
        collected['scores'].extend(bestRow[scores].values)

# traces = []
# means = []
# for technique,value in sortedTechniques:
#     traces.append( go.Box(
#             y=valuesPerTechnique[technique]['scores'],
#             name=technique,
#     ))
#     means.append((technique, np.mean(valuesPerTechnique[technique]['means'])))
    
# traces.append( go.Scatter( 
#         x=[x[0] for x in means],
#         y=[x[1] for x in means],
#         mode='lines', name='mean', marker=dict(color='black')
#     ))
    
# layout = go.Layout(
#     yaxis=dict(title='successrate', zeroline=False ),
#     title='Aggregated',
# )

# fig = go.Figure(data=traces, layout=layout)
# py.iplot(fig)
# # plotly.plotly.image.save_as(fig, filename='{}/{}-plot.png'.format(resultsDirectory, dataset))

boxdata = []
means = []
filteredTechniques = []
# Positions in sortedTechniques that get no box. This used to be whatever value
# `skipped` had been left with by an earlier cell.
skipped = []
for position, (technique, value) in enumerate(sortedTechniques):
    collected = valuesPerTechnique.get(technique)
    # Techniques without any collected score are left out instead of raising
    # KeyError or drawing an empty box.
    if not collected or len(collected['scores']) == 0:
        skipped.append(position)
        continue
    boxdata.append(collected['scores'])
    means.append(np.mean(collected['means']))
    filteredTechniques.append(value)

# NOTE(review): every other plotBox call in this notebook passes an axis first
# and the title as a keyword; this call passed the title string as the first
# argument. Confirm against plotting.plots.plotBox.
fig, [ax] = startGraphing()
plotBox(ax, boxdata, filteredTechniques, means, title='Aggregated scores', doubleColors=showNorm, skipped=skipped)
endGraphing(fig)

# Times for best run per dataset

In [None]:
# One stacked bar chart per dataset: the `time*` columns of each technique's best run.
timeColumns = [ x for x in alldata.columns.values if x[:4]=='time']
for dataset in alldata.index.levels[0]:

    traces = []
    for timeCol in timeColumns:
        # Technique names are collected together with the values: techniques
        # without rows are skipped, and using the full technique level as x
        # shifted every later bar under the wrong name.
        techniqueNames = []
        times = []
        for technique in alldata.index.levels[1]:
            techniqueRows = alldata.loc[dataset].loc[technique]
            if techniqueRows.shape[0]==0:
                continue

            # Select the best run by position; `Series.argmax` + `.iloc` was
            # only correct on pandas versions where argmax is positional.
            bestPosition = int(np.nanargmax(techniqueRows['mean'].values.astype(float)))
            bestRow = techniqueRows.iloc[bestPosition,:]

            techniqueNames.append( technique )
            times.append( bestRow[timeCol] )

        times = [0 if np.isnan(x) else x for x in times]

        traces.append(
            go.Bar(
                y=times,
                x=techniqueNames,
                name=timeCol
            )
        )

    layout = go.Layout(
        barmode='stack',
        yaxis=dict(
            title='seconds',
            zeroline=False
        ),
        title=dataset,
    )
    fig = go.Figure(data=traces, layout=layout)
    py.iplot(fig)
#     plotly.plotly.image.save_as(fig, filename='{}/{}-plot.png'.format(resultsDirectory, dataset))


# Average times per technique per dataset

In [None]:
# One stacked bar chart per dataset: the `time*` columns averaged over all runs of each technique.
timeColumns = [ x for x in alldata.columns.values if x[:4]=='time']
for dataset in alldata.index.levels[0]:

    traces = []
    for timeCol in timeColumns:
        # Technique names are collected together with the values: techniques
        # without rows are skipped, and using the full technique level as x
        # shifted every later bar under the wrong name.
        techniqueNames = []
        times = []
        for technique in alldata.index.levels[1]:
            techniqueRows = alldata.loc[dataset].loc[technique]
            if techniqueRows.shape[0]==0:
                continue

            techniqueNames.append( technique )
            times.append( techniqueRows[timeCol].mean() )

        times = [0 if np.isnan(x) else x for x in times]

        traces.append(
            go.Bar(
                y=times,
                x=techniqueNames,
                name=timeCol
            )
        )

    layout = go.Layout(
        barmode='stack',
        yaxis=dict(
            title='seconds',
            zeroline=False
        ),
        title=dataset,
    )
    fig = go.Figure(data=traces, layout=layout)
    py.iplot(fig)
#     plotly.plotly.image.save_as(fig, filename='{}/{}-plot.png'.format(resultsDirectory, dataset))


# Looking at k param (kNN)

# Success rates for technique per dataset per k param in kNN (a lot of graphs!)

In [None]:
showBestParams = False
scores = [ x for x in alldataknn.columns.values if x[:5]=='score']
for dataset in alldataknn.index.levels[0]:
    for techniqueIndex, technique in sortedTechniques:
        means = []
        techniques = []
        boxdata = []
        for k in alldataknn.index.levels[2]:
            techniqueRows = alldataknn.loc[dataset].loc[techniqueIndex].loc[k]

            if techniqueRows.shape[0]==0:
                continue

            # Select the best run by position. The former `argmax()` + `.loc`
            # lookup relied on `Series.argmax` returning an index label, which
            # is no longer true from pandas 1.0 on.
            bestPosition = int(np.nanargmax(techniqueRows['mean'].values.astype(float)))
            bestRow = techniqueRows.iloc[bestPosition,:]

            means.append( bestRow['mean'] )
            techniques.append(k)
            # Folds with a score of 0 are left out of the box.
            boxdata.append([x for x in bestRow[scores].values if x > 0])

            if showBestParams:
                print("======",dataset,"======",technique,"======")
                print(bestRow)
                print()

        fig, [ax] = startGraphing()
        title = "{}: {}".format(technique, dataset)
        plotBox(ax, boxdata, techniques, means, title=title, xlabel='"k" in kNN', doubleColors=False, rotateLabels=0)

        # The two breaks limit the output to the first technique of the first
        # dataset; remove them to draw every combination.
        break
    break

# Success rates for each dataset depending on the k parameter (kNN)

In [None]:
showBestParams = False
# The x axis is the same for every plot; defining it up front also covers the
# case where no technique passes the filters below.
k_label = alldataknn.index.levels[2]
for dataset in alldataknn.index.levels[0]:
    filteredTechniques = []
    data = []
    for techniqueIndex, technique in sortedTechniques:
        if techniqueIndex[:5]=="stand": continue

        means = []
        for k in k_label:
            techniqueRows = alldataknn.loc[dataset].loc[techniqueIndex].loc[k]
            if techniqueRows.shape[0]==0: continue
            # Only the best mean is needed, so take the maximum directly. The
            # former `argmax()` + `.loc` lookup relied on `Series.argmax`
            # returning an index label, which is no longer true from pandas 1.0 on.
            means.append( techniqueRows['mean'].max() )

        if sum(means)==0: continue
        data.append(means)
        filteredTechniques.append(technique)

    fig, [ax] = startGraphing('`{}` dataset'.format(dataset), size=(8,5))
    plotLines(ax, data, x_ticks=k_label, labels=filteredTechniques, doubleColors=False, xlabel='"k" in kNN')
    endGraphing(fig)
#     break

In [None]:
# Grid of line plots: one panel per dataset, one line per standardised
# technique, best mean success rate against the kNN `k`.
fig, axes = startGraphing(None, size=(8,10), cols=3, N=9)
i=0

showBestParams = False

# The x axis is the same for every panel; defining it up front also covers the
# case where no technique passes the filters below.
k_label = alldataknn.index.levels[2]

filteredTechniques = []
for dataset in alldataknn.index.levels[0]:
    data = []

    if dataset in ['digits6']: continue

    for techniqueIndex, technique in sortedTechniques:
        if techniqueIndex[:5]!="stand": continue

        means = []
        for k in k_label:
            techniqueRows = alldataknn.loc[dataset].loc[techniqueIndex].loc[k]
            if techniqueRows.shape[0]==0:
                continue
            # Only the best mean is needed, so take the maximum directly. The
            # former `argmax()` + `.loc` lookup relied on `Series.argmax`
            # returning an index label, which is no longer true from pandas 1.0 on.
            means.append( techniqueRows['mean'].max() )

        if sum(means)==0: continue
        data.append(means)
        # The shared legend is taken from the techniques drawn in the first panel.
        if i == 0:
            filteredTechniques.append(technique)

    plotLines(axes[i], data, x_ticks=k_label, doubleColors=False, xlabel='`k` in kNN classifier', title='`{}` dataset'.format(dataset))
    i += 1

endGraphing(fig, legend=filteredTechniques, move_title=.965, legend_ncol=4, adjust_legend=.125, filename=gfn('sr_knn'))

# Success rate depending on parameter

In [None]:
# Hyper-parameters to plot, as (technique, parameter) pairs. The second item is
# either one parameter name or a list of names; each name is combined with the
# technique into a '<technique>__<name>' column of the results table. The nine
# active entries match the nine axes requested by the plotting code that follows.
interesting_params = [
    ('lmnn', 'k'),
    ('lmnn', 'max_iter'),
    ('lmnn', 'regularization'),
#     ('itml', 'num_constraints'),
#     ('itml', 'gamma'),
#     ('itml', 'max_iters'),
#     ('sdml', 'num_constraints'),
#     ('sdml', 'use_cov'),
#     ('sdml', 'balance_param'),
#     ('sdml', 'sparsity_param'),
#     ('lsml', 'num_constraints'),
#     ('lsml', 'max_iter'),
#     ('nca', 'max_iter'),
    ('nca', 'learn_rate'),
    ('lfda', 'metric'),
#     ('rca', 'num_chunks'),
#     ('rca', 'chunk_size'),
    ('cmaes', ['transformer', 'metric']),
#     ('cmaes', 'n_gen'),
    ('cmaes', ['n_neighbors', 'c__n_neighbors']),
    ('cmaes', ['knn_weights', 'c__weights']),
    ('jde', ['transformer']),
#     ('jde', ['s__n_gen']),
#     ('jde', ['knn_weights', 'c__weights']),
]

def findName(te):
    """Return the display name paired with technique id `te` in sortedTechniques.

    The first matching entry wins; None is returned when there is no match.
    """
    return next((name for x, name in sortedTechniques if x == te), None)

# Grid of line plots: one panel per entry of interesting_params, one line per
# dataset, best mean success rate for every value of the parameter.
(fig, ax), ii = startGraphing(cols=3, N=9, size=(8, 3*3)), 0
# Defined before the loops so the legend below never depends on a loop having run.
datasetL = alldata.index.levels[0]
for technique,paramNames in interesting_params:
    if not isinstance(paramNames, (list, tuple)):
        paramNames = [paramNames]

    techniqueFullName = 'stand+{}+kNN'.format(technique.upper())

    # The parameter values do not depend on the dataset, so collect them once.
    valuesPerParam = []
    for paramName in paramNames:
        paramColumn = '{}__{}'.format(technique, paramName)
        possibleParamValues = alldata[paramColumn].dropna().unique()
        possibleParamValues.sort()
        valuesPerParam.append((paramColumn, possibleParamValues))

    data = []
    for dataset in datasetL:
        means = []
        for paramColumn, possibleParamValues in valuesPerParam:
            for param in possibleParamValues:
                techniqueRows = alldataknn.loc[dataset].loc[techniqueFullName]
                techniqueRows = techniqueRows[techniqueRows[paramColumn] == param]
                if techniqueRows.shape[0]==0: continue
                # Only the best mean is needed, so take the maximum directly.
                # The former `argmax()` + `.loc` lookup relied on
                # `Series.argmax` returning an index label, which is no longer
                # true from pandas 1.0 on.
                means.append( techniqueRows['mean'].max() )
        data.append(means)

    # NOTE(review): as before, the x ticks are the values of the LAST name in
    # paramNames, while `means` holds the values found for ALL names - confirm
    # that at most one of the alternative columns is populated.
    param_labels = valuesPerParam[-1][1]
    title = '{0}'.format(findName(techniqueFullName))

    # Keyword arguments, as in the other plotLines calls of this notebook, so
    # the tick values cannot be taken for line labels.
    plotLines(ax[ii], data, x_ticks=param_labels, labels=None, title=title, doubleColors=False, xlabel='`{}` parameter'.format(paramNames[0]))
    ii +=1

endGraphing(fig, filename=gfn('sr_hyp'), legend=datasetL, adjust_legend=.13, legend_ncol=4, legend_position='bottom')

# Learning times for all techniques per dataset

In [None]:
showBestParams = False
traces = []
for technique in alldata.index.levels[1]:
    # The plain kNN baselines are left out. This check used to sit inside the
    # dataset loop, which still added an empty trace (and legend entry) for them.
    if technique in ['kNN', 'stand+kNN']: continue

    times = []
    datasetNames = []
    for dataset in alldata.index.levels[0]:
        if dataset in ['mice-protein']: continue

        techniqueRows = alldataknn.loc[dataset].loc[technique]
        if techniqueRows.shape[0]==0: continue

        # Select the best run by position. The former `argmax()` + `.loc`
        # lookup relied on `Series.argmax` returning an index label, which is
        # no longer true from pandas 1.0 on.
        bestPosition = int(np.nanargmax(techniqueRows['mean'].values.astype(float)))
        bestRow = techniqueRows.iloc[bestPosition,:]

        # `time2` is divided by 3600 for the axis titled 'hours'.
        times.append( bestRow['time2']/3600 )
        datasetNames.append(dataset)

    traces.append( go.Scatter( x=datasetNames, y=times, mode='lines', name=technique ))

layout = go.Layout(
    yaxis=dict(title='hours', zeroline=False ),
    title='Learning times',
)
fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)
    
# #     plotly.plotly.image.save_as(fig, filename='{}/{}-plot.png'.format(resultsDirectory, dataset))
#     break

# Training time depending on parameter

In [None]:
# Hyper-parameters for the training-time plots, as (technique, parameter) pairs;
# each pair names a '<technique>__<parameter>' column of the results table.
# This rebinds `interesting_params`, replacing the list defined for the
# success-rate plots earlier in the notebook.
interesting_params = [
    ('lmnn', 'k'),
#     ('lmnn', 'max_iter'),
#     ('lmnn', 'regularization'),
#     ('lmnn', 'max_iter'),
# #     ('lmnn', 'learn_rate'),
#     ('itml', 'num_constraints'),
#     ('itml', 'gamma'),
#     ('itml', 'max_iters'),
#     ('sdml', 'num_constraints'),
#     ('sdml', 'use_cov'),
#     ('sdml', 'balance_param'),
#     ('sdml', 'sparsity_param'),
#     ('lsml', 'num_constraints'),
#     ('lsml', 'max_iter'),
# #     ('nca', 'max_iter'),
# #     ('nca', 'learn_rate'),
#     ('lfda', 'metric'),
# #     ('rca', 'num_chunks'),
# #     ('rca', 'chunk_size'),
#     ('cmaes', 'metric'),
#     ('cmaes', 'n_gen'),
#     ('cmaes', 'n_neighbors'),
#     ('cmaes', 'knn_weights'),
]

# One chart per (technique, parameter): mean `time2` of all runs with a given
# parameter value, one line per dataset.
for technique,paramName in interesting_params:
    # NOTE(review): the success-rate plots look techniques up as
    # 'stand+<TECHNIQUE>+kNN' while this cell uses '<TECHNIQUE>+kNN' - confirm
    # that the non-standardised runs are the intended ones here.
    techniqueFullName = '{}+kNN'.format(technique.upper())
    paramColumn = '{}__{}'.format(technique, paramName)
    possibleParamValues = alldata[paramColumn].dropna().unique()

    traces = []
    for dataset in alldata.index.levels[0]:
        means = []
        # Labels are collected together with the values: parameter values
        # without runs are skipped, and labelling with every possible value
        # shifted the remaining points under the wrong label.
        param_labels = []
        for param in possibleParamValues:
            techniqueRows = alldataknn.loc[dataset].loc[techniqueFullName]
            techniqueRows = techniqueRows[techniqueRows[paramColumn]==param]
            if techniqueRows.shape[0]==0: continue

            # `time2` is divided by 3600 for the axis titled 'hours'.
            means.append( techniqueRows['time2'].mean() /3600 )
            param_labels.append('{}={}'.format(paramName, param))

        traces.append( go.Scatter( x=param_labels, y=means, mode='lines', name=dataset ))

    layout = go.Layout(
        yaxis=dict(title='hours', zeroline=False ),
        title='{} by {}'.format(technique.upper(), paramName),
    )
    fig = go.Figure(data=traces, layout=layout)
    py.iplot(fig)
#     plotly.plotly.image.save_as(fig, filename='{}/{}-plot.png'.format(resultsDirectory, dataset))