In [1]:
import dill
import numpy as np
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode()




In [2]:
def plot_times_by_class(models_times_by_class, title):
    """Show grouped box plots of total times per class, one trace per model.

    Parameters
    ----------
    models_times_by_class : dict
        Maps a model name to the per-class timing structure accepted by
        ``get_total_times``.
    title : str
        Label used for the y axis.

    The figure is rendered inline with ``iplot``; nothing is returned.
    """
    # Collapse every model's timings into per-class totals before plotting.
    totals_by_model = {}
    for name, times in models_times_by_class.items():
        totals_by_model[name] = get_total_times(times)

    traces = []
    for model_name, class_totals in totals_by_model.items():
        labels = []
        values = []
        # Classes are visited in sorted order of their label.
        for class_name, scores in sorted(class_totals.items()):
            labels += [class_name] * len(scores)
            values += scores
        traces.append(go.Box(y=values, x=labels, name=model_name, boxmean=True))

    y_axis = dict(title=title, zeroline=True)
    figure = go.Figure(data=traces, layout=go.Layout(yaxis=y_axis, boxmode='group'))
    iplot(figure)
    
def plot_aggregated_times(aggregated_times, title):
    """Show grouped box plots of already-aggregated times, one trace per model.

    Parameters
    ----------
    aggregated_times : dict
        Maps a model name to a dict of ``{time_name: list_of_values}``.
    title : str
        Label used for the y axis.

    The figure is rendered inline with ``iplot``; nothing is returned.
    """
    traces = []
    for model_name, times_by_name in aggregated_times.items():
        labels = []
        values = []
        # Each value is paired with the name of the group it belongs to;
        # groups are visited in sorted order of their name.
        for time_name, scores in sorted(times_by_name.items()):
            labels += [time_name] * len(scores)
            values += scores
        traces.append(go.Box(y=values, x=labels, name=model_name, boxmean=True))

    y_axis = dict(title=title, zeroline=True)
    figure = go.Figure(data=traces, layout=go.Layout(yaxis=y_axis, boxmode='group'))
    iplot(figure)

def aggregate_times(models_times_by_class):
    """Pool the timing rows of all classes and split them by column.

    Parameters
    ----------
    models_times_by_class : dict
        Maps a model name to a dict of ``{class_name: rows}``. The rows of
        every class are stacked vertically, so they must share a column count.

    Returns
    -------
    dict
        Maps each model name to ``{'t0': [...], 't1': [...], ...}`` where
        ``'t<k>'`` holds column ``k`` of the stacked rows as a plain list.
    """
    aggregated = {}
    for model_name, times_per_class in models_times_by_class.items():
        # Class labels are irrelevant here: only the rows are pooled.
        stacked = np.vstack([np.array(rows) for rows in times_per_class.values()])
        aggregated[model_name] = {
            't{0}'.format(idx): stacked[:, idx].tolist()
            for idx in range(stacked.shape[1])
        }
    return aggregated

def get_total_times(times_by_class):
    """Sum each timing row into a single total, class by class.

    Parameters
    ----------
    times_by_class : dict
        Maps a class label to a sequence of rows. Each row is summed along
        axis 1 (presumably one row per query and one column per timed stage;
        confirm against the code that writes the result files).

    Returns
    -------
    dict
        Maps each class label to a plain list with one total per row. A class
        with no rows at all maps to an empty list.
    """
    totals = {}
    for class_, times in times_by_class.items():
        times_arr = np.array(times)
        if times_arr.shape == (0,):
            # An empty sequence becomes a one-dimensional array, for which
            # sum(axis=1) raises; a class without rows simply has no totals.
            totals[class_] = []
            continue
        totals[class_] = times_arr.sum(axis=1).tolist()
    return totals

In [3]:
def plot_scores(names, results, title):
    """Show grouped box plots of scores by result position, one trace per name.

    Parameters
    ----------
    names : iterable of str
        Trace names, paired positionally with ``results``.
    results : iterable
        For each name, an iterable of per-query score sequences. Every query
        must support ``len()`` and ``.tolist()`` (presumably numpy arrays;
        confirm against the stored results).
    title : str
        Label used for the y axis.

    The figure is rendered inline with ``iplot``; nothing is returned.
    """
    traces = []
    for name, queries in zip(names, results):
        positions = []
        values = []
        for query in queries:
            # Positions are 1-based: the first score of a query is rank 1.
            positions.extend(range(1, len(query) + 1))
            values.extend(query.tolist())
        # Positions are wrapped in dots to turn them into string labels
        # (presumably so plotly treats the x axis as categorical).
        labels = ['.{0}.'.format(pos) for pos in positions]
        traces.append(go.Box(y=values, x=labels, name=name, boxmean='sd'))

    y_axis = dict(title=title, zeroline=True)
    figure = go.Figure(data=traces, layout=go.Layout(yaxis=y_axis, boxmode='group'))
    iplot(figure)
    
    
def plot_classes(results_dict, title):
    """Show grouped box plots of scores per class, one trace per model.

    Parameters
    ----------
    results_dict : dict
        Maps a model name to a dict of ``{class_name: list_of_scores}``.
    title : str
        Label used for the y axis.

    The figure is rendered inline with ``iplot``; nothing is returned.
    """
    traces = []
    for model_name, scores_by_class in results_dict.items():
        labels = []
        values = []
        # Classes are visited in sorted order of their label.
        for class_name, scores in sorted(scores_by_class.items()):
            labels += [class_name] * len(scores)
            values += scores
        traces.append(go.Box(y=values, x=labels, name=model_name, boxmean=True))

    y_axis = dict(title=title, zeroline=True)
    figure = go.Figure(data=traces, layout=go.Layout(yaxis=y_axis, boxmode='group'))
    iplot(figure)
    
def aggregate_results(results):
    """Flatten per-class results into one list, dropping class and query keys.

    ``results`` maps a class label to a dict of per-query values; the values
    of every inner dict are concatenated in iteration order.
    """
    flattened = []
    for per_query in results.values():
        flattened.extend(per_query.values())
    return flattened

def aggregate_results_fixed_n(model_results, n):
    """Pick the score at index ``n`` from every query, grouped by class.

    ``model_results`` maps a class label to a dict of per-query score
    sequences. The result maps each class label to the list of
    ``scores[n]`` values taken from its queries.
    """
    return {
        class_name: [scores[n] for scores in per_query.values()]
        for class_name, per_query in model_results.items()
    }

In [4]:
# Directories holding the serialized experiment outputs. Edit these two
# constants to run the notebook on another machine.
TEST_OUTPUTS_DIR = '/home/lucas/tesis2/test_outputs'
OLD_TEST_OUTPUTS_DIR = '/home/lucas/tesis2/old/test_outputs'

# File-name patterns of the stored results; only the bracketed parts vary.
TREE_RESULTS_TEMPLATE = ('times_sequence_tree_lightcurves.R.txt_{n_samples}samples'
                         '_semistd{semistd}_std{std}_20levels_250_10'
                         '_100samples_per_class_20results_per_query.dill')
FEATURES_RESULTS_TEMPLATE = '{features}_50samples_per_class_20results_per_query.dill'


def _load_dill(path):
    """Return the object stored in the dill file at ``path``."""
    # NOTE(security): dill.load can execute arbitrary code while
    # deserializing, so it must only be pointed at trusted files.
    with open(path, 'rb') as f:
        return dill.load(f)


def _load_tree_results(n_samples, semistd, std):
    """Load the sequence-tree results for one sample count and flag pair."""
    filename = TREE_RESULTS_TEMPLATE.format(n_samples=n_samples, semistd=semistd, std=std)
    return _load_dill(TEST_OUTPUTS_DIR + '/' + filename)


def _load_features_results(features):
    """Load the feature-based baseline results identified by ``features``."""
    filename = FEATURES_RESULTS_TEMPLATE.format(features=features)
    return _load_dill(OLD_TEST_OUTPUTS_DIR + '/' + filename)


# Files written with semistdFalse / stdTrue.
test_results_100_R_std = _load_tree_results(100, semistd=False, std=True)
test_results_500_R_std = _load_tree_results(500, semistd=False, std=True)
test_results_1000_R_std = _load_tree_results(1000, semistd=False, std=True)
test_results_5000_R_std = _load_tree_results(5000, semistd=False, std=True)

# Files written with semistdTrue / stdFalse.
test_results_100_R_semi = _load_tree_results(100, semistd=True, std=False)
test_results_500_R_semi = _load_tree_results(500, semistd=True, std=False)
test_results_1000_R_semi = _load_tree_results(1000, semistd=True, std=False)
# The 5000-sample run of this configuration was already commented out; kept disabled.
# test_results_5000_R_semi = _load_tree_results(5000, semistd=True, std=False)

# Files written with semistdFalse / stdFalse.
test_results_100_R_not = _load_tree_results(100, semistd=False, std=False)
test_results_500_R_not = _load_tree_results(500, semistd=False, std=False)
test_results_1000_R_not = _load_tree_results(1000, semistd=False, std=False)

# Feature-based baselines stored under the "old" outputs directory.
fats_pca2 = _load_features_results('fatsfeatures_pca2')
fats_pca5 = _load_features_results('fatsfeatures_pca5')
fats_pca10 = _load_features_results('fatsfeatures_pca10')
fats_pca25 = _load_features_results('fatsfeatures_pca25')
fats = _load_features_results('fatsfeatures_pcaNone')
mackenzie = _load_features_results('macfeatures')
    

# Resultados con curvas estandarizadas

In [5]:
# Sequence-tree models built from standardized curves, one per sample count.
models_names = ['100_R', '500_R', '1000_R', '5000_R']
models = [
    test_results_100_R_std,
    test_results_500_R_std,
    test_results_1000_R_std,
    test_results_5000_R_std,
]

# Scores: pooled over classes, and per class at one fixed result index.
models_results_by_class = [model['results'] for model in models]
aggregated_results = list(map(aggregate_results, models_results_by_class))

# Timings: per class, and pooled over classes.
models_times_by_class = dict(zip(models_names, (model['times'] for model in models)))
aggregated_times = aggregate_times(models_times_by_class)

n = 20
# n counts results from 1, so index n - 1 selects the n-th score of each query.
results_by_class = {
    name: aggregate_results_fixed_n(class_results, n - 1)
    for name, class_results in zip(models_names, models_results_by_class)
}

## nDCG

In [6]:
# Box plot of the pooled scores for the models selected in the previous cell.
plot_scores(names=models_names, results=aggregated_results, title='nDCG')

## nDCG por clases

In [8]:
# Box plot of the fixed-index scores, split by class.
plot_classes(results_dict=results_by_class, title='nDCG')

## tiempo

In [9]:
# Box plot of each timing column, pooled over classes.
plot_aggregated_times(aggregated_times=aggregated_times, title='time')

## tiempo por clases

In [10]:
# Box plot of the per-row total time, split by class.
plot_times_by_class(models_times_by_class=models_times_by_class, title='time [seconds]')

# Resultados con curvas estandarizadas vs fats y tsfl

In [11]:
# Standardized sequence-tree models followed by the feature-based baselines.
# Both lists must stay in the same order: they are paired positionally.
models_names = [
    '100_R', '500_R', '1000_R', '5000_R',
    'fats_pca2', 'fats_pca5', 'fats_pca10', 'fats_pca25', 'tsfl',
]
models = [
    test_results_100_R_std,
    test_results_500_R_std,
    test_results_1000_R_std,
    test_results_5000_R_std,
    fats_pca2,
    fats_pca5,
    fats_pca10,
    fats_pca25,
    mackenzie,
]
models_results_by_class = [model['results'] for model in models]
aggregated_results = list(map(aggregate_results, models_results_by_class))

In [13]:
# Box plot of the pooled scores for the models selected in the previous cell.
plot_scores(names=models_names, results=aggregated_results, title='nDCG')

# Resultados con curvas estandarizadas vs no estandarizadas

In [14]:
# The three loaded configurations (std, semi, not), each at 100/500/1000 samples.
# Both lists must stay in the same order: they are paired positionally.
models_names = [
    '100_std', '500_std', '1000_std',
    '100_semi', '500_semi', '1000_semi',
    '100_not', '500_not', '1000_not',
]
models = [
    test_results_100_R_std,
    test_results_500_R_std,
    test_results_1000_R_std,
    test_results_100_R_semi,
    test_results_500_R_semi,
    test_results_1000_R_semi,
    test_results_100_R_not,
    test_results_500_R_not,
    test_results_1000_R_not,
]

# Scores: pooled over classes, and per class at one fixed result index.
models_results_by_class = [model['results'] for model in models]
aggregated_results = list(map(aggregate_results, models_results_by_class))

# Timings: per class, and pooled over classes.
models_times_by_class = dict(zip(models_names, (model['times'] for model in models)))
aggregated_times = aggregate_times(models_times_by_class)

n = 20
# n counts results from 1, so index n - 1 selects the n-th score of each query.
results_by_class = {
    name: aggregate_results_fixed_n(class_results, n - 1)
    for name, class_results in zip(models_names, models_results_by_class)
}

In [15]:
# Box plot of the pooled scores for the models selected in the previous cell.
plot_scores(names=models_names, results=aggregated_results, title='nDCG')