In [None]:
import os

import pandas as pd
import numpy as np

import pareto

from matplotlib.ticker import StrMethodFormatter

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later releases removed the old names, so use whichever spelling this
# installation provides (the default style is kept if neither exists).
for _style_name in ('seaborn-v0_8-bright', 'seaborn-bright'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

# Default figure size and font size for every figure that does not override them.
plt.rcParams['figure.figsize'] = [15, 9]
plt.rcParams['font.size'] = 12

# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

In [None]:
# Working directory of the kernel plus short aliases for the two os.path
# helpers used to build the input file paths.
cwd, join, norm = os.getcwd(), os.path.join, os.path.normpath

In [None]:
# Normalised paths of the per-method statistics files, each given relative to
# the current working directory.
dynamic_path, static_path, mlaa_path, liu_path, cenk_path = (
    norm(join(cwd, relative_csv))
    for relative_csv in (
        '../dynamic/runs/dynamic_stats.csv',
        '../static/runs/static_stats.csv',
        '../MLAA-Bernier/runs/MLAA_stats.csv',
        '../Hierarchical-Liu/runs/Hierarchical_stats.csv',
        '../Yavuzturk/runs/Yavuzturk_stats.csv',
    )
)

In [None]:
# Read each statistics file into its own frame, using the first CSV column as
# the index: d = dynamic, s = static, m = MLAA, l = Liu, c = Yavuzturk.
df_d, df_s, df_m, df_l, df_c = (
    pd.read_csv(csv_path, index_col=[0])
    for csv_path in (dynamic_path, static_path, mlaa_path, liu_path, cenk_path)
)

In [None]:
# Show the first two rows of the dynamic-method frame to check its columns.
df_d.head(2)

In [None]:
def find_claesson(load, year):
    """Return the (rmse, run time fraction) pair plotted as 'Claesson'.

    The row is taken from the module-level ``df_d`` frame: it is the one whose
    ``load`` and ``sim time`` equal the arguments and whose ``start width`` and
    ``end width`` are both 5 with an ``exp_rate`` of 2.

    Parameters
    ----------
    load
        Value compared against ``df_d['load']``.
    year
        Value compared against ``df_d['sim time']``.

    Returns
    -------
    tuple of (float, float)
        ``rmse`` and ``run time fraction`` of the matching row.

    Raises
    ------
    ValueError
        If the filter does not select exactly one row.
    """
    selector = (
        (df_d['load'] == load)
        & (df_d['sim time'] == year)
        & (df_d['start width'] == 5)
        & (df_d['end width'] == 5)
        & (df_d['exp_rate'] == 2)
    )
    _df = df_d.loc[selector, ['rmse', 'run time fraction']]

    # Converting a whole array with float() only works for exactly one element
    # and is deprecated for 1-D arrays, so check the row count explicitly.
    if len(_df) != 1:
        raise ValueError(
            'expected exactly one matching row for load={!r}, sim time={!r}; found {}'.format(
                load, year, len(_df)))

    row = _df.iloc[0]
    return float(row['rmse']), float(row['run time fraction'])

In [None]:
def plot_all_methods_runtimefrac_vs_rmse(dfs, names, load, year):
    """Scatter run time fraction against RMSE for several result frames.

    Every frame in ``dfs`` is filtered to the rows whose ``load`` and
    ``sim time`` equal the arguments and drawn with its own marker and the
    label found at the same position in ``names``. The point returned by
    ``find_claesson(load, year)`` is added last with the label 'Claesson'.

    Parameters
    ----------
    dfs : iterable of pandas.DataFrame
        Frames with 'load', 'sim time', 'rmse' and 'run time fraction' columns.
    names : sequence
        Legend labels, indexed in step with ``dfs``.
    load, year
        Values matched against the 'load' and 'sim time' columns.
    """
    markers = ['X', 'D', 'v', 'h', '+', '*']

    fig = plt.figure(figsize=(7, 5), dpi=200)
    ax = fig.add_subplot(1, 1, 1)

    for position, frame in enumerate(dfs):
        selected = frame.loc[(frame['load'] == load) & (frame['sim time'] == year)]
        ax.scatter(
            selected['rmse'],
            selected['run time fraction'],
            label=names[position],
            marker=markers[position],
        )

    # The last marker in the list is used for the reference point.
    reference_rmse, reference_fraction = find_claesson(load, year)
    ax.scatter(reference_rmse, reference_fraction, label='Claesson', marker=markers[-1])

    ax.set_xlabel('RMSE MFT [C]')
    ax.set_ylabel('Runtime Fraction')
    ax.set_title('{} {}'.format(load.title(), year))

    ax.legend()
    ax.grid(True)
#     plt.savefig('{}_{}.pdf'.format(load, year), bbox_inches='tight')
    plt.show()

In [None]:
# Draw the all-methods comparison for every simulation length and load
# profile: years 1, 5 and 10, balanced before imbalanced within each year.
all_method_frames = [df_d, df_s, df_m, df_l, df_c]
all_method_names = ['Dynamic', 'Static', 'Bernier', 'Liu', 'Yavuzturk']

for year in (1, 5, 10):
    for load in ('balanced', 'imbalanced'):
        plot_all_methods_runtimefrac_vs_rmse(all_method_frames, all_method_names, load, year)

In [None]:
def define_pareto(df_in):
    """Build a frame from the rows that ``pareto.eps_sort`` keeps.

    The rows of ``df_in`` (without the index) are handed to
    ``pareto.eps_sort`` with column positions 4 and 5 as the objectives; which
    columns those are depends on the column order of ``df_in``.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input rows; must contain an 'rmse' column.

    Returns
    -------
    pandas.DataFrame
        The kept rows with the same column names as ``df_in``, sorted by
        ascending 'rmse'.
    """
    candidate_rows = list(df_in.itertuples(False))
    kept_rows = pareto.eps_sort([candidate_rows], [4, 5])
    column_names = list(df_in.columns.values)

    df = pd.DataFrame.from_records(kept_rows, columns=column_names)
    return df.sort_values(by=['rmse'])

In [None]:
def plot_pareto_with_data(df, load, year, data_label, ymax=None, ymin=None):
    """Scatter one load/year slice and overlay the line from ``define_pareto``.

    The figure is written to '<load>_<year>_pareto.pdf' in the current
    working directory and then shown.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'load', 'sim time', 'rmse' and 'run time fraction' columns.
    load, year
        Values matched against the 'load' and 'sim time' columns.
    data_label : str
        Legend label for the scattered points.
    ymax, ymin : float, optional
        Upper / lower y-axis limit. ``None`` leaves that limit automatic;
        0 is accepted as a real limit.
    """
    mask = (df['sim time'] == year) & (df['load'] == load)
    df_pareto = define_pareto(df.loc[mask])

    fig = plt.figure(figsize=(7, 5), dpi=200)
    ax = fig.add_subplot(1, 1, 1)

    x = df['rmse'].loc[mask].values
    y = df['run time fraction'].loc[mask].values

    ax.scatter(x, y, label=data_label)
    ax.plot(df_pareto['rmse'].values, df_pareto['run time fraction'].values, c='r', label='Pareto')

    ax.set_xlabel('RMSE MFT [C]')
    ax.set_ylabel('Runtime Fraction')
    ax.set_title('{} {}'.format(load.title(), year))

    # Compare against None rather than relying on truthiness so that a limit
    # of 0 is applied instead of being silently skipped.
    if ymax is not None:
        ax.set_ylim(top=ymax)

    if ymin is not None:
        ax.set_ylim(bottom=ymin)

    ax.legend()
    ax.grid(True)
    fig.savefig('{}_{}_pareto.pdf'.format(load, year), bbox_inches='tight')
    plt.show()

In [None]:
# Pareto plot of the dynamic results for each simulation length and load
# profile. The y-axis window is shared by both load profiles of a given year.
pareto_y_windows = [
    # (year, ymin, ymax)
    (1, 0.023, 0.035),
    (5, 0.005, 0.013),
    (10, 0.003, 0.008),
]

for year, ymin, ymax in pareto_y_windows:
    for load in ('balanced', 'imbalanced'):
        plot_pareto_with_data(df_d, load, year, 'Dynamic', ymax=ymax, ymin=ymin)

In [None]:
# Show the first row of the dynamic-method frame.
df_d.head(1)

In [None]:
def get_all_paretos(df, loads, years):
    """Stack the ``define_pareto`` result of every (load, year) slice.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'load' and 'sim time' columns.
    loads, years : iterable
        Values matched against 'load' and 'sim time'; slices are processed
        with ``loads`` as the outer loop and ``years`` as the inner loop.

    Returns
    -------
    pandas.DataFrame
        The per-slice results concatenated in processing order, each keeping
        its own index. If no slice produced any row, an empty frame with the
        columns of ``df`` is returned.
    """
    # Collect the pieces and concatenate once: growing a frame inside the loop
    # copies it on every iteration, and seeding it with an empty frame can
    # turn numeric columns into object dtype.
    pieces = []

    for load in loads:
        for year in years:
            mask = (df['load'] == load) & (df['sim time'] == year)
            df_pareto = define_pareto(df.loc[mask])

            if not df_pareto.empty:
                pieces.append(df_pareto)

    if not pieces:
        return pd.DataFrame(columns=df.columns)

    return pd.concat(pieces)

In [None]:
# Combine the define_pareto results of the dynamic frame for both load
# profiles and the 1-, 5- and 10-year simulations into one frame.
df_all_pareto = get_all_paretos(df_d, ['balanced', 'imbalanced'], [1, 5, 10])

In [None]:
# Show the first row of the combined frame.
df_all_pareto.head(1)

In [None]:
# Columns of the combined frame that are converted with pd.to_numeric.
numeric_columns = [
    'depth',
    'end width',
    'exp_rate',
    'rmse',
    'run time',
    'run time fraction',
    'run time stdev',
    'sample count',
    'sim time',
    'start width',
]

# Convert column by column and write the result back into the same frame.
df_all_pareto[numeric_columns] = df_all_pareto[numeric_columns].apply(pd.to_numeric)

In [None]:
# Draw pandas' default histograms for the combined frame.
df_all_pareto.hist()

In [None]:
def make_hist(series, label, save_name):
    """Draw stacked histograms of one column, grouped by 'sim time', and save them.

    The data comes from the module-level ``df_all_pareto`` frame. The panels
    are laid out in three rows and one column, share the x axis, and the
    figure is written to ``save_name``.

    Parameters
    ----------
    series : str
        Name of the column to histogram.
    label : str
        x-axis label applied to every panel.
    save_name : str
        Output file passed to ``Figure.savefig``.
    """
    # https://community.modeanalytics.com/gallery/python_histogram/
    panels = df_all_pareto.hist(
        column=series,
        by='sim time',
        bins=10,
        sharex=True,
        layout=(3, 1),
        figsize=(12, 8),
        zorder=2,
        rwidth=0.9,
    )

    for position, panel in enumerate(panels):
        # Hide every spine except the bottom one.
        for side in ('right', 'top', 'left'):
            panel.spines[side].set_visible(False)

        # Keep the tick labels but hide the tick marks themselves.
        panel.tick_params(axis="both", which="both", bottom=False, top=False, labelbottom=True, left=False, right=False, labelleft=True)

        # Faint dashed guide line at each y tick, drawn behind the bars.
        for tick in panel.get_yticks():
            panel.axhline(y=tick, linestyle='dashed', alpha=0.4, color='#eeeeee', zorder=1)

        panel.set_xlabel(label, labelpad=20, size=14)

        # Only the second panel carries the y-axis label.
        if position == 1:
            panel.set_ylabel("Frequency", labelpad=50, size=14)

        panel.yaxis.set_major_formatter(StrMethodFormatter('{x:,g}'))
        panel.tick_params(axis='x', rotation=0)

    panels[0].get_figure().savefig(save_name, bbox_inches='tight')

In [None]:
# Histogram of the 'rmse' column per simulation length, saved as hist_rmse.pdf.
make_hist('rmse', 'RMSE MFT [C]', 'hist_rmse.pdf')

In [None]:
def make_scatter_with_color_bar(df, color_name, color_label, title, ymax=None, ymin=None, save_name=None):
    """Scatter run time fraction against RMSE, coloured by another column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'rmse' and 'run time fraction' columns plus ``color_name``.
    color_name : str
        Column whose values set the point colours ('jet' colormap).
    color_label : str
        Label shown next to the colorbar.
    title : str
        Figure title.
    ymax, ymin : float, optional
        Upper / lower y-axis limit. ``None`` leaves that limit automatic;
        0 is accepted as a real limit.
    save_name : str, optional
        If given, the figure is saved to '<save_name>.pdf' before it is shown.
    """
    fig = plt.figure(figsize=(7, 5), dpi=200)
    ax = fig.add_subplot(1, 1, 1)

    c = df[color_name].values
    sc = ax.scatter(df['rmse'].values, df['run time fraction'].values, c=c, cmap='jet', label='Dynamic')
    cb = plt.colorbar(sc)

    cb.set_label(color_label)

    ax.set_xlabel('RMSE MFT [C]')
    ax.set_ylabel('Runtime Fraction')
    ax.set_title(title)

    # Compare against None rather than relying on truthiness so that a limit
    # of 0 is applied instead of being silently skipped.
    if ymax is not None:
        ax.set_ylim(top=ymax)

    if ymin is not None:
        ax.set_ylim(bottom=ymin)

    ax.legend()
    ax.grid(True)

    if save_name:
        fig.savefig('{}.pdf'.format(save_name), bbox_inches='tight')

    plt.show()

In [None]:
# Add a column holding 'start width' minus 'end width' to the dynamic frame
# (this modifies df_d in place).
df_d['sw-ew'] = df_d['start width'] - df_d['end width']

In [None]:
# Scatter of the dynamic results coloured by expansion rate, one figure per
# simulation length and load profile (balanced before imbalanced in each year).
for year in (1, 5, 10):
    for load in ('balanced', 'imbalanced'):
        mask = (df_d['load'] == load) & (df_d['sim time'] == year)
        make_scatter_with_color_bar(
            df_d.loc[mask],
            'exp_rate',
            'Expansion Rate',
            '{} {}'.format(load.title(), year),
            save_name='{}_{}_exp_rate'.format(load, year),
        )

In [None]:
# Show the first two rows of the dynamic-method frame again.
df_d.head(2)

In [None]:
def make_scatter_with_color_bar_and_markers(df, color_name, color_label, marker_data, marker_names, title, ymax=None, ymin=None, save_name=None):
    """Scatter run time fraction against RMSE with colour and marker encodings.

    Rows are split into one group per entry of ``marker_names`` (rows where
    ``df[marker_data]`` equals that entry). Each group gets its own marker
    shape, and all groups share one colour scale so the single colorbar is
    valid for every point.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'rmse' and 'run time fraction' columns plus ``color_name``
        and ``marker_data``.
    color_name : str
        Column whose values set the point colours ('jet' colormap).
    color_label : str
        Label shown next to the colorbar.
    marker_data : str
        Column used to split the rows into marker groups.
    marker_names : sequence
        Values of ``marker_data`` to plot, one marker shape each. Shapes are
        reused cyclically when there are more groups than shapes.
    title : str
        Figure title.
    ymax, ymin : float, optional
        Upper / lower y-axis limit. ``None`` leaves that limit automatic;
        0 is accepted as a real limit.
    save_name : str, optional
        If given, the figure is saved to '<save_name>.pdf' before it is shown.
    """
    fig = plt.figure(figsize=(7, 5), dpi=200)
    ax = fig.add_subplot(1, 1, 1)

    markers = ['X', 'D', 'v', 'h', '+', '*']

    groups = [(m, df.loc[df[marker_data] == m]) for m in marker_names]

    # Without a common vmin/vmax every scatter call would normalise its own
    # colours independently and the colorbar would describe only one group.
    group_colors = [group[color_name] for _, group in groups if len(group)]
    if group_colors:
        vmin = min(values.min() for values in group_colors)
        vmax = max(values.max() for values in group_colors)
    else:
        vmin = vmax = None

    sc = None
    for idx, (m, group) in enumerate(groups):
        sc = ax.scatter(
            group['rmse'].values,
            group['run time fraction'].values,
            c=group[color_name].values,
            cmap='jet',
            vmin=vmin,
            vmax=vmax,
            label=str(m),
            marker=markers[idx % len(markers)],
        )

    # No groups means nothing to attach a colorbar to.
    if sc is not None:
        cb = fig.colorbar(sc, ax=ax)
        cb.set_label(color_label)

    ax.set_xlabel('RMSE MFT [C]')
    ax.set_ylabel('Runtime Fraction')
    ax.set_title(title)

    # Compare against None rather than relying on truthiness so that a limit
    # of 0 is applied instead of being silently skipped.
    if ymax is not None:
        ax.set_ylim(top=ymax)

    if ymin is not None:
        ax.set_ylim(bottom=ymin)

    if sc is not None:
        ax.legend()
    ax.grid(True)

    if save_name:
        fig.savefig('{}.pdf'.format(save_name), bbox_inches='tight')

    plt.show()

In [None]:
# Scatter of the dynamic results restricted to expansion rates 1.25, 1.50,
# 1.62 and 1.75, coloured by start width. One figure (and one PDF) is produced
# per simulation length and load profile.
mask_2 = df_d['exp_rate'].isin([1.25, 1.50, 1.62, 1.75])

for year in (1, 5, 10):
    for load in ('balanced', 'imbalanced'):
        mask_1 = (df_d['load'] == load) & (df_d['sim time'] == year)
        mask = mask_1 & mask_2
        make_scatter_with_color_bar(
            df_d.loc[mask],
            'start width',
            'Start Width',
            '{} {}'.format(load.title(), year),
            save_name='{}_{}_125-to-175_exp_rate_start_width'.format(load, year),
        )

In [None]:
# Same expansion-rate subset (1.25, 1.50, 1.62, 1.75), further restricted to
# rows whose start width equals their end width, coloured by start width.
# One figure (and one PDF) per simulation length and load profile.
mask_2 = df_d['exp_rate'].isin([1.25, 1.50, 1.62, 1.75])

for year in (1, 5, 10):
    for load in ('balanced', 'imbalanced'):
        mask_1 = (
            (df_d['load'] == load)
            & (df_d['sim time'] == year)
            & (df_d['start width'] == df_d['end width'])
        )
        mask = mask_1 & mask_2
        make_scatter_with_color_bar(
            df_d.loc[mask],
            'start width',
            'Start Width',
            '{} {}'.format(load.title(), year),
            save_name='{}_{}_125-to-175_exp_rate_uniform_start_width_end_width'.format(load, year),
        )

In [None]:
# NOTE(review): make_fig, pareto_i1..pareto_i6 and exp_rates are not defined in
# any cell visible in this notebook, so this cell raises NameError on a fresh
# kernel unless they come from somewhere else — confirm, restore the missing
# definitions, or remove the cell.
make_fig(pareto_i1, exp_rates, 'exp_rate', '1-year Imbalanced')
make_fig(pareto_i2, exp_rates, 'exp_rate', '2-year Imbalanced')
make_fig(pareto_i3, exp_rates, 'exp_rate', '3-year Imbalanced')
make_fig(pareto_i4, exp_rates, 'exp_rate', '4-year Imbalanced')
make_fig(pareto_i5, exp_rates, 'exp_rate', '5-year Imbalanced')
make_fig(pareto_i6, exp_rates, 'exp_rate', '6-year Imbalanced')

In [None]:
def make_fig_with_annotation(df_in, mask_series, mask_col_name, annotate_col_name, title=None, markers=None):
    """Scatter run time against RMSE per group and annotate every point.

    One group is drawn for each value in ``mask_series`` (processed in
    reverse order): the rows of ``df_in`` whose ``mask_col_name`` column equals
    ``float(value)``. Every point is annotated with its ``annotate_col_name``
    value.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Frame with 'rmse' and 'run time' columns plus the two named columns.
    mask_series : sequence
        Group values; must support ``reversed`` and ``float`` conversion.
    mask_col_name : str
        Column compared against each group value.
    annotate_col_name : str
        Column whose values are written next to the points.
    title : str, optional
        Figure title.
    markers : sequence of str, optional
        Marker shapes, one per group and reused cyclically if there are more
        groups than shapes. Defaults to the marker list used by the other
        plotting helpers in this notebook.
    """
    # The function previously read a module-level ``markers`` that no visible
    # cell defines; fall back to the list the other plot helpers use.
    if markers is None:
        markers = ['X', 'D', 'v', 'h', '+', '*']

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)

    for idx, mask in enumerate(reversed(mask_series)):
        s = df_in[mask_col_name] == float(mask)
        group = df_in.loc[s]

        x = group['rmse']
        y = group['run time']

        m = markers[idx % len(markers)]
        ax.scatter(x, y, marker=m, label=mask, s=60)

        for txt, x_i, y_i in zip(group[annotate_col_name].values, x.values, y.values):
            ax.annotate(txt, (x_i, y_i))

    if title:
        ax.set_title(title)

    ax.legend()
    plt.show()

In [None]:
# Boolean mask selecting the rows with an expansion rate of 1.75.
# NOTE(review): no visible cell defines a module-level `df` (only function
# parameters/locals use that name), so this raises NameError on a fresh
# kernel — confirm which frame was intended.
exp_rate_mask = df['exp_rate'] == 1.75

In [None]:
# Start widths 1 through 5, used as the group values in the annotated plots.
start_widths = range(1, 6)

In [None]:
# Annotated scatter per start width for the balanced load, one figure per year.
# NOTE(review): `df` and the masks m_b1..m_b6 are not defined in any cell
# visible in this notebook, so this cell raises NameError on a fresh kernel —
# confirm where they should come from or remove the cell.
make_fig_with_annotation(df.loc[m_b1 & exp_rate_mask], start_widths, 'start width', 'end width', '1-year Balanced')
make_fig_with_annotation(df.loc[m_b2 & exp_rate_mask], start_widths, 'start width', 'end width', '2-year Balanced')
make_fig_with_annotation(df.loc[m_b3 & exp_rate_mask], start_widths, 'start width', 'end width', '3-year Balanced')
make_fig_with_annotation(df.loc[m_b4 & exp_rate_mask], start_widths, 'start width', 'end width', '4-year Balanced')
make_fig_with_annotation(df.loc[m_b5 & exp_rate_mask], start_widths, 'start width', 'end width', '5-year Balanced')
make_fig_with_annotation(df.loc[m_b6 & exp_rate_mask], start_widths, 'start width', 'end width', '6-year Balanced')

In [None]:
# Annotated scatter per start width for the imbalanced load, one figure per year.
# NOTE(review): `df` and the masks m_i1..m_i6 are not defined in any cell
# visible in this notebook, so this cell raises NameError on a fresh kernel —
# confirm where they should come from or remove the cell.
make_fig_with_annotation(df.loc[m_i1 & exp_rate_mask], start_widths, 'start width', 'end width', '1-year Imbalanced')
make_fig_with_annotation(df.loc[m_i2 & exp_rate_mask], start_widths, 'start width', 'end width', '2-year Imbalanced')
make_fig_with_annotation(df.loc[m_i3 & exp_rate_mask], start_widths, 'start width', 'end width', '3-year Imbalanced')
make_fig_with_annotation(df.loc[m_i4 & exp_rate_mask], start_widths, 'start width', 'end width', '4-year Imbalanced')
make_fig_with_annotation(df.loc[m_i5 & exp_rate_mask], start_widths, 'start width', 'end width', '5-year Imbalanced')
make_fig_with_annotation(df.loc[m_i6 & exp_rate_mask], start_widths, 'start width', 'end width', '6-year Imbalanced')

In [None]:
def make_some_plot(*args):
    """Plot one line per series description on a single set of axes.

    Each series is a mapping with 'x' and 'y' entries and an optional 'label'
    entry. The series can be passed either as one iterable
    (``make_some_plot([a, b, c])``) or as separate arguments
    (``make_some_plot(a, b, c)``).

    Raises
    ------
    KeyError
        If a series has no 'x' or 'y' entry.
    """
    # A single argument without ``keys`` is taken to be a collection of series;
    # anything else is treated as the series themselves.
    if len(args) == 1 and not hasattr(args[0], 'keys'):
        series_list = list(args[0])
    else:
        series_list = list(args)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)

    any_label = False
    for s in series_list:
        # Only the label is optional; a missing 'x' or 'y' is a real error and
        # is allowed to propagate.
        try:
            label = s['label']
        except KeyError:
            label = None

        if label is None:
            ax.plot(s['x'], s['y'])
        else:
            ax.plot(s['x'], s['y'], label=label)
            any_label = True

    ax.grid()
    # Skip the legend when nothing is labelled, so no empty legend is drawn.
    if any_label:
        ax.legend()
    plt.show()

In [None]:
# NOTE(review): a, b and c are not defined in any cell visible in this
# notebook, so this call raises NameError on a fresh kernel — confirm what
# series were meant here or remove the cell.
make_some_plot([a, b, c])