In [None]:
import os

import pandas as pd

import pareto

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# DataFrame display: never truncate columns when a frame is rendered.
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

# Plot appearance: apply the ggplot style first, then override individual
# rcParams so the overrides are not reset by the style sheet.
plt.style.use('ggplot')
plt.rcParams['font.size'] = 14
plt.rcParams['figure.figsize'] = [15, 9]

In [None]:
# Short aliases for the two os.path helpers used to build file locations.
join, norm = os.path.join, os.path.normpath

In [None]:
# Current working directory of the running process; relative data paths are
# resolved against it.
cwd = os.getcwd()

In [None]:
# Location of the dynamic-run statistics CSV: the relative path is joined onto
# the working directory and normalized so the '..' segment is collapsed.
dynamic_path = norm(join(cwd, '../dynamic/runs/dynamic_stats.csv'))

In [None]:
# Echo the resolved path so a wrong working directory is visible before loading.
dynamic_path

In [None]:
# Load the statistics table, using the CSV's first column as the index.
df = pd.read_csv(dynamic_path, index_col=[0])
# Preview the first rows of the loaded frame.
df.head()

In [None]:
def _scenario_mask(load, sim_time):
    """Boolean row mask over ``df`` for one 'load' value and one 'sim time' value."""
    return (df['load'] == load) & (df['sim time'] == sim_time)


# One mask per scenario: b/i = balanced/imbalanced 'load', 1/5 = 'sim time'.
m_b1 = _scenario_mask('balanced', 1)
m_b5 = _scenario_mask('balanced', 5)
m_i1 = _scenario_mask('imbalanced', 1)
m_i5 = _scenario_mask('imbalanced', 5)

In [None]:
# Values of the 'exp_rate' column to plot. They are kept as strings so the
# legend shows the two-decimal formatting; the plotting helpers convert each
# one with float() before comparing against the column.
exp_rates = ['1.25', '1.50', '1.62', '1.75', '2.00', '2.25', '2.50', '2.75', '3.00']

# Matplotlib marker codes, assigned to plotted series by position. Every entry
# is distinct so that no two series end up sharing the same marker.
markers = ['o', 'v', '^', '<', '>', 'p', 'P', 's', 'h', '+', 'X', 'x', '*', 'H', 'D', 'd']

In [None]:
def make_fig(df_in, mask_series, mask_col_name, title=None):
    """Scatter 'run time' against 'rmse', drawing one series per mask value.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Frame holding the 'rmse' and 'run time' columns plus ``mask_col_name``.
    mask_series : sequence
        Values used to split ``df_in`` into series. The sequence is walked in
        reverse order; each value is converted with ``float()`` for the
        comparison, while the original value is used as the legend label.
    mask_col_name : str
        Column compared against each mask value.
    title : str, optional
        Figure title; omitted when falsy.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    _, ax = plt.subplots()

    for idx, mask in enumerate(reversed(mask_series)):
        subset = df_in.loc[df_in[mask_col_name] == float(mask)]

        # Wrap around the module-level marker list so that passing more series
        # than there are markers reuses markers instead of raising IndexError.
        marker = markers[idx % len(markers)]
        ax.scatter(subset['rmse'], subset['run time'], marker=marker, label=mask, s=60)

    # Label the axes with the plotted column names so the figure stands alone.
    ax.set_xlabel('rmse')
    ax.set_ylabel('run time')

    if title:
        ax.set_title(title)

    ax.legend()
    plt.show()

In [None]:
# Scatter of every run, one figure per load / sim-time scenario.
for scenario_mask, scenario_title in (
    (m_b1, '1-year Balanced'),
    (m_b5, '5-year Balanced'),
    (m_i1, '1-year Imbalanced'),
    (m_i5, '5-year Imbalanced'),
):
    make_fig(df.loc[scenario_mask], exp_rates, 'exp_rate', scenario_title)

In [None]:
def define_pareto(df_in, objectives=(4, 5)):
    """Return the rows of ``df_in`` selected by ``pareto.eps_sort``, sorted by 'rmse'.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Candidate rows. The index is not passed to the sort, so the returned
        frame carries a fresh default index. Must contain an 'rmse' column.
    objectives : sequence of int or str, optional
        Objective columns handed to ``pareto.eps_sort``. Integers are column
        positions (index excluded); strings are column names and are
        translated to positions. Defaults to positions 4 and 5.
        NOTE(review): presumably these are the 'rmse' and 'run time' columns
        -- confirm against the CSV layout.

    Returns
    -------
    pandas.DataFrame
        The selected rows with the same columns as ``df_in``, ordered by
        ascending 'rmse'.
    """
    # itertuples(index=False) emits values in column order, so a column's
    # position in df_in.columns is also its position inside each row tuple.
    positions = [
        df_in.columns.get_loc(obj) if isinstance(obj, str) else obj
        for obj in objectives
    ]

    rows = list(df_in.itertuples(index=False))
    selected = pareto.eps_sort([rows], positions)

    front = pd.DataFrame.from_records(selected, columns=list(df_in.columns.values))
    return front.sort_values(by=['rmse'])

In [None]:
# Rows of the balanced, sim-time-1 scenario kept by define_pareto (sorted by rmse).
pareto_b1 = define_pareto(df.loc[m_b1])
# Peek at the first two rows.
pareto_b1.head(2)

In [None]:
# Rows of the balanced, sim-time-5 scenario kept by define_pareto (sorted by rmse).
pareto_b5 = define_pareto(df.loc[m_b5])
# Peek at the first two rows.
pareto_b5.head(2)

In [None]:
# Rows of the imbalanced, sim-time-1 scenario kept by define_pareto (sorted by rmse).
pareto_i1 = define_pareto(df.loc[m_i1])
# Peek at the first two rows.
pareto_i1.head(2)

In [None]:
# Rows of the imbalanced, sim-time-5 scenario kept by define_pareto (sorted by rmse).
pareto_i5 = define_pareto(df.loc[m_i5])
# Peek at the first two rows.
pareto_i5.head(2)

In [None]:
# Same scatter as before, restricted to the rows kept by define_pareto.
for pareto_frame, scenario_title in (
    (pareto_b1, '1-year Balanced'),
    (pareto_b5, '5-year Balanced'),
    (pareto_i1, '1-year Imbalanced'),
    (pareto_i5, '5-year Imbalanced'),
):
    make_fig(pareto_frame, exp_rates, 'exp_rate', scenario_title)

In [None]:
def make_fig_with_annotation(df_in, mask_series, mask_col_name, annotate_col_name, title=None):
    """Scatter 'run time' against 'rmse' per mask value and label every point.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Frame holding the 'rmse' and 'run time' columns plus ``mask_col_name``
        and ``annotate_col_name``.
    mask_series : sequence
        Values used to split ``df_in`` into series. The sequence is walked in
        reverse order; each value is converted with ``float()`` for the
        comparison, while the original value is used as the legend label.
    mask_col_name : str
        Column compared against each mask value.
    annotate_col_name : str
        Column whose value is written next to each plotted point.
    title : str, optional
        Figure title; omitted when falsy.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    _, ax = plt.subplots()

    for idx, mask in enumerate(reversed(mask_series)):
        subset = df_in.loc[df_in[mask_col_name] == float(mask)]
        x = subset['rmse']
        y = subset['run time']

        # Wrap around the module-level marker list so that passing more series
        # than there are markers reuses markers instead of raising IndexError.
        marker = markers[idx % len(markers)]
        ax.scatter(x, y, marker=marker, label=mask, s=60)

        # Pair each annotation value with its point's coordinates row by row.
        for txt, px, py in zip(subset[annotate_col_name].values, x.values, y.values):
            ax.annotate(txt, (px, py))

    # Label the axes with the plotted column names so the figure stands alone.
    ax.set_xlabel('rmse')
    ax.set_ylabel('run time')

    if title:
        ax.set_title(title)

    ax.legend()
    plt.show()

In [None]:
# Boolean row mask over df selecting only the runs whose 'exp_rate' is 1.75.
exp_rate_mask = df['exp_rate'] == 1.75

In [None]:
# Integer values 1 through 5 (the upper bound of range is exclusive), used as
# the 'start width' values to split on in the annotated plots.
start_widths = range(1, 6)

In [None]:
# For the exp_rate == 1.75 runs of each scenario: one series per 'start width',
# with every point labelled by its 'end width'.
for scenario_mask, scenario_title in (
    (m_b1, '1-year Balanced'),
    (m_b5, '5-year Balanced'),
    (m_i1, '1-year Imbalanced'),
    (m_i5, '5-year Imbalanced'),
):
    make_fig_with_annotation(
        df.loc[scenario_mask & exp_rate_mask],
        start_widths,
        'start width',
        'end width',
        scenario_title,
    )