In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Widen console output: long NumPy rows stay on one line (up to 30 edge items
# per dimension are shown) and pandas shows up to 60 columns across a
# 2000-character display width.
np.set_printoptions(edgeitems=30, linewidth=1000)
pd.set_option('display.max_columns', 60, 'display.width', 2000)

In [None]:
# Make the parent of the current working directory importable, so that the
# local module can be imported without being installed.
import os
import sys
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    # Only append once, so re-running this cell does not grow sys.path
    sys.path.append(module_path)

In [None]:
from multiLevelCoSurrogates.local import base_dir

In [None]:
# Record files to analyse; each name is appended directly to `base_dir`
records = [
    '_ei_records.csv',
    '_ucb_records.csv',
]

# List what is present in the data directory
print(os.listdir(base_dir))

# Load the first record file with its 'index' column as the row index, then preview it
first_records_path = base_dir + records[0]
df = pd.read_csv(first_records_path, index_col='index')
df.head()

In [None]:
# Plot the mean MSE per iteration (with a +/- 1.96 standard deviation band) for
# every (which_model, fidelity) combination found in `df`.

# Group first: the subplot grid below is sized from the number of groups, so
# `grouped` has to exist before `n_cols`/`n_rows` are computed (otherwise this
# cell raises a NameError on a fresh kernel).
grouped = df.groupby(by=['which_model', 'fidelity'])
mse_names = ['mse_high', 'mse_low', 'mse_hier', 'mse_low_on_high']

n_cols = min(len(grouped), 3)
n_rows = int(np.ceil(len(grouped)/n_cols))
# squeeze=False always yields a 2D array of Axes, so flattening also works
# when there is only a single subplot.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12,4*n_rows), squeeze=False)

for ax, (name, sub_df) in zip(axes.flatten(), grouped):
    per_iteration = sub_df.groupby(by='iteration')[mse_names]
    means = per_iteration.mean()
    stds = per_iteration.std()
    means.plot(ax=ax)

    # One band per MSE column (rows after the transpose). 'C0'..'C3' are the
    # first four colours of the active colour cycle, intended to match the
    # line colours drawn by `means.plot` above.
    low_bounds = means.values.T - stds.values.T*1.96
    high_bounds = means.values.T + stds.values.T*1.96
    for low, high, color in zip(low_bounds, high_bounds, ['C0', 'C1', 'C2', 'C3']):
        ax.fill_between(means.index, low, high, color=color, alpha=.2)

    ax.set_title('acq on {}, {} fidelity updated'.format(*name))

plt.tight_layout()
plt.show()

In [None]:
def _normalize_to_first_iteration(sub_df, mse_names):
    """Divide the `mse_names` columns of `sub_df` by their value in the first row of each run.

    The rows of `sub_df` are interpreted positionally as consecutive runs of
    equal length, where the run length is the number of unique values in the
    'iteration' column. The first row of every run serves as the reference, so
    it becomes 1.0 in the result.

    Returns a new DataFrame (same index as `sub_df`) holding the 'iteration'
    column followed by the normalized `mse_names` columns. `sub_df` itself is
    not modified.

    Raises ValueError if the number of rows is not a multiple of the number of
    unique iterations.
    """
    num_iters = len(sub_df['iteration'].unique())
    values = sub_df[mse_names].to_numpy(dtype=float)
    if num_iters == 0 or len(values) % num_iters != 0:
        raise ValueError(
            'Cannot split {} rows into runs of {} iterations each'.format(len(values), num_iters)
        )

    # Reshape so that the iterations form their own axis: (run, iteration, metric).
    # `reshape` is used rather than assigning to `.shape`, which fails on
    # non-contiguous arrays.
    per_run = values.reshape(-1, num_iters, len(mse_names))

    # Row 0 of every run is the reference; keeping that axis with length 1
    # lets broadcasting divide each run by its own reference row.
    reference = per_run[:, :1, :]
    normalized = (per_run / reference).reshape(values.shape)

    result = pd.DataFrame(normalized, columns=mse_names, index=sub_df.index)
    # Copy the iteration values positionally, so a non-unique index cannot misalign them
    result.insert(0, 'iteration', sub_df['iteration'].to_numpy())
    return result


def plot_normalized(df, axes=None):
    """Plot per-iteration MSE values normalized by each run's first row.

    For every (which_model, fidelity) group in `df`, the four MSE columns are
    divided by their value in the first row of the corresponding run, and the
    mean per iteration is plotted with a +/- 1.96 standard deviation band.
    A horizontal line at 1.0 marks the reference level, and the y-axis is
    limited to [0, 2].

    Parameters
    ----------
    df : DataFrame with the columns 'which_model', 'fidelity', 'iteration',
        'mse_high', 'mse_low', 'mse_hier' and 'mse_low_on_high'. Within each
        group, rows are expected to be ordered as consecutive runs with one
        row per iteration.
    axes : optional Axes, sequence of Axes or array of Axes to draw on, one per
        group. If omitted, a new figure with up to three columns is created.

    The function shows the figure and returns nothing. `df` is not modified.
    """
    grouped = df.groupby(by=['which_model', 'fidelity'])
    mse_names = ['mse_high', 'mse_low', 'mse_hier', 'mse_low_on_high']

    if axes is None:
        n_cols = min(len(grouped), 3)
        n_rows = int(np.ceil(len(grouped)/n_cols))
        # squeeze=False: always a 2D array of Axes, even for a single subplot
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(12,4*n_rows), squeeze=False)

    # Accept a single Axes, a sequence of Axes or an array of any shape
    flat_axes = np.atleast_1d(np.asarray(axes, dtype=object)).ravel()

    for ax, (name, sub_df) in zip(flat_axes, grouped):
        normalized = _normalize_to_first_iteration(sub_df, mse_names)
        per_iteration = normalized.groupby(by='iteration')[mse_names]
        means = per_iteration.mean()
        stds = per_iteration.std()

        means.plot(ax=ax)

        # One band per MSE column (rows after the transpose). 'C0'..'C3' are
        # the first four colours of the active colour cycle, intended to match
        # the line colours drawn by `means.plot` above.
        low_bounds = means.values.T - stds.values.T*1.96
        high_bounds = means.values.T + stds.values.T*1.96
        for low, high, color in zip(low_bounds, high_bounds, ['C0', 'C1', 'C2', 'C3']):
            ax.fill_between(means.index, low, high, color=color, alpha=.2)

        # Reference level: values equal to those in the first row of each run
        ax.axhline(y=1.0, color='black', alpha=0.5)
        ax.set_title('acq on {}, {} fidelity updated'.format(*name))
        ax.set_ylim([0,2])

    plt.tight_layout()
    plt.show()

In [None]:
# Normalized MSE curves for the first records file
records_file = records[0]
print(records_file)
plot_normalized(pd.read_csv(base_dir+records_file, index_col='index'))

In [None]:
# Normalized MSE curves for the second records file
records_file = records[1]
print(records_file)
plot_normalized(pd.read_csv(base_dir+records_file, index_col='index'))