# Setup

In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings

# Widen the textual output of pandas and numpy so that large frames/arrays
# are printed on a single line instead of being wrapped or truncated early.
pd.set_option('display.width', 2000)
pd.set_option('display.max_columns', 60)
np.set_printoptions(edgeitems=30, linewidth=1000)

# Blanket suppression: every warning raised after this point is hidden.
warnings.filterwarnings("ignore")

In [None]:
# Make the parent directory importable so that the local package living one
# level above this notebook can be imported in the following cells.
import os
import sys

module_path = os.path.abspath(os.pardir)
# Only append once, so re-running this cell does not add duplicates.
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
from multiLevelCoSurrogates.local import base_dir

# Defining Data

In [None]:
# Each entry pairs an acquisition-function name with the parameter value that
# is embedded in the name of its records file (see `record_format` below).
acqs = (
    [('ucb', param) for param in (1.5, 2.5, 3.5)]
    + [('ei', param) for param in (0.0, 0.5, 1.0)]
    + [('poi', '')]
)

# Forwarded as `normalize=` to the plotting calls further down.
normalize = False
# Filename template that is filled with one entry of `acqs`.
record_format = '{}{}_records.csv'

# List the contents of the data directory.
print(os.listdir(base_dir))

# Load the records of the first configuration and preview them.
df = pd.read_csv(
    base_dir + record_format.format(*acqs[0]),
    index_col='index',
)
df.head()

In [None]:
# One panel per (which_model, fidelity) group, showing the MSE columns
# averaged per iteration.
mse_columns = ['mse_high', 'mse_low', 'mse_diff']
fig, axes = plt.subplots(4, 2, figsize=(9, 12))

grouped = df.groupby(by=['which_model', 'fidelity'])

# zip() ends with the shorter input, so at most one group per axis (8) is drawn.
for ax, (name, sub_df) in zip(axes.flatten(), grouped):
    # Average only the columns that are plotted. Calling .mean() on the whole
    # frame raises a TypeError on pandas >= 2.0 as soon as any other column
    # (e.g. a grouping key) is non-numeric.
    mean_per_iteration = sub_df.groupby(by='iteration')[mse_columns].mean()
    mean_per_iteration.plot(y=mse_columns, ax=ax)
    ax.set_title('acq on {}, {} fidelity updated'.format(*name))

fig.tight_layout()
plt.show()

# Normalized plots of the average over 10 runs

In [None]:
def _normalize_by_first_iteration(sub_df, mse_names):
    """Return a copy of `sub_df` with the MSE columns scaled per run.

    The rows are treated as consecutive blocks of `num_iters` rows, where
    `num_iters` is the number of distinct values in the 'iteration' column.
    Every block is divided by its own first row, so the first row of each
    block becomes 1 for all columns in `mse_names`.

    NOTE(review): this assumes the records are stored run after run, each run
    contributing exactly one row per iteration -- confirm against the code
    that writes the CSV files.

    :param sub_df:     DataFrame with an 'iteration' column and the `mse_names` columns
    :param mse_names:  list of column names to normalize
    :return:           new DataFrame; `sub_df` itself is left untouched
    :raises ValueError: if the number of rows is not a multiple of `num_iters`
    """
    num_iters = len(sub_df['iteration'].unique())
    if num_iters == 0 or len(sub_df) % num_iters != 0:
        raise ValueError(
            'cannot split {} rows into runs of {} iterations'.format(len(sub_df), num_iters)
        )

    values = sub_df[mse_names].to_numpy(dtype=float)

    # Expose the iterations as a separate axis: (run, iteration, mse column).
    # reshape() is used instead of assigning to `.shape`, which NumPy
    # discourages and which fails for non-contiguous data.
    per_run = values.reshape(-1, num_iters, len(mse_names))

    # The first row of every run is the reference; it is selected by position
    # and therefore independent of the index labels of `sub_df`.
    reference = per_run[:, :1, :]
    normalized = (per_run / reference).reshape(values.shape)

    # Write into a copy so that no slice of the caller's frame is modified.
    result = sub_df.copy()
    result[mse_names] = normalized
    return result


def plot_normalized(df, normalize=True):
    """Plot the per-iteration mean of the MSE columns for every group in `df`.

    `df` is grouped by ('which_model', 'fidelity') and each group is drawn on
    its own axis of a 1x5 figure. Because the groups are zipped with the axes,
    groups beyond the fifth are not drawn.

    :param df:         DataFrame with the columns 'which_model', 'fidelity',
                       'iteration', 'mse_high', 'mse_low' and 'mse_diff'
    :param normalize:  if True, divide the MSE values of every run by the
                       values in that run's first row before averaging
    """
    mse_names = ['mse_high', 'mse_low', 'mse_diff']

    fig, axes = plt.subplots(1, 5, figsize=(16, 3))
    grouped = df.groupby(by=['which_model', 'fidelity'])

    for ax, (name, sub_df) in zip(axes.flatten(), grouped):
        if normalize:
            sub_df = _normalize_by_first_iteration(sub_df, mse_names)

        # Average only the plotted columns: .mean() over the whole frame raises
        # a TypeError on pandas >= 2.0 if any other column is non-numeric.
        mean_per_iteration = sub_df.groupby(by='iteration')[mse_names].mean()
        mean_per_iteration.plot(y=mse_names, ax=ax)
        ax.set_title('acq on {}, fid {}'.format(*name))

    fig.tight_layout()
    plt.show()

## Upper Confidence Bound (UCB)

In [None]:
# Print the selected (acquisition, parameter) pair, then load its records CSV
# from `base_dir` and plot it.
print(acqs[0])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[0]),
        index_col='index',
    ),
    normalize=normalize,
)

In [None]:
# Print the selected (acquisition, parameter) pair, then load its records CSV
# from `base_dir` and plot it.
print(acqs[1])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[1]),
        index_col='index',
    ),
    normalize=normalize,
)

In [None]:
# Print the selected (acquisition, parameter) pair, then load its records CSV
# from `base_dir` and plot it.
print(acqs[2])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[2]),
        index_col='index',
    ),
    normalize=normalize,
)

## Expected Improvement (EI)

In [None]:
# Print the selected (acquisition, parameter) pair, then load its records CSV
# from `base_dir` and plot it.
print(acqs[3])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[3]),
        index_col='index',
    ),
    normalize=normalize,
)

In [None]:
# Print the selected (acquisition, parameter) pair, then load its records CSV
# from `base_dir` and plot it.
print(acqs[4])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[4]),
        index_col='index',
    ),
    normalize=normalize,
)

In [None]:
# Print the selected (acquisition, parameter) pair, then load its records CSV
# from `base_dir` and plot it.
print(acqs[5])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[5]),
        index_col='index',
    ),
    normalize=normalize,
)

## Probability of Improvement (POI)

In [None]:
# Print the selected (acquisition, parameter) pair, then load its records CSV
# from `base_dir` and plot it.
print(acqs[6])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[6]),
        index_col='index',
    ),
    # Forward the notebook-level flag like every other plotting cell does;
    # without it this plot silently used the function default (normalize=True).
    normalize=normalize,
)

# Single example runs
For these runs, surface plots of all intermediate steps are available

In [None]:
# From here on the plotting cells read the '*_SINGLE_records.csv' files:
# the template is filled with one (name, parameter) entry of `acqs`.
record_format = '{}{}_SINGLE_records.csv'

## Upper Confidence Bound (UCB)

In [None]:
# Print the selected (acquisition, parameter) pair, then load the records CSV
# named by the current `record_format` from `base_dir` and plot it.
print(acqs[0])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[0]),
        index_col='index',
    ),
    normalize=normalize,
)

In [None]:
# Print the selected (acquisition, parameter) pair, then load the records CSV
# named by the current `record_format` from `base_dir` and plot it.
print(acqs[1])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[1]),
        index_col='index',
    ),
    normalize=normalize,
)

In [None]:
# Print the selected (acquisition, parameter) pair, then load the records CSV
# named by the current `record_format` from `base_dir` and plot it.
print(acqs[2])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[2]),
        index_col='index',
    ),
    normalize=normalize,
)

## Expected Improvement (EI)

In [None]:
# Print the selected (acquisition, parameter) pair, then load the records CSV
# named by the current `record_format` from `base_dir` and plot it.
print(acqs[3])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[3]),
        index_col='index',
    ),
    normalize=normalize,
)

In [None]:
# Print the selected (acquisition, parameter) pair, then load the records CSV
# named by the current `record_format` from `base_dir` and plot it.
print(acqs[4])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[4]),
        index_col='index',
    ),
    normalize=normalize,
)

In [None]:
# Print the selected (acquisition, parameter) pair, then load the records CSV
# named by the current `record_format` from `base_dir` and plot it.
print(acqs[5])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[5]),
        index_col='index',
    ),
    normalize=normalize,
)

## Probability of Improvement (POI)

In [None]:
# Print the selected (acquisition, parameter) pair, then load the records CSV
# named by the current `record_format` from `base_dir` and plot it.
print(acqs[6])
plot_normalized(
    pd.read_csv(
        base_dir + record_format.format(*acqs[6]),
        index_col='index',
    ),
    normalize=normalize,
)