In [None]:
#!/usr/bin/python3
# -*- coding: utf-8 -*-

"""
2021-09-09-mfbo-comparisons.py: script to plot comparisons between different
runs of (different) multi-fidelity Bayesian optimization algorithms
"""

import argparse
from collections import defaultdict
from itertools import product
from operator import itemgetter

import matplotlib.pyplot as plt
import mf2
import numpy as np
import pandas as pd
from parse import compile
from pyprojroot import here

from multiLevelCoSurrogates import CandidateArchive

# Templates for parsing the names of experiment folders and the files in them
subfolder_template = compile('{func_name}-{method}-b{init_budget:d}-i{idx:d}')
archive_template = compile('archive_{iteration:d}.npy')
errorgrid_template = compile('errorgrid_{iteration:d}.nc')

# Input data location
data_path = here('files/2020-11-05-simple-mfbo/')

# Output location for the plots; created up front, including missing parents
plot_path = here('plots/2021-09-09-mfbo-comparisons/', warn=False)
plot_path.mkdir(parents=True, exist_ok=True)

# Registry of benchmark functions, keyed by their lower-cased name
named_functions = {}
for func in mf2.bi_fidelity_functions:
    named_functions[func.name.lower()] = func

# Add every adjustable function, instantiated for a = 0.0, 0.1, ..., 1.0
for a in np.round(np.linspace(0, 1, 11), 2):
    for f in mf2.adjustable.bi_fidelity_functions:
        # the paciorek function is left out for a == 0
        if a == 0 and 'paciorek' in f.name.lower():
            continue
        func = f(a)
        named_functions[func.name.lower()] = func


def compare_different_runs(save_exts=('.png', '.pdf')):
    """Compare logged data across repeated runs of one problem/strategy pair

    Placeholder: no comparison is implemented yet, so calling this function
    does nothing and returns None.

    :param save_exts:  which extensions to use when saving plots (unused so far)
    """
    return None


def compare_different_strategies(save_exts=('.png', '.pdf')):
    """Plot the logs of all strategies ('methods') for the same problem together

    Every entry in `data_path` whose name matches `subfolder_template` is
    grouped with the entries that share its function name, initial budget and
    index, i.e. that differ by method only. Each group gets a single figure
    of 3 rows by 2 columns, which is saved to `plot_path` once per extension.

    Assumes only a single run is available for now

    :param save_exts:  which extensions to use when saving plots
    """
    # collect (method, folder) pairs under a key of everything except 'method'
    experiments_per_problem = {}
    for candidate in data_path.iterdir():
        parsed = subfolder_template.parse(candidate.name)
        if parsed:
            problem = (parsed['func_name'], parsed['init_budget'], parsed['idx'])
            experiments_per_problem.setdefault(problem, []).append(
                (parsed['method'], candidate)
            )

    # one figure per problem, one set of curves per method
    for problem, experiments in experiments_per_problem.items():
        func_name, init_budget, idx = problem
        print(f'{func_name} with init_budget={init_budget} (idx {idx})')

        fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(8, 9), constrained_layout=True)
        fig.suptitle(
            f"Method comparison for {func_name} with init_budget={init_budget} (idx {idx})"
        )

        # methods are drawn in alphabetical order
        for method, folder in sorted(experiments, key=itemgetter(0)):
            log = pd.read_csv(folder / 'log.csv', index_col=0, sep=';')
            init_archive = np.load(folder / 'archive_000.npy', allow_pickle=True).item()
            log = add_min_over_time_to_log(log, init_archive, func_name.lower())
            plot_on_axes(axes, init_budget, log, label=method)
        # the top-left panel does not get a legend from plot_on_axes
        axes[0, 0].legend(loc=0)

        for suffix in save_exts:
            fig.savefig(plot_path / f'comparison-{func_name}-b{init_budget}-i{idx}{suffix}')
        plt.close()


def _finite_y_max(ax):
    """Return the largest finite y-value over all lines on `ax`, or None if there is none"""
    peaks = []
    for line in ax.get_lines():
        ydata = np.asarray(line.get_ydata(), dtype=float)
        ydata = ydata[np.isfinite(ydata)]
        if ydata.size:
            peaks.append(ydata.max())
    return max(peaks) if peaks else None


def plot_on_axes(axes, init_budget, df, label=''):
    """Draw the logged data of one experiment onto a 3x2 grid of axes

    Can be called repeatedly with the same `axes` to overlay several
    experiments; axis limits are kept wide enough for all curves drawn so far.

    Panels: EG size path (0,0), tau (0,1), wall-time (1,0), model reuse
    fraction (1,1), best fitness per fidelity (2,0) and distance to the
    optimum (2,1). No legend is added to panel (0,0) by this function.

    :param axes:         grid of matplotlib axes, indexed as axes[row, col]
                         with at least 3 rows and 2 columns
    :param init_budget:  value from which df['budget'] is subtracted to get
                         the x-axis 'evaluation cost' (presumably the budget
                         the experiment started with)
    :param df:           log of the experiment; the columns 'budget', 'nlow',
                         'nhigh', 'tau', 'wall_time', 'reuse_fraction',
                         'opt_low', 'opt_high' and 'err_to_opt' are read
    :param label:        legend label for the curves of this experiment
    """
    budget_used = init_budget - df['budget'].values

    ax = axes[0, 0]
    # EG size path
    ax.plot(df['nlow'].values, df['nhigh'].values, marker='o', label=label)
    ax.set_title('EG size \'path\'')
    ax.set_ylabel('high-fid samples')
    ax.set_xlabel('low-fid samples')

    ax = axes[0, 1]
    # tau / budget
    ax.plot(budget_used, df['tau'].values, label=label)
    ax.set_title('Tau')
    # The upper limit has to cover every curve on this panel, not just the
    # one added last, otherwise a later experiment clips the earlier ones.
    # Non-finite values are ignored since they are not valid axis limits.
    tau_top = _finite_y_max(ax)
    if tau_top is None:
        ax.set_ylim(bottom=0)
    else:
        ax.set_ylim(bottom=0, top=tau_top)
    ax.set_ylabel('$\\tau$')
    ax.set_xlabel('evaluation cost')
    ax.legend(loc='best')

    ax = axes[1, 0]
    # wall-time / budget
    ax.plot(budget_used, df['wall_time'].values, label=label)
    ax.set_title('wall-time')
    ax.set_yscale('log')
    ax.set_ylabel('time (s)')
    ax.set_xlabel('evaluation cost')
    ax.legend(loc='best')

    ax = axes[1, 1]
    # reuse_fraction / budget
    ax.plot(budget_used, df['reuse_fraction'].values, label=label)
    ax.set_title('reuse_fraction')
    ax.set_ylim(bottom=0, top=1)
    ax.set_ylabel('model reuse fraction')
    ax.set_xlabel('evaluation cost')
    ax.legend(loc='best')

    ax = axes[2, 0]
    # minimum fitness over time per fidelity
    ax.plot(budget_used, df['opt_low'], label=f'{label} (low)')
    ax.plot(budget_used, df['opt_high'], label=f'{label} (high)')
    ax.set_title('best fitness')
    # set_ylim() turns autoscaling off, which would freeze the upper limit at
    # the range of the first experiment. Re-enable it first so the top follows
    # all curves drawn so far, then pin the bottom at 0 again.
    ax.autoscale(enable=True, axis='y')
    ax.set_ylim(bottom=0)
    ax.set_ylabel('fitness (high- and low-fidelity)')
    ax.set_xlabel('evaluation cost')
    ax.legend(loc='best')

    ax = axes[2, 1]
    # error to high-fidelity optimum for high-fid evaluated values
    ax.plot(budget_used, df['err_to_opt'], label=label)
    ax.set_title('distance to optimum')
    ax.set_yscale('log')
    ax.set_ylabel('y-error')
    ax.set_xlabel('evaluation cost')
    ax.legend(loc='best')


def add_min_over_time_to_log(df: pd.DataFrame, init_archive: CandidateArchive, func_name: str):
    """Extend the log with the best fitness found so far for each fidelity

    Three columns are written to `df` itself, which is also returned:
    'opt_low' and 'opt_high' hold the running minimum of the logged fitness
    of that fidelity, where the first entry also takes the minimum stored in
    `init_archive` into account. 'err_to_opt' is 'opt_high' minus the
    high-fidelity value of the function at its `x_opt`.

    :param df:            log with at least the columns 'fidelity' and 'fitness'
    :param init_archive:  archive of which `.min[fidelity]` is read for
                          'low' and 'high'
    :param func_name:     key of the function in `named_functions`
    """
    for fidelity in ('low', 'high'):
        is_fidelity = df['fidelity'] == fidelity

        # rows of the other fidelity stay at +inf, so they can never lower
        # the running minimum
        best_so_far = np.full(len(df['fitness']), np.inf)
        best_so_far[is_fidelity] = df.loc[is_fidelity, 'fitness']

        # start from the best value that was already in the initial archive
        best_so_far[0] = min(init_archive.min[fidelity], best_so_far[0])
        df[f'opt_{fidelity}'] = np.minimum.accumulate(best_so_far)

    benchmark = named_functions[func_name]
    y_opt = benchmark.high(benchmark.x_opt)
    df['err_to_opt'] = df['opt_high'] - y_opt
    return df


In [None]:
from operator import itemgetter
from pprint import pprint

# High-fidelity value at `x_opt` for each adjustable function, keyed by the
# capitalized name that is matched against the experiment folder names below
optima = {
    name: func.high(func.x_opt)
    for name, func in [
        ('Branin', mf2.adjustable.branin),
        ('Paciorek', mf2.adjustable.paciorek),
        ('Hartmann', mf2.adjustable.hartmann3),
        ('Trid', mf2.adjustable.trid),
    ]
}

# Template to extract the adjustment parameter `a` from a folder name
folder_template = compile('Adjustable {name} {a:f}-{remainder}')


def _errors_per_adjustment(folders, optimum):
    """Return (a, error) pairs for the given experiment folders, sorted by a

    For every folder, `a` is parsed from the folder name using
    `folder_template` and the error is the lowest high-fidelity fitness in
    the folder's 'log.csv' minus `optimum`. Folders with a name that does not
    match the template are skipped, as there is no `a` to plot them at.

    :param folders:  iterable of paths to experiment folders
    :param optimum:  value to subtract from the best high-fidelity fitness
    """
    errors = []
    for folder in folders:
        match = folder_template.parse(folder.name)
        if not match:
            continue
        df = pd.read_csv(folder / 'log.csv', index_col=0, sep=';')
        y_best = df.loc[df['fidelity'] == 'high', 'fitness'].min() - optimum
        errors.append((match['a'], float(y_best)))
    errors.sort(key=itemgetter(0))
    return errors


# For each adjustable function: plot the final error of the 'fixed' and
# 'naive' experiments against the adjustment parameter a
for func_name in optima:
    folders = [folder for folder in data_path.iterdir() if func_name in folder.name]

    fixed_folders = [f for f in folders if 'fixed' in f.name]
    naive_folders = [f for f in folders if 'naive' in f.name]

    fixed = _errors_per_adjustment(fixed_folders, optima[func_name])
    naive = _errors_per_adjustment(naive_folders, optima[func_name])

    fig, axes = plt.subplots(nrows=1, ncols=1, constrained_layout=True)
    # zip(*pairs) transposes the (a, error) pairs into x- and y-sequences
    axes.plot(*zip(*fixed), label='fixed')
    axes.plot(*zip(*naive), label='naive')
    axes.set_ylabel('error')
    axes.set_xlabel('adjustment parameter a')
    axes.legend(loc=1)
    axes.set_yscale('log')
    axes.set_title(func_name)
    fig.savefig(plot_path / f'adjustable_{func_name}_fixed_naive_comparison.png')
    fig.show()