# Imports

In [None]:
import os

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker  
from matplotlib import gridspec
import pandas as pd
import seaborn as sns
import numpy as np
from matplotlib.lines import Line2D

In [None]:
# Folder that contains the per-run result directories, and the folder used
# for the generated figures.
path_to_run_results = "/home/knut/Documents/project/UnseededRun_results"
path_to_plots = "/home/knut/Documents/project/UnseededRun_results/plots"

# One (".../run<k>", ".../rs<k>") directory pair for k = 1..5.
runs = [
    (
        os.path.join(path_to_run_results, f"run{k}"),
        os.path.join(path_to_run_results, f"rs{k}"),
    )
    for k in range(1, 6)
]

In [None]:
def get_convergence_data(df, max_rows=15, n_trials=100):
    """Return the running minimum of the first trials of each leading row.

    For each of the first ``max_rows`` rows of ``df`` a dict is built that
    maps trial number ``j`` (``0 .. n_trials - 1``) to the smallest value
    seen in that row up to and including trial ``j``.  A value only replaces
    the current best when it compares strictly smaller, so a NaN never
    replaces an earlier best value.

    Args:
        df: DataFrame with one row per run and one column per trial.
        max_rows: Maximum number of leading rows to process.
        n_trials: Number of trials (columns) to read from every row.

    Returns:
        A list with one ``{trial: best_so_far}`` dict per processed row.
    """
    # Integer keys on a Series whose labels are not numeric (e.g. the string
    # column names produced by ``read_csv``) used to fall back to positional
    # access; that fallback is deprecated in pandas.  Choose the accessor
    # explicitly so both label layouts keep returning the same values.
    by_position = not pd.api.types.is_numeric_dtype(df.columns.dtype)

    plot_data = []
    for _, row in df.head(max_rows).iterrows():
        value_at = row.iloc if by_position else row.loc
        data = {}
        for j in range(n_trials):
            current = value_at[j]
            if j == 0 or current < data[j - 1]:
                data[j] = current
            else:
                data[j] = data[j - 1]
        plot_data.append(data)
    return plot_data

In [None]:
def make_all_plot(bo_df, bo_mean, rs_df, rs_mean, name, h_str, start):
    """Plot individual and mean convergence curves for BO and RS and save them.

    The first 15 rows of ``bo_df`` and ``rs_df`` are drawn as faint lines
    (colour ``C0`` for BO, ``C1`` for RS); ``bo_mean`` and ``rs_mean`` are
    drawn on top as solid lines in the same colours.

    Args:
        bo_df: Frame whose first 15 rows are the BO curves (one row each).
        bo_mean: Mean BO curve, indexable by ``start``.
        rs_df: Frame whose first 15 rows are the RS curves (one row each).
        rs_mean: Mean RS curve, indexable by ``start``.
        name: Target passed to ``plt.savefig``.
        h_str: Not used by this function.
        start: Position in the mean curves whose larger value becomes the
            upper y-axis limit.
    """
    label_font = {'size': 16}
    series_colours = ('C0', 'C1')

    fig, ax = plt.subplots(figsize=(4.2, 5.6))
    ax.set_ylabel('MSE', fontdict=label_font)
    ax.set_xlabel('Trials', fontdict=label_font)

    # After transposing, every column is one run and every row one trial.
    bo_runs = bo_df.head(15).T
    rs_runs = rs_df.head(15).T

    # Lower limit: best value at trial 99 over all runs, minus a 0.5 margin.
    lowest_final = min(bo_runs.loc[99].min(), rs_runs.loc[99].min())
    highest_start = max(bo_mean[start], rs_mean[start])
    ax.set_ylim(bottom=lowest_final - 0.5, top=highest_start)

    # Faint per-run curves first, then the mean curves on top of them.
    for run_curves, colour in zip((bo_runs, rs_runs), series_colours):
        run_curves.plot(ax=ax, color=colour, alpha=0.2)
    for mean_curve, colour in zip((bo_mean, rs_mean), series_colours):
        ax.plot(mean_curve, color=colour)

    # Replace the per-run legend entries with one proxy line per algorithm.
    legend_handles = [Line2D([0], [0], color=colour, lw=2)
                      for colour in series_colours]
    ax.legend(legend_handles, ["BO", "RS"], loc="upper right")

    ax.grid()
    fig.tight_layout()
    plt.savefig(name)


In [None]:
def make_box_plot(df, name, outlier):
    """Draw a box plot of column ``mean`` grouped by column ``search`` and save it.

    Args:
        df: Frame providing the ``search`` (x) and ``mean`` (y) columns.
        name: Target passed to ``plt.savefig``.
        outlier: When truthy, the legend gets an extra "Outliers" entry.
    """
    fig, ax = plt.subplots(figsize=(4.2, 5.6))

    mean_marker_style = {
        "markerfacecolor": "white",
        "markeredgecolor": "black",
        "markersize": "8"
    }
    median_line_style = {"color": "k", "alpha": 1}
    sns.boxplot(
        ax=ax,
        x="search",
        y="mean",
        data=df,
        showmeans=True,
        meanprops=mean_marker_style,
        fliersize=8,
        medianprops=median_line_style
    )

    ax.set_ylabel('MSE', fontsize=16)
    ax.set_xlabel('Optimization Algorithm', fontsize=16)

    # Proxy artists for the legend: median line and mean marker always,
    # the outlier marker only on request.
    handles = [
        Line2D([0], [0], color='k', lw=2),
        Line2D([0], [0], color='w', marker='^', markerfacecolor="white",
               markeredgecolor="black", markersize=8),
    ]
    labels = ["Median", "Mean"]
    if outlier:
        handles.append(
            Line2D([0], [0], color='w', marker='d', markerfacecolor="black",
                   markeredgecolor="black", markersize=8)
        )
        labels.append("Outliers")
    ax.legend(handles, labels)

    ax.grid()
    fig.tight_layout()
    plt.savefig(name)

In [None]:
def make_all_box_plot(df, name):
    """Draw box plots of ``mean`` per ``space``, split by ``Algorithm``, and save them.

    Args:
        df: Frame providing the ``space`` (x), ``mean`` (y) and
            ``Algorithm`` (hue) columns.
        name: Target passed to ``plt.savefig``.
    """
    fig, ax = plt.subplots(figsize=(8.4, 5.6))

    mean_marker_style = {
        "markerfacecolor": "white",
        "markeredgecolor": "black",
        "markersize": "8"
    }
    sns.boxplot(
        ax=ax,
        data=df,
        x="space",
        y="mean",
        hue="Algorithm",
        showmeans=True,
        meanprops=mean_marker_style,
        fliersize=8
    )

    ax.set_ylabel('MSE', fontsize=16)
    ax.set_xlabel('CS', fontsize=16)
    ax.grid()
    fig.tight_layout()
    plt.savefig(name)
    

In [None]:
def make_improvement_plot(mean, std,
                          out_dir="/home/knut/Documents/project/UnseededRun_results/plots"):
    """Save two horizontal bar charts of percentage changes.

    ``mean`` is written to ``mean_change.pdf`` and ``std`` to
    ``std_change.pdf`` inside ``out_dir``.  Bars are labelled ``CS1``,
    ``CS2``, ... in input order and the x axis is formatted as percentages.
    No x-axis label is set.

    Args:
        mean: Sequence of values for the first chart, one per bar.
        std: Sequence of values for the second chart, one per bar.
        out_dir: Directory receiving both PDF files.  The default is the
            location this function previously had hard-coded.
    """
    _save_change_barh(
        mean,
        ["#7BB660", "#AFC980", "#43A040", "#208826", "#006d2c"],
        os.path.join(out_dir, "mean_change.pdf"),
    )
    _save_change_barh(
        std,
        ["#ef3b2c", "#fb6a4a", "#7BB660", "#43A040", "#208826"],
        os.path.join(out_dir, "std_change.pdf"),
    )


def _save_change_barh(values, colors, filename):
    """Draw ``values`` as one percentage-formatted barh chart and save it."""
    fig, ax = plt.subplots(figsize=(5.90551, 2))
    # One label per value, so the chart no longer requires exactly five bars.
    labels = ['CS{}'.format(k) for k in range(1, len(values) + 1)]
    ax.barh(labels, values, align='center', color=colors)
    ax.xaxis.set_major_formatter(mticker.PercentFormatter())
    ax.tick_params(axis='y', which='major', labelsize=10)
    ax.tick_params(axis='x', which='major', labelsize=8)
    # Keep the grid lines behind the bars.
    ax.set_axisbelow(True)
    ax.grid()
    fig.tight_layout()
    plt.savefig(filename, bbox_inches="tight")

In [None]:
# Result file(s) read from every run directory, and the file-name prefix
# used for the figures that belong to each of them.
results = ["train_val_results.csv"]
names = ["batch_"]

# Values collected over all runs and result files.
final_ = []
search_ = []
space_ = []
mean_diff = []
std_diff = []


def _with_summary(raw_df):
    """Build the convergence frame of ``raw_df`` and append summary rows.

    Returns the frame (with extra rows labelled "mean", "var" and "std")
    together with the column-wise mean, variance and standard deviation,
    all three computed before the summary rows were appended.
    """
    frame = pd.DataFrame(get_convergence_data(raw_df))
    col_mean = frame.mean(axis=0)
    col_var = frame.var(axis=0)
    col_std = frame.std(axis=0)

    frame.loc["mean"] = col_mean
    frame.loc["var"] = col_var
    frame.loc["std"] = col_std
    return frame, col_mean, col_var, col_std


for i, (bo_path, rs_path) in enumerate(runs):
    for j, result_file in enumerate(results):
        bo_csv = os.path.join(bo_path, result_file)
        rs_csv = os.path.join(rs_path, result_file)

        bo_df = pd.read_csv(bo_csv, index_col=0)
        rs_df = pd.read_csv(rs_csv, index_col=0)

        bo_df, bo_mean, bo_var, bo_std = _with_summary(bo_df)
        rs_df, rs_mean, rs_var, rs_std = _with_summary(rs_df)

        # Value at trial 99 of the 15 individual curves: BO first, then RS.
        final = (
            bo_df.head(15).loc[:, 99].to_numpy().tolist()
            + rs_df.head(15).loc[:, 99].to_numpy().tolist()
        )
        search = ["BO"] * 15 + ["RS"] * 15
        space = [i + 1] * 30

        final_.extend(final)
        search_.extend(search)
        space_.extend(space)

        # Relative change of BO versus RS at trial 99, in percent.
        mean_diff.append((bo_mean[99] - rs_mean[99]) / rs_mean[99] * 100)
        std_diff.append((bo_std[99] - rs_std[99]) / rs_std[99] * 100)

        start = 10 if i >= 3 else 3
        h_str = "Hyperparameter" if i == 0 else "Hyperparameters"
        name = os.path.join(path_to_plots, f"{names[j]}run{i + 1}_all.pdf")
        # Disabled: per-run convergence figure.
        # make_all_plot(bo_df, bo_mean, rs_df, rs_mean, name, h_str, start)

        d = {"mean": final, "search": search, "space": space}
        df = pd.DataFrame(data=d)
        name = os.path.join(path_to_plots, f"{names[j]}run{i + 1}_box.pdf")
        outlier = int(i % 3 == 0)
        # Disabled: per-run box plot.
        # make_box_plot(df, name, outlier)

d = {"mean": final_, "Algorithm": search_, "space": space_}
df = pd.DataFrame(data=d)
# Disabled: combined box plot over all runs.
#make_all_box_plot(df=df, name = os.path.join(path_to_plots, "all_box.pdf"))


make_improvement_plot(mean_diff, std_diff)