In [None]:
import datetime as dt
import itertools
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import ot
import pandas as pd
import pickle5 as pickle
import seaborn as sns
import time
from tqdm.notebook import tqdm

In [None]:
# Seed NumPy's legacy global RNG so that any np.random draws are repeatable.
np.random.seed(seed=315)

In [None]:
# Root directory holding the simulation outputs read (and figures written) below.
# The default is the original author's local path; set the OJ_SIMULATION_DIR
# environment variable to run the notebook on another machine without editing it.
output_dir = os.environ.get(
    "OJ_SIMULATION_DIR",
    "/Users/kevinoconnor/Dropbox/Research/EstimationOfOJ/Simulation",
)

# Experiment 1: Cost convergence

In [None]:
# Load the pickled result frames of the cost-convergence run and stack them
# into a single DataFrame, tagging every row with the file it came from.
file_names = [
    "identical_markov_chains_results_df.pkl",
    "identical_hmms_results_df.pkl",
    "mc_vs_mc_results_df.pkl",
    "hmm_vs_hmm_results_df.pkl",
]
file_dir = os.path.join(output_dir, "cost_convergence", "20230325093101069040")

# Collect the frames first and concatenate once: growing a DataFrame with
# pd.concat inside the loop re-copies all previous rows on every iteration and
# starts from an empty frame, which newer pandas versions warn about.
loaded_frames = []
for file_name in file_names:
    file_path = os.path.join(file_dir, file_name)
    # NOTE: unpickling can execute arbitrary code; only load files produced by
    # our own simulation runs.
    with open(file_path, "rb") as fh:
        _df = pickle.load(fh)
    # The "name" column is what the plotting cell filters on to pick a panel.
    _df["name"] = file_name
    loaded_frames.append(_df)
results_df = pd.concat(loaded_frames, ignore_index=True)

display(results_df)

In [None]:
# Maps each results file name to the human-readable title shown above its panel.
file_name_dict = dict(
    [
        ("identical_markov_chains_results_df.pkl", "Identical Markov Chains"),
        ("identical_hmms_results_df.pkl", "Identical HMMs"),
        ("mc_vs_mc_results_df.pkl", "Markov Chain vs Markov Chain"),
        ("hmm_vs_hmm_results_df.pkl", "HMM vs HMM"),
    ]
)

In [None]:
# One panel per results file: the "mean" column against "n", one curve per
# `reg` value, with a shaded band between the "lower_sd" and "upper_sd" columns.
fig, axes = plt.subplots(1, 4, figsize=(21,3))

for i, file_name in enumerate(file_names):
    ax = axes[i]
    df_to_plot = results_df[results_df["name"] == file_name]

    # Draw the curve and its band in a single pass over the reg groups.
    # Only the curve carries a label, so the legend lists each reg value once
    # no matter when it is built.
    for reg, df in df_to_plot.groupby("reg"):
        ax.plot(df["n"], df["mean"], label="reg={}".format(reg))
        ax.fill_between(df["n"], df["lower_sd"], df["upper_sd"], alpha=0.2)

    # The legend is drawn on the first panel only.
    if i == 0:
        ax.legend()

    ax.set_xlabel("n")
    ax.set_ylabel("Estimated Cost")
    ax.set_title(file_name_dict[file_name])
    ax.grid()

# Timestamped output folder (YYYYmmddHHMMSS + microseconds). strftime always
# emits the 6-digit microsecond field, whereas str(datetime.now()) omits it
# when it happens to be zero, which would shorten the id.
exp_id = dt.datetime.now().strftime("%Y%m%d%H%M%S%f")
fig_dir = os.path.join(output_dir, "figures", exp_id)
os.makedirs(fig_dir, exist_ok=True)
# Save through the figure handle rather than pyplot's "current figure" state.
fig.savefig(os.path.join(fig_dir, "cost_convergence.png"), bbox_inches='tight')

plt.show()

# Experiment 2: Runtime and cost differences

In [None]:
# Read the pickled time_df of run 20230321090724157474 and preview its first rows.
# NOTE: this rebinds `results_df`, replacing the Experiment 1 frame; re-run the
# Experiment 1 loading cell before re-drawing the Experiment 1 figure.
run_dir = os.path.join(output_dir, "20230321090724157474")
file_path = os.path.join(run_dir, "time_df.pkl")
with open(file_path, mode="rb") as pkl_file:
    results_df = pickle.load(pkl_file)
display(results_df.head())

In [None]:
# Three panels, one per metric column of results_df. For every `reg` value the
# metric is grouped by "n" and drawn as mean with error bars of 2 * np.std
# (np.std is called with its default ddof=0).
fig, axes = plt.subplots(1, 3, figsize=(16,3))

# Metric column drawn in each panel, left to right.
metric_columns = ["time_diff", "time_diff_pct", "cost_diff_pct"]

for reg, df_reg in results_df.groupby("reg"):
    df_grp = df_reg.groupby("n")

    for ax, column in zip(axes, metric_columns):
        # Use the string alias "mean": passing np.mean to GroupBy.agg is
        # deprecated in recent pandas and emits a FutureWarning.
        _df = df_grp[column].agg(["mean", lambda x: 2*np.std(x)]).reset_index()
        # After reset_index the columns are, in order: n, mean, 2*std.
        ax.errorbar(
            x=_df.iloc[:,0],
            y=_df.iloc[:,1],
            yerr=_df.iloc[:,2],
            label="eta={}".format(reg)
        )

# Force scientific notation on the y axis of the first panel.
axes[0].ticklabel_format(style='sci', axis='y', scilimits=(0,0))
axes[0].set_xlabel("n")
axes[0].set_ylabel("Time Difference (s)")
axes[0].set_title("Difference in Runtimes")
# Only the first panel carries a legend.
axes[0].legend()
axes[0].grid()

axes[1].set_xlabel("n")
axes[1].set_ylabel("Time Difference (%)")
axes[1].set_title("Percent Difference in Runtimes")
axes[1].grid()

axes[2].set_xlabel("n")
axes[2].set_ylabel("Cost Difference (%)")
axes[2].set_title("Percent Difference in Cost")
axes[2].grid()

# Handles/labels for an optional shared figure-level legend (currently disabled).
handles, labels = axes[2].get_legend_handles_labels()

# fig.legend(handles, labels, loc = "lower center", ncol=3, bbox_to_anchor=(0.5, -0.04), borderaxespad=0)

# Timestamped output folder (YYYYmmddHHMMSS + microseconds). strftime always
# emits the 6-digit microsecond field, whereas str(datetime.now()) omits it
# when it happens to be zero, which would shorten the id.
exp_id = dt.datetime.now().strftime("%Y%m%d%H%M%S%f")
fig_dir = os.path.join(output_dir, "figures", exp_id)
os.makedirs(fig_dir, exist_ok=True)
# Save through the figure handle rather than pyplot's "current figure" state.
fig.savefig(os.path.join(fig_dir, "runtimes.png"), bbox_inches='tight')

plt.show()