In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import pandas as pd
import numpy as np
import re
import os

from utils import load_yaml, load_pickled_object

# Global plot styling: seaborn "whitegrid" theme, and every saved figure is
# written at 400 dpi.
sns.set_style(style="whitegrid")
mpl.rcParams["savefig.dpi"] = 400

In [None]:
# Notebook-wide configuration: where run results are read from and where the
# generated figures are written, plus a default environment name.
results_dir, analysis_dir = "results", "analysis"
env_name = "LunarLander-v2"

In [None]:
def get_algo_results(algo, results_dir):
    """Collect the per-run evaluation metrics of one algorithm into a DataFrame.

    Every sub-directory of ``<results_dir>/<algo>`` (``algo`` lower-cased with
    "-" replaced by "_") is treated as one run. The directory name is split on
    "_" and parsed as ``<env>_<dt_depth>_<seed>`` for "VIPER" / "CS-VIPER" and
    as ``<env>_<no_of_experts>_<dt_depth>_<seed>`` for any other algorithm, so
    environment names must not themselves contain "_".

    Metrics are read from ``results.yml`` in each run directory; for the
    non-VIPER algorithms the ``*_d`` variants are additionally read from
    ``results_d.yml``.

    Parameters
    ----------
    algo : str
        Algorithm name, e.g. "VIPER", "CS-VIPER", "MoET", "CS-MoET".
    results_dir : str
        Root directory holding one sub-directory per algorithm.

    Returns
    -------
    pandas.DataFrame
        One row per run, sorted by ``env``, ``dt_depth`` and ``seed``. The
        frame is empty (but has all columns) when no run directory exists.

    Raises
    ------
    FileNotFoundError
        If the algorithm directory does not exist.
    IndexError, ValueError
        If a run directory name does not follow the expected pattern.
    KeyError
        If a results file lacks one of the expected metrics.
    """
    metrics = [
        "mean_episode_reward", "std_episode_reward",
        "mean_fidelity", "std_fidelity",
        "mean_misclassification_cost", "std_misclassification_cost",
    ]
    is_viper = algo in ("VIPER", "CS-VIPER")

    columns = ["env", "algo", "seed"]
    if is_viper:
        columns += ["dt_depth"] + metrics
    else:
        columns += ["no_of_experts", "dt_depth"] + metrics
        columns += [metric + "_d" for metric in metrics]

    algo_dir = os.path.join(results_dir, algo.replace("-", "_").lower())

    # Rows are gathered in a plain list and turned into a DataFrame once:
    # DataFrame.append no longer exists in pandas >= 2.0, and building the
    # frame in one go also lets pandas infer numeric dtypes for the metrics.
    records = []
    for subdir in os.listdir(algo_dir):
        run_dir = os.path.join(algo_dir, subdir)
        if not os.path.isdir(run_dir):
            # Stray files (e.g. ".DS_Store") are not runs; skip them.
            continue

        tags = subdir.split("_")
        if is_viper:
            row = {
                "env": tags[0],
                "algo": algo,
                "dt_depth": int(tags[1]),
                "seed": int(tags[2]),
            }
        else:
            row = {
                "env": tags[0],
                "algo": algo,
                "no_of_experts": int(tags[1]),
                "dt_depth": int(tags[2]),
                "seed": int(tags[3]),
            }
            results_d = load_yaml(os.path.join(run_dir, "results_d.yml"))
            for metric in metrics:
                row[metric + "_d"] = results_d[metric + "_d"]

        results = load_yaml(os.path.join(run_dir, "results.yml"))
        for metric in metrics:
            row[metric] = results[metric]

        records.append(row)

    algo_results = pd.DataFrame(records, columns=columns)
    return algo_results.sort_values(by=["env", "dt_depth", "seed"])

In [None]:
def _save_metric_barplots(data, metrics, metric_labels, title, save_dir):
    """Draw one bar plot per metric (tree depth on x, one bar per algorithm) and save each as PNG in ``save_dir``."""
    os.makedirs(save_dir, exist_ok=True)

    for metric, label in zip(metrics, metric_labels):
        fig, ax = plt.subplots(1, 1, figsize=(6, 4))

        sns.barplot(data=data, x="dt_depth", y=metric, hue="algo", errorbar="sd", errwidth=2, ax=ax)

        ax.set_xlabel("Tree Depth")
        ax.set_ylabel(label)

        # Every misclassification-cost plot, including the "(disc.)" variant,
        # gets its legend in the upper right; all other metrics lower right.
        if label.startswith("Misclassification Cost"):
            ax.legend(loc='upper right')
        else:
            ax.legend(loc='lower right')

        ax.set_title(title)
        fig.tight_layout(pad=0.5)
        fig.savefig(os.path.join(save_dir, label + ".png"))


def generate_performance_comparison_plots(algo, results_dir, analysis_dir, env=None):
    """Save bar plots comparing an algorithm with its "CS-" counterpart.

    ``algo == "VIPER"`` compares VIPER with CS-VIPER; any other value compares
    MoET with CS-MoET. For each environment in the results, one PNG per metric
    (cumulative reward, fidelity, misclassification cost) is written to
    ``<analysis_dir>/<A vs CS-A>/<env>/``. When the results carry a
    ``no_of_experts`` column, the "(disc.)" metrics are plotted as well and
    the figures go into one sub-directory per number of experts.

    Parameters
    ----------
    algo : str
        "VIPER" for the VIPER pair; anything else selects the MoET pair.
    results_dir : str
        Root results directory, passed on to ``get_algo_results``.
    analysis_dir : str
        Root directory the figures are written to (created if missing).
    env : str, optional
        Restrict the plots to this environment; all environments when falsy.
    """
    if algo == "VIPER":
        algo1, algo2 = "VIPER", "CS-VIPER"
    else:
        algo1, algo2 = "MoET", "CS-MoET"
    dir_name = algo1 + " vs " + algo2

    base_dir = os.path.join(analysis_dir, dir_name)
    os.makedirs(base_dir, exist_ok=True)

    results = pd.concat(
        [get_algo_results(algo1, results_dir), get_algo_results(algo2, results_dir)],
        axis=0,
    )
    if env:
        results = results.loc[results["env"] == env]

    metrics = ["mean_episode_reward", "mean_fidelity", "mean_misclassification_cost"]
    metrics_l = ["Cumulative Reward", "Fidelity", "Misclassification Cost"]

    has_experts = "no_of_experts" in results.columns
    if has_experts:
        metrics += ["mean_episode_reward_d", "mean_fidelity_d", "mean_misclassification_cost_d"]
        metrics_l += ["Cumulative Reward (disc.)", "Fidelity (disc.)", "Misclassification Cost (disc.)"]

    # The loop variable is deliberately not called `env`, so the parameter is
    # not shadowed.
    for env_id in results["env"].unique():
        env_results = results.loc[results["env"] == env_id]
        env_dir = os.path.join(base_dir, env_id)

        if not has_experts:
            _save_metric_barplots(env_results, metrics, metrics_l, env_id, env_dir)
            continue

        os.makedirs(env_dir, exist_ok=True)

        # Only the expert counts actually present for this environment are
        # plotted; counts seen only in other environments would yield empty
        # data.
        for no_of_experts in env_results["no_of_experts"].unique():
            env_exp_results = env_results.loc[env_results["no_of_experts"] == no_of_experts]
            _save_metric_barplots(
                env_exp_results,
                metrics,
                metrics_l,
                env_id + ": " + str(no_of_experts) + " Experts",
                os.path.join(env_dir, str(no_of_experts)),
            )

In [None]:
import itertools

def generate_moet_performance_comparison_plots(no_of_experts, results_dir, analysis_dir, env=None):
    """Save bar plots comparing MoET and CS-MoET across several expert counts.

    The runs of both algorithms are restricted to the requested expert counts
    and relabelled "<algo>-<no_of_experts>" (e.g. "MoET-2", "CS-MoET-2"). For
    each environment, one PNG per metric (cumulative reward, fidelity,
    misclassification cost) is written to
    ``<analysis_dir>/MoET vs CS-MoET/<env>/``. MoET bars are blue and CS-MoET
    bars orange; bars belonging to the same expert count share a hatch
    pattern.

    Parameters
    ----------
    no_of_experts : iterable of int
        Expert counts to include; also fixes the left-to-right hue order.
    results_dir : str
        Root results directory, passed on to ``get_algo_results``.
    analysis_dir : str
        Root directory the figures are written to (created if missing).
    env : str, optional
        Restrict the plots to this environment; all environments when falsy.
    """
    from matplotlib.container import BarContainer

    algo1 = "MoET"
    algo2 = "CS-MoET"
    dir_name = "MoET vs CS-MoET"

    base_dir = os.path.join(analysis_dir, dir_name)
    os.makedirs(base_dir, exist_ok=True)

    expert_counts = list(no_of_experts)

    results = pd.concat(
        [get_algo_results(algo1, results_dir), get_algo_results(algo2, results_dir)],
        axis=0,
    )
    if env:
        results = results.loc[results["env"] == env]
    results = results.loc[results["no_of_experts"].isin(expert_counts)]
    # assign() returns a new frame, so the filtered slice is never written to.
    results = results.assign(algo=results["algo"] + "-" + results["no_of_experts"].astype(str))

    # Hue levels, colours and hatches are derived from the requested expert
    # counts; for [2, 3] this gives MoET-2, CS-MoET-2, MoET-3, CS-MoET-3.
    hue_order = []
    for count in expert_counts:
        hue_order += [algo1 + "-" + str(count), algo2 + "-" + str(count)]
    palette = ["tab:blue", "tab:orange"] * len(expert_counts)
    hatch_styles = ['..', '//', 'xx', '--', '++', 'oo']

    metrics = ["mean_episode_reward", "mean_fidelity", "mean_misclassification_cost"]
    metrics_l = ["Cumulative Reward", "Fidelity", "Misclassification Cost"]

    for env_id in results["env"].unique():

        env_results = results.loc[results["env"] == env_id]
        env_dir = os.path.join(base_dir, env_id)
        os.makedirs(env_dir, exist_ok=True)

        for metric, label in zip(metrics, metrics_l):

            fig, ax = plt.subplots(1, 1, figsize=(8, 5))

            sns.barplot(data=env_results, x="dt_depth", y=metric, hue="algo", hue_order=hue_order, palette=palette, errorbar="sd", errwidth=2, ax=ax)

            # The bars of each hue level form one container, so hatching per
            # container works for any number of tree depths instead of
            # assuming a fixed number of bars per hue.
            # NOTE(review): assumes the containers follow hue_order; confirm
            # against the installed seaborn version.
            bar_groups = [c for c in ax.containers if isinstance(c, BarContainer)]
            for hue_idx, bar_group in enumerate(bar_groups):
                hatch = hatch_styles[(hue_idx // 2) % len(hatch_styles)]
                for bar in bar_group:
                    bar.set_hatch(hatch)

            ax.set_xlabel("Tree Depth")
            ax.set_ylabel(label)

            if label == "Misclassification Cost":
                ax.legend(loc='upper right')
            else:
                ax.legend(loc='lower right')

            ax.set_title(env_id)
            fig.tight_layout(pad=0.5)
            fig.savefig(os.path.join(env_dir, label + ".png"))


In [None]:
# MoET vs CS-MoET bar plots (2- and 3-expert runs) for LunarLander-v2.
generate_moet_performance_comparison_plots(
    no_of_experts=[2, 3],
    results_dir=results_dir,
    analysis_dir=analysis_dir,
    env="LunarLander-v2",
)

In [None]:
# MoET vs CS-MoET bar plots (2- and 3-expert runs) for Taxi-v3.
generate_moet_performance_comparison_plots(
    no_of_experts=[2, 3],
    results_dir=results_dir,
    analysis_dir=analysis_dir,
    env="Taxi-v3",
)

In [None]:
# MoET vs CS-MoET bar plots (2- and 3-expert runs) for FourRooms.
generate_moet_performance_comparison_plots(
    no_of_experts=[2, 3],
    results_dir=results_dir,
    analysis_dir=analysis_dir,
    env="FourRooms",
)

In [None]:
# MoET vs CS-MoET bar plots (2- and 3-expert runs) for highway-fast-v0.
generate_moet_performance_comparison_plots(
    no_of_experts=[2, 3],
    results_dir=results_dir,
    analysis_dir=analysis_dir,
    env="highway-fast-v0",
)

In [None]:
# VIPER vs CS-VIPER bar plots for every environment found in the results.
generate_performance_comparison_plots(
    algo="VIPER",
    results_dir=results_dir,
    analysis_dir=analysis_dir,
    env=None,
)

In [None]:
# MoET vs CS-MoET bar plots, one set per expert count, for LunarLander-v2.
generate_performance_comparison_plots(
    algo="MoET",
    results_dir=results_dir,
    analysis_dir=analysis_dir,
    env="LunarLander-v2",
)

In [None]:
# MoET vs CS-MoET bar plots, one set per expert count, for Taxi-v3.
generate_performance_comparison_plots(
    algo="MoET",
    results_dir=results_dir,
    analysis_dir=analysis_dir,
    env="Taxi-v3",
)

In [None]:
# MoET vs CS-MoET bar plots, one set per expert count, for FourRooms.
generate_performance_comparison_plots(
    algo="MoET",
    results_dir=results_dir,
    analysis_dir=analysis_dir,
    env="FourRooms",
)

In [None]:
# MoET vs CS-MoET bar plots, one set per expert count, for highway-fast-v0.
generate_performance_comparison_plots(
    algo="MoET",
    results_dir=results_dir,
    analysis_dir=analysis_dir,
    env="highway-fast-v0",
)

In [None]:
def get_algo_models(algo, results_dir, env, dt_depth, no_of_experts=None):
    """Load the saved models of every run of one algorithm configuration.

    Run directories are looked up in ``<results_dir>/<algo>`` (``algo``
    lower-cased with "-" replaced by "_"). A run belongs to the configuration
    when its directory name starts with ``<env>_<dt_depth>_`` for "VIPER" /
    "CS-VIPER", or with ``<env>_<no_of_experts>_<dt_depth>_`` for any other
    algorithm. The prefix is compared literally, so characters in ``env``
    that are special in regular expressions (e.g. ".") cannot match unrelated
    directories.

    Parameters
    ----------
    algo : str
        Algorithm name, e.g. "VIPER", "CS-VIPER", "MoET", "CS-MoET".
    results_dir : str
        Root directory holding one sub-directory per algorithm.
    env : str
        Environment name the runs were trained on.
    dt_depth : int
        Decision-tree depth of the runs to load.
    no_of_experts : int, optional
        Number of experts; only used for the non-VIPER algorithms.

    Returns
    -------
    list
        One loaded model per matching run, ordered by run directory name;
        empty when nothing matches.
    """
    if algo in ("VIPER", "CS-VIPER"):
        model_name = "tree.pkl"
        prefix = env + "_" + str(dt_depth) + "_"
    else:
        model_name = "model.pkl"
        prefix = env + "_" + str(no_of_experts) + "_" + str(dt_depth) + "_"

    algo_dir = os.path.join(results_dir, algo.replace("-", "_").lower())

    # os.listdir returns entries in arbitrary order; sorting makes the order
    # of the returned models reproducible across machines.
    run_dirs = sorted(name for name in os.listdir(algo_dir) if name.startswith(prefix))

    # NOTE(review): load_pickled_object presumably unpickles the file; only
    # use it on result directories you trust.
    return [load_pickled_object(os.path.join(algo_dir, name, model_name)) for name in run_dirs]

def _misclassification_cost_frame(models, states, qvals, algo_label):
    """Build the per-state misclassification-cost table for all ``models``, pooled and tagged with ``algo_label``."""
    if not models:
        # pd.concat would raise an opaque "No objects to concatenate" here.
        raise ValueError("No saved models found for " + algo_label)

    max_qvals = np.max(qvals, axis=1)
    row_idx = np.arange(qvals.shape[0])

    frames = []
    for model in models:
        preds = model.predict(states)
        frames.append(pd.DataFrame({
            "Max Q Value": max_qvals,
            # Q-value of the action the model picked in each state.
            "Q Value": qvals[row_idx, preds],
            "Algorithm": algo_label,
        }))

    costs = pd.concat(frames, axis=0)
    costs["Misclassification Cost"] = costs["Max Q Value"] - costs["Q Value"]
    return costs


def generate_misclassification_cost_ecdf_plots(algo, results_dir, analysis_dir, env, dt_depth, no_of_experts=None, xlims=None):
    """Save an ECDF plot of the misclassification cost of an algorithm pair.

    ``algo == "VIPER"`` compares VIPER with CS-VIPER; any other value compares
    MoET with CS-MoET. All saved models of the requested configuration are
    loaded with ``get_algo_models`` and evaluated on the states stored in
    ``datasets/<env>/2000.pkl``. The misclassification cost of a prediction is
    the maximum Q-value of the state minus the Q-value of the predicted
    action; only strictly positive costs are plotted. The figure is written
    to ``<analysis_dir>/<A vs CS-A>/<env>/misclassification_cost_ecdf_plots/``.

    Parameters
    ----------
    algo : str
        "VIPER" for the VIPER pair; anything else selects the MoET pair.
    results_dir : str
        Root results directory, passed on to ``get_algo_models``.
    analysis_dir : str
        Root directory the figure is written to (created if missing).
    env : str
        Environment name; selects both the runs and the dataset.
    dt_depth : int
        Decision-tree depth of the runs to compare.
    no_of_experts : int, optional
        Number of experts; only used for the MoET pair.
    xlims : tuple, optional
        x-axis limits applied to the plot when truthy.

    Raises
    ------
    ValueError
        If no saved model is found for one of the two algorithms.
    """
    if algo == "VIPER":
        algo1, algo2 = "VIPER", "CS-VIPER"
    else:
        algo1, algo2 = "MoET", "CS-MoET"
    dir_name = algo1 + " vs " + algo2

    save_dir = os.path.join(analysis_dir, dir_name, env, "misclassification_cost_ecdf_plots")
    os.makedirs(save_dir, exist_ok=True)

    algo1_models = get_algo_models(algo1, results_dir, env, dt_depth, no_of_experts)
    algo2_models = get_algo_models(algo2, results_dir, env, dt_depth, no_of_experts)

    # The dataset unpacks into observations, actions and Q-values; the
    # actions are not needed here.
    obss, _, qs = load_pickled_object(os.path.join("datasets", env, "2000.pkl"))

    states = np.array(obss)
    # Each entry of qs is indexed as a 2-D array; its first row is used.
    qvals = np.array([q[0, :] for q in qs])

    df = pd.concat(
        [
            _misclassification_cost_frame(algo1_models, states, qvals, algo1),
            _misclassification_cost_frame(algo2_models, states, qvals, algo2),
        ],
        axis=0,
    )

    fig, ax = plt.subplots(1, 1, figsize=(6, 4))

    sns.ecdfplot(df[df["Misclassification Cost"] > 0], x="Misclassification Cost", hue="Algorithm", hue_order=[algo1, algo2], ax=ax)

    sns.move_legend(ax, loc="lower right", title=None)

    if xlims:
        ax.set(xlim=xlims)

    if algo == "VIPER":
        title = env + ": Misclassification Cost ECDF" + "; d=" + str(dt_depth)
        file_name = "depth=" + str(dt_depth) + ".png"
    else:
        title = env + ": Misclassification Cost ECDF" + "; noe=" + str(no_of_experts) + ", d=" + str(dt_depth)
        file_name = "no_of_experts=" + str(no_of_experts) + "; depth=" + str(dt_depth) + ".png"

    ax.set(title=title)
    fig.tight_layout(pad=0.5)
    fig.savefig(os.path.join(save_dir, file_name))

In [None]:
# VIPER vs CS-VIPER misclassification-cost ECDFs on FourRooms, tree depths 1-20.
for i in range(1, 21):
    generate_misclassification_cost_ecdf_plots(
        algo="VIPER",
        results_dir=results_dir,
        analysis_dir=analysis_dir,
        env="FourRooms",
        dt_depth=i,
        no_of_experts=None,
        xlims=(0, 0.35),
    )

In [None]:
# VIPER vs CS-VIPER misclassification-cost ECDFs on highway-fast-v0, tree depths 1-15.
for i in range(1, 16):
    generate_misclassification_cost_ecdf_plots(
        algo="VIPER",
        results_dir=results_dir,
        analysis_dir=analysis_dir,
        env="highway-fast-v0",
        dt_depth=i,
        no_of_experts=None,
        xlims=(0, 8),
    )

In [None]:
# VIPER vs CS-VIPER misclassification-cost ECDFs on LunarLander-v2, tree depths 1-15.
for i in range(1, 16):
    generate_misclassification_cost_ecdf_plots(
        algo="VIPER",
        results_dir=results_dir,
        analysis_dir=analysis_dir,
        env="LunarLander-v2",
        dt_depth=i,
        no_of_experts=None,
        xlims=(0, 10),
    )

In [None]:
# VIPER vs CS-VIPER misclassification-cost ECDFs on Taxi-v3, tree depths 1-15.
for i in range(1, 16):
    generate_misclassification_cost_ecdf_plots(
        algo="VIPER",
        results_dir=results_dir,
        analysis_dir=analysis_dir,
        env="Taxi-v3",
        dt_depth=i,
        no_of_experts=None,
        xlims=(0, 12),
    )