In [None]:
%load_ext autoreload
%autoreload 2

import json
import numpy as np

# Folder under figures/ that holds the results of the experiment to visualize.
experiment_name = "N50000_t500_b100_lr1_5"
# Values of K compared in the plots and in the summary table.
bellman_iterations_scopes = [1, 4, 7, 10]
# Seeds are formed by string-prefixing "1" to idx, i.e. 11..19 followed by 110..120
# (not a contiguous range); the result file names embed these exact values.
seeds = [int(f"1{idx}") for idx in range(1, 21)]

# Experiment parameters saved next to the results. The context manager closes the
# file handle right away instead of leaving it open until garbage collection.
with open(f"figures/{experiment_name}/parameters.json") as parameters_file:
    p = json.load(parameters_file)

In [None]:
import matplotlib.pyplot as plt 

from experiments.car_on_hill import COLORS

# Global matplotlib styling; applies to every figure created afterwards.
plt.rc("font", size=15)
plt.rc("lines", linewidth=3)

# K -> sum over iterations of the seed-averaged approximation error, rounded to
# 4 decimals. Filled here and read again by the summary-table cell.
final_sum_approx_errors = {}

# A dedicated figure/axes pair so this plot never lands on whichever figure
# pyplot currently considers "current".
fig, ax = plt.subplots()

for bellman_iterations_scope in bellman_iterations_scopes:
    # Stack one array per seed, then average across seeds (axis 0).
    approx_errors = np.array(
        [
            np.load(f"figures/{experiment_name}/iFQI_bound/{bellman_iterations_scope}_approx_errors_s{seed}.npy")
            for seed in seeds
        ]
    ).mean(axis=0)
    # Entry 0 is excluded from the sum (and from the curves below).
    final_sum_approx_errors[bellman_iterations_scope] = np.around(np.sum(approx_errors[1:]), 4)

    color = COLORS[str(bellman_iterations_scope)]
    # Index where the curve switches from solid to dotted.
    # NOTE(review): presumably iterations after this index are no longer updated
    # ("frozen") for this K -- confirm against the experiment code.
    frozen_line = p["n_bellman_iterations"] - bellman_iterations_scope + 1
    ax.plot(
        range(1, frozen_line + 1),
        approx_errors[1 : frozen_line + 1],
        label=f"K={bellman_iterations_scope}",
        c=color,
    )
    # The x/y lengths of this segment only match when the array has
    # n_bellman_iterations + 1 entries.
    ax.plot(
        range(frozen_line, p["n_bellman_iterations"] + 1),
        approx_errors[frozen_line:],
        linestyle="dotted",
        c=color,
    )

ax.set_xticks(range(1, p["n_bellman_iterations"] + 1, 3))
ax.set_xlabel("#Iteration i")
ax.set_ylabel(r"$|| \Gamma^* Q_{i-1}^K - Q_i^K ||_2$")
ax.set_title("Car-On-Hill")
# Legend above the grid, grid below the curves.
ax.legend().set_zorder(3)
ax.grid(zorder=0)
# Uncomment to export. The earlier commented-out path was "distance_to_optimal_Q.pdf",
# the same name used for the distance plot; a distinct name avoids overwriting it.
# fig.savefig(f"figures/{experiment_name}/approximation_errors.pdf", bbox_inches="tight")

In [None]:
# A dedicated figure/axes pair so these curves cannot be drawn on top of a
# figure left over from a previous cell when no inline backend is in use.
fig, ax = plt.subplots()

for bellman_iterations_scope in bellman_iterations_scopes:
    # Stack one array per seed, then average across seeds (axis 0).
    diff_q_values = np.array(
        [
            np.load(f"figures/{experiment_name}/iFQI_bound/{bellman_iterations_scope}_distance_Q_s{seed}.npy")
            for seed in seeds
        ]
    ).mean(axis=0)

    color = COLORS[str(bellman_iterations_scope)]
    # Index where the curve switches from solid to dotted.
    # NOTE(review): presumably iterations after this index are no longer updated
    # ("frozen") for this K -- confirm against the experiment code.
    frozen_line = p["n_bellman_iterations"] - bellman_iterations_scope + 1
    # Entry 0 is not plotted; the solid part covers iterations 1..frozen_line.
    ax.plot(
        range(1, frozen_line + 1),
        diff_q_values[1 : frozen_line + 1],
        label=f"K={bellman_iterations_scope}",
        c=color,
    )
    # The x/y lengths of this segment only match when the array has
    # n_bellman_iterations + 1 entries.
    ax.plot(
        range(frozen_line, p["n_bellman_iterations"] + 1),
        diff_q_values[frozen_line:],
        linestyle="dotted",
        c=color,
    )

ax.set_xticks(range(1, p["n_bellman_iterations"] + 1, 3))
ax.set_xlabel("#Iteration i")
ax.set_ylabel(r"$|| Q^* - Q_i^K ||_2$")
ax.set_title("Car-On-Hill")
# Legend above the grid, grid below the curves.
ax.legend().set_zorder(3)
ax.grid(zorder=0)
# Uncomment to export the figure next to the experiment results.
# fig.savefig(f"figures/{experiment_name}/distance_to_optimal_Q.pdf", bbox_inches="tight")

In [None]:
import pandas as pd

# Column name -> list with one value per K. The insertion order of the keys is
# the column order of the resulting table; "----" is a purely visual separator.
bound_infos = {
    "K": [],
    "Final sum approx error": [],
    "E[AE gain]": [],
    "% AE gain": [],
    "% condition valid and AE < 0": [],
    "% condition valid and AE >= 0": [],
    "----": [],
    "E[condition]": [],
    "std[condition]": [],
    "std[AE gain]": [],
}

for bellman_iterations_scope in bellman_iterations_scopes:
    # NOTE(review): only the first seed is loaded here, whereas the
    # "Final sum approx error" column is averaged over all seeds -- confirm
    # whether the bound statistics should be aggregated over every seed too.
    bound_info = np.load(f"figures/{experiment_name}/iFQI_bound/{bellman_iterations_scope}_bound_info_s{seeds[0]}.npy")

    # The two-way unpacking requires exactly two columns: column 0 is treated as
    # the condition value and column 1 as the approximation-error (AE) gain.
    mean_condition, mean_approx_error_gain = np.mean(bound_info, axis=0)
    std_condition, std_approx_error_gain = np.std(bound_info, axis=0)

    condition_holds = bound_info[:, 0] >= 0
    gain_non_negative = bound_info[:, 1] >= 0
    # Kept as an explicit "< 0" test (not a negation of the mask above) so that
    # NaN entries are counted in neither group.
    gain_negative = bound_info[:, 1] < 0

    # Share of rows (in %, rounded to an integer) falling in each category.
    percentage_approx_error_gain = np.around(100 * np.mean(gain_non_negative))
    percentage_condition_approx_error_gain = np.around(100 * np.mean(np.logical_and(condition_holds, gain_non_negative)))
    percentage_wrong_condition = np.around(100 * np.mean(np.logical_and(condition_holds, gain_negative)))

    row = {
        "K": bellman_iterations_scope,
        # Computed by the approximation-error plot cell, which must run first.
        "Final sum approx error": final_sum_approx_errors[bellman_iterations_scope],
        "E[AE gain]": mean_approx_error_gain,
        "% AE gain": percentage_approx_error_gain,
        "% condition valid and AE < 0": percentage_wrong_condition,
        "% condition valid and AE >= 0": percentage_condition_approx_error_gain,
        "----": "-",
        "E[condition]": mean_condition,
        "std[condition]": std_condition,
        "std[AE gain]": std_approx_error_gain,
    }
    for column, value in row.items():
        bound_infos[column].append(value)

pd.DataFrame(bound_infos).round(3)