In [None]:
%load_ext autoreload
%autoreload 2

import json
import numpy as np
import pandas as pd

# Name of the experiment folder under figures/ whose saved artefacts are analysed below.
# Alternative run: "bound_r150_b50_N5500_lr03_a50"
experiment_name = "bound_r20_b50_N5500_lr03_a50"
# Scopes compared in this notebook; each one is reported as "K=<scope>" in the plots.
bellman_iterations_scopes = [1, 5, 10, 20]
# Seed identifying which saved run to load (it appears as "_s<seed>_" in the file names).
seed = 11

# Parameters saved alongside the experiment. The context manager guarantees the file
# handle is closed instead of leaving it to the garbage collector.
with open(f"figures/{experiment_name}/parameters.json") as parameters_file:
    p = json.load(parameters_file)

In [None]:
import math 
import jax

from idqn.networks.architectures.ifqi import CarOnHilliFQI
from idqn.environments.car_on_hill import CarOnHillEnv
from idqn.utils.pickle import load_pickled_data
from experiments.car_on_hill.utils import TwoDimesionsMesh

env = CarOnHillEnv(p["gamma"])
# Regular grid of positions and velocities, symmetric around zero, on which the
# Q-functions are evaluated.
states_x = np.linspace(-env.max_position, env.max_position, p["n_states_x"])
states_v = np.linspace(-env.max_velocity, env.max_velocity, p["n_states_v"])

n_scopes = len(bellman_iterations_scopes)
mesh_shape = (p["n_states_x"], p["n_states_v"])

# Axes: (scope, Bellman iteration, x, v[, 2]).
# NOTE(review): the trailing axis of size 2 presumably indexes the two actions -- confirm.
q_values = np.ones((n_scopes, p["n_bellman_iterations"], *mesh_shape, 2))
q_diff_values = np.ones((n_scopes, p["n_bellman_iterations"], *mesh_shape))
performances = np.zeros(n_scopes)


def _select_head(params, idx_head):
    """Return `params` with every leaf reduced to entry `idx_head`, kept behind a leading axis of size 1.

    Uses `jax.tree_util.tree_map` because the top-level `jax.tree_map` alias is
    deprecated and no longer available in recent JAX releases.
    """
    return jax.tree_util.tree_map(lambda param: param[idx_head][None], params)


for idx_bellman_iterations_scope, bellman_iterations_scope in enumerate(bellman_iterations_scopes):
    q = CarOnHilliFQI(
        bellman_iterations_scope + 1,
        p["features"],
        (2,),
        env.n_actions,
        math.pow(p["gamma"], p["n_step_return"]),
        jax.random.PRNGKey(0),
        p["learning_rate"],
        p["optimizer_eps"],
    )

    # Index of the last checkpoint that is loaded for this scope.
    idx_last_checkpoint = p["n_bellman_iterations"] - bellman_iterations_scope

    # Every checkpoint before the last one contributes only its first entry (index 0).
    for idx_bellman_iteration in range(idx_last_checkpoint):
        q.params = load_pickled_data(f"figures/{experiment_name}/iFQI/{bellman_iterations_scope}_Q_s{seed}_r{idx_bellman_iteration}_online_params")

        # Build the single-entry parameter tree once and reuse it for both estimates.
        head_params = _select_head(q.params, 0)
        q_values[idx_bellman_iterations_scope, idx_bellman_iteration] = env.q_estimate_mesh(q, head_params, states_x, states_v)
        q_diff_values[idx_bellman_iterations_scope, idx_bellman_iteration] = env.diff_q_estimate_mesh(q, head_params, states_x, states_v)

    q.params = load_pickled_data(f"figures/{experiment_name}/iFQI/{bellman_iterations_scope}_Q_s{seed}_r{idx_last_checkpoint}_online_params")

    # The remaining iterations are read from successive entries of the last checkpoint.
    for idx_in_params, idx_bellman_iteration in enumerate(range(idx_last_checkpoint, p["n_bellman_iterations"])):
        head_params = _select_head(q.params, idx_in_params)
        q_values[idx_bellman_iterations_scope, idx_bellman_iteration] = env.q_estimate_mesh(q, head_params, states_x, states_v)
        q_diff_values[idx_bellman_iterations_scope, idx_bellman_iteration] = env.diff_q_estimate_mesh(q, head_params, states_x, states_v)

    # Performance is evaluated with the last entry of the last checkpoint, starting from state [-0.5, 0].
    performances[idx_bellman_iterations_scope] = env.evaluate(q, _select_head(q.params, -1), p["horizon"], np.array([-0.5, 0]))

In [None]:
import matplotlib.pyplot as plt 


# Matplotlib defaults applied to the figures drawn from here on.
plt.rc("font", size=15)
plt.rc("lines", linewidth=3)


# Reference Q array and per-state sample counts stored on disk.
optimal_q = np.load(f"figures/data/optimal/Q.npy")
samples_mask = np.load(f"figures/data/samples_count.npy")
# Repeat the 2-D mask along a new trailing axis of size 2 so it lines up with the Q arrays.
samples_mask_q_format = np.repeat(samples_mask[:, :, np.newaxis], 2, axis=-1)

# One curve per scope: square root of the mask-weighted mean squared gap to the
# optimal Q, reduced over every axis except the Bellman-iteration axis.
for idx_scope, scope in enumerate(bellman_iterations_scopes):
    squared_gap = np.square(q_values[idx_scope] - optimal_q)
    weighted_mean_gap = np.mean(squared_gap * samples_mask_q_format, axis=(1, 2, 3))
    plt.plot(np.sqrt(weighted_mean_gap), label=f"K={scope}")

plt.xticks(range(0, p["n_bellman_iterations"], 3))
plt.xlabel("#Iterations")
plt.title(r"$|| Q^* - Q_i ||_2$")
# Draw the legend above the grid lines.
legend = plt.legend()
legend.set_zorder(3)
plt.grid(zorder=0)
# _ = plt.savefig(f"figures/{experiment_name}/distance_to_optimal_Q.pdf", bbox_inches='tight')

In [None]:
# Mesh over the (position, velocity) grid, reused for every scope.
q_visu_mesh = TwoDimesionsMesh(states_x, states_v, axis_equal=False, zero_centered=True)

# Bellman iteration to display; -1 selects the last one stored in q_diff_values.
k = -1

for idx_bellman_iterations_scope, bellman_iterations_scope in enumerate(bellman_iterations_scopes):
    title = r"$\pi^K_k$" + f" for K = {bellman_iterations_scope} at k = {k}\n"
    title += f"performance {np.around(performances[idx_bellman_iterations_scope], 2)}"

    # Binary map: 1.0 where the stored Q difference is strictly positive, 0.0 elsewhere.
    q_visu_mesh.set_values((q_diff_values[idx_bellman_iterations_scope, k] > 0).astype(float))
    # The second mesh axis is built from states_v, so it is labelled "v" rather than "y".
    q_visu_mesh.show(title, xlabel="x", ylabel="v", ticks_freq=3)

In [None]:
# for idx_bellman_iterations_scope, bellman_iterations_scope in enumerate(bellman_iterations_scopes):
#     bound_info = np.load(f"figures/{experiment_name}/iFQI/{bellman_iterations_scope}_bound_info_s{seed}.npy")

#     print(
#         "Number of times the condition is satisfied", 
#         np.sum(bound_info[:, 0] >= 0)
#     )
#     print(
#         "Average index when the condition is satisfied", 
#         np.around(np.arange(bound_info.shape[0])[bound_info[:, 0] >= 0].mean(), 1)
#     )
#     print(
#         "Number of times the condition is satisfied and the approx error has increased", 
#         np.sum(np.logical_and(bound_info[:, 0] >= 0, bound_info[:, 1] < 0))
#     )

#     # pd.DataFrame(bound_info, columns=["condition", "approximation error loss"])