In [None]:
%load_ext autoreload
%autoreload 2

import json
import numpy as np
from pbo.utils.confidence_interval import confidence_interval

# --- Experiment configuration -------------------------------------------------
# Sub-directory of `figures/` that holds the inputs/outputs of this experiment.
experiment_name = "test"
# Toggles selecting which algorithms are loaded and plotted in the cells below.
show_fqi = True
show_profqi = True
# Number of Bellman iterations; also part of the result file names read below.
bellman_iterations_scope = 9
# Extra iterations available in the ProFQI arrays past `bellman_iterations_scope`.
additional_iterations = 10
# Seeds whose result files are loaded and aggregated.
seeds = range(1, 2)

# Load the experiment parameters. The context manager guarantees that the file
# handle is closed, which a bare `json.load(open(...))` does not.
with open(f"figures/{experiment_name}/parameters.json") as parameters_file:
    p = json.load(parameters_file)

### Extract data

In [None]:
if show_fqi:
    # Buffers are pre-allocated so that a file whose shape cannot be broadcast
    # into the expected one raises on assignment instead of passing silently.
    fqi_dir = f"figures/{experiment_name}/FQI"
    fqi_v_shape = (len(seeds), bellman_iterations_scope + 1, p["n_states_x"], p["n_states_v"])
    fqi_q = np.zeros(fqi_v_shape + (2,))
    fqi_v = np.zeros(fqi_v_shape)

    for idx_seed, seed in enumerate(seeds):
        fqi_q[idx_seed] = np.load(f"{fqi_dir}/{bellman_iterations_scope}_Q_{seed}.npy")
        fqi_v[idx_seed] = np.load(f"{fqi_dir}/{bellman_iterations_scope}_V_{seed}.npy")

# Reference (optimal) Q and V arrays, shared by all experiments.
optimal_q = np.load("figures/data/optimal/Q.npy")
optimal_v = np.load("figures/data/optimal/V.npy")

if show_profqi:
    # ProFQI arrays hold `additional_iterations` more entries along axis 1 than FQI.
    profqi_dir = f"figures/{experiment_name}/ProFQI"
    profqi_v_shape = (
        len(seeds),
        bellman_iterations_scope + additional_iterations + 1,
        p["n_states_x"],
        p["n_states_v"],
    )
    profqi_q = np.zeros(profqi_v_shape + (2,))
    profqi_v = np.zeros(profqi_v_shape)

    for idx_seed, seed in enumerate(seeds):
        profqi_q[idx_seed] = np.load(f"{profqi_dir}/{bellman_iterations_scope}_Q_{seed}.npy")
        profqi_v[idx_seed] = np.load(f"{profqi_dir}/{bellman_iterations_scope}_V_{seed}.npy")


# Per-state weights loaded from the sample counts; the Q-format copy repeats
# them along a trailing axis of size 2 so they line up with the Q arrays.
samples_mask = np.load(f"figures/{experiment_name}/samples_count.npy")
samples_mask_q_format = np.repeat(samples_mask[:, :, np.newaxis], 2, axis=-1)

### Distance to the optimal Q-function

In [None]:
if show_fqi:
    # Per-seed, per-iteration error: square root of the mean, over states and
    # actions, of the squared gap to the optimal Q weighted by the sample mask.
    fqi_q_errors = np.sqrt(np.mean(np.square(fqi_q - optimal_q) * samples_mask_q_format, axis=(2, 3, 4)))
    # Aggregate across seeds (axis 0).
    fqi_q_mean = fqi_q_errors.mean(axis=0)
    fqi_q_std = fqi_q_errors.std(axis=0)
    fqi_q_confidence_interval = confidence_interval(fqi_q_mean, fqi_q_std, len(seeds))

if show_profqi:
    # Same statistics for ProFQI.
    profqi_q_errors = np.sqrt(np.mean(np.square(profqi_q - optimal_q) * samples_mask_q_format, axis=(2, 3, 4)))
    profqi_q_mean = profqi_q_errors.mean(axis=0)
    profqi_q_std = profqi_q_errors.std(axis=0)
    profqi_q_confidence_interval = confidence_interval(profqi_q_mean, profqi_q_std, len(seeds))

In [None]:
import matplotlib.pyplot as plt 
from experiments import colors

# Global matplotlib styling (font size and line width).
plt.rc("font", size=15)
plt.rc("lines", linewidth=3)

# x-axes: FQI stops at `bellman_iterations_scope`, ProFQI continues past it.
iterations = range(bellman_iterations_scope + 1)
iterations_validation = range(bellman_iterations_scope + additional_iterations + 1)

# One entry per curve to draw: (label, x values, mean, confidence interval, zorder).
# The label doubles as the key into `colors`.
q_curves = []
if show_profqi:
    q_curves.append(("ProFQI", iterations_validation, profqi_q_mean, profqi_q_confidence_interval, 7))
if show_fqi:
    q_curves.append(("FQI", iterations, fqi_q_mean, fqi_q_confidence_interval, 6))

for curve_label, curve_x, curve_mean, curve_interval, curve_zorder in q_curves:
    plt.plot(curve_x, curve_mean, label=curve_label, color=colors[curve_label], zorder=curve_zorder)
    plt.fill_between(curve_x, curve_interval[0], curve_interval[1], color=colors[curve_label], alpha=0.3)

# Vertical marker at iteration `bellman_iterations_scope`.
plt.axvline(bellman_iterations_scope, color="black", linestyle="--", zorder=2)
plt.xticks(range(0, bellman_iterations_scope + additional_iterations + 1, 3))
plt.xlabel("#Iterations")
plt.title(r"$|| Q^* - Q_i ||_2$")
plt.legend().set_zorder(9)
plt.grid(zorder=0)
_ = plt.savefig(
    f"figures/{experiment_name}/distance_to_optimal_Q_{bellman_iterations_scope}.pdf",
    bbox_inches='tight',
)

### Visualization of the policies at iteration `bellman_iterations_scope`

In [None]:
from pbo.utils.two_dimesions_mesh import TwoDimesionsMesh

# Bounds of the state grid used for the mesh axes.
max_pos = 1.0
max_velocity = 3.0

# Evenly spaced grid coordinates, sized from the experiment parameters.
states_x = np.linspace(-max_pos, max_pos, p["n_states_x"])
states_v = np.linspace(-max_velocity, max_velocity, p["n_states_v"])

q_visu_mesh = TwoDimesionsMesh(states_x, states_v, sleeping_time=0, axis_equal=False, zero_centered=True)

# Compare the two action slices of the optimal Q (index 0 vs index 1).
optimal_right_is_better = optimal_q[:, :, 1] > optimal_q[:, :, 0]
optimal_actions_tie = optimal_q[:, :, 1] == optimal_q[:, :, 0]
# +1 where action 1 is strictly better, -1 where it is strictly worse.
# NOTE(review): ties evaluate to -1 + 0.5 = -0.5 with this formula; confirm
# whether 0 was intended for states where both actions are equally good.
optimal_pi_weighted_average = (2 * optimal_right_is_better.astype(float) - 1) + optimal_actions_tie.astype(float) / 2
# Weighting by the sample mask is currently disabled:
# optimal_pi_weighted_average *= samples_mask

q_visu_mesh.set_values(optimal_pi_weighted_average)
q_visu_mesh.show("", xlabel="x", ylabel="v", plot=False, ticks_freq=2)
_ = plt.savefig(f"figures/{experiment_name}/optimal_pi.pdf", bbox_inches='tight')

In [None]:
if show_fqi:
    # Q-values at the last stored FQI iteration, split by action index.
    fqi_last_q = fqi_q[:, -1]
    fqi_left_q = fqi_last_q[:, :, :, 0]
    fqi_right_q = fqi_last_q[:, :, :, 1]
    # Fraction of seeds for which action 1 is strictly better, rescaled from
    # [0, 1] to [-1, 1].
    fqi_right_frequency = (fqi_right_q > fqi_left_q).mean(axis=0)
    fqi_pi_weighted_average = (2 * fqi_right_frequency - 1)
    # Weighting by the sample mask is currently disabled:
    # fqi_pi_weighted_average *= samples_mask

    q_visu_mesh.set_values(fqi_pi_weighted_average)
    q_visu_mesh.show("", xlabel="x", ylabel="v", plot=False, ticks_freq=2)
    _ = plt.savefig(
        f"figures/{experiment_name}/fqi_pi_{bellman_iterations_scope}.pdf",
        bbox_inches='tight',
    )

In [None]:
if show_profqi:
    # Q-values at iteration `bellman_iterations_scope` (not the last stored
    # ProFQI iteration), split by action index.
    profqi_scope_q = profqi_q[:, bellman_iterations_scope]
    profqi_left_q = profqi_scope_q[:, :, :, 0]
    profqi_right_q = profqi_scope_q[:, :, :, 1]
    # Fraction of seeds for which action 1 is strictly better, rescaled from
    # [0, 1] to [-1, 1].
    profqi_right_frequency = (profqi_right_q > profqi_left_q).mean(axis=0)
    profqi_pi_weighted_average = (2 * profqi_right_frequency - 1)
    # Weighting by the sample mask is currently disabled:
    # profqi_pi_weighted_average *= samples_mask

    q_visu_mesh.set_values(profqi_pi_weighted_average)
    q_visu_mesh.show("", xlabel="x", ylabel="v", plot=False, ticks_freq=2)
    _ = plt.savefig(
        f"figures/{experiment_name}/profqi_pi_{bellman_iterations_scope}.pdf",
        bbox_inches='tight',
    )

### Distance to optimal value function

In [None]:
if show_fqi:
    # Per-seed, per-iteration error: square root of the mean over states of the
    # squared gap to the optimal V weighted by the sample mask.
    fqi_v_errors = np.sqrt(np.mean(np.square(fqi_v - optimal_v) * samples_mask, axis=(2, 3)))
    # Aggregate across seeds (axis 0).
    fqi_v_mean = fqi_v_errors.mean(axis=0)
    fqi_v_std = fqi_v_errors.std(axis=0)
    fqi_v_confidence_interval = confidence_interval(fqi_v_mean, fqi_v_std, len(seeds))

if show_profqi:
    # Same statistics for ProFQI.
    profqi_v_errors = np.sqrt(np.mean(np.square(profqi_v - optimal_v) * samples_mask, axis=(2, 3)))
    profqi_v_mean = profqi_v_errors.mean(axis=0)
    profqi_v_std = profqi_v_errors.std(axis=0)
    profqi_v_confidence_interval = confidence_interval(profqi_v_mean, profqi_v_std, len(seeds))

In [None]:
import matplotlib.pyplot as plt 
from experiments import colors

# Global matplotlib styling (font size and line width).
plt.rc("font", size=15)
plt.rc("lines", linewidth=3)

# x-axes: FQI stops at `bellman_iterations_scope`, ProFQI continues past it.
iterations = range(bellman_iterations_scope + 1)
iterations_validation = range(bellman_iterations_scope + additional_iterations + 1)

# One entry per curve to draw: (label, x values, mean, confidence interval, zorder).
# The label doubles as the key into `colors`, so each line and its confidence
# band always share the algorithm's colour. The previous version hard-coded
# "green"/"blue" for ProFQI and shaded the FQI band with the ProFQI colour,
# unlike the other distance/performance plots in this notebook.
v_curves = []
if show_profqi:
    v_curves.append(("ProFQI", iterations_validation, profqi_v_mean, profqi_v_confidence_interval, 7))
if show_fqi:
    v_curves.append(("FQI", iterations, fqi_v_mean, fqi_v_confidence_interval, 6))

for curve_label, curve_x, curve_mean, curve_interval, curve_zorder in v_curves:
    plt.plot(curve_x, curve_mean, label=curve_label, color=colors[curve_label], zorder=curve_zorder)
    plt.fill_between(curve_x, curve_interval[0], curve_interval[1], color=colors[curve_label], alpha=0.3)

# Vertical marker at iteration `bellman_iterations_scope`.
plt.axvline(bellman_iterations_scope, color="black", linestyle="--", zorder=2)
plt.xticks(range(0, bellman_iterations_scope + additional_iterations + 1, 3))
plt.xlabel("#Iterations")
plt.title(r"$|| V^* - V^{\pi_i} ||_2$")
plt.legend().set_zorder(9)
plt.grid(zorder=0)
_ = plt.savefig(
    f"figures/{experiment_name}/distance_to_optimal_V_{bellman_iterations_scope}.pdf",
    bbox_inches='tight',
)

### Value functions

In [None]:
if show_fqi:
    # Per-seed, per-iteration mean over states of V weighted by the sample mask.
    fqi_v_performances = (fqi_v * samples_mask).mean(axis=(2, 3))
    # Aggregate across seeds (axis 0).
    fqi_v_mean_performance = fqi_v_performances.mean(axis=0)
    fqi_v_std_performance = fqi_v_performances.std(axis=0)
    fqi_v_confidence_interval_performance = confidence_interval(
        fqi_v_mean_performance, fqi_v_std_performance, len(seeds)
    )

if show_profqi:
    # Same statistics for ProFQI.
    profqi_v_performances = (profqi_v * samples_mask).mean(axis=(2, 3))
    profqi_v_mean_performance = profqi_v_performances.mean(axis=0)
    profqi_v_std_performance = profqi_v_performances.std(axis=0)
    profqi_v_confidence_interval_performance = confidence_interval(
        profqi_v_mean_performance, profqi_v_std_performance, len(seeds)
    )

# Reference level: the same weighted mean computed on the optimal V.
optimal_v_performance = (optimal_v * samples_mask).mean()

In [None]:
import matplotlib.pyplot as plt 

# x-axes: FQI stops at `bellman_iterations_scope`, ProFQI continues past it.
iterations = range(bellman_iterations_scope + 1)
iterations_validation = range(bellman_iterations_scope + additional_iterations + 1)

# Horizontal reference line at the optimal performance across the whole x-range.
plt.hlines(
    optimal_v_performance,
    0,
    bellman_iterations_scope + additional_iterations,
    color="black",
    linestyle="--",
    label="Optimal",
)

# One entry per curve to draw: (label, x values, mean, confidence interval).
# The label doubles as the key into `colors`.
performance_curves = []
if show_profqi:
    performance_curves.append(
        ("ProFQI", iterations_validation, profqi_v_mean_performance, profqi_v_confidence_interval_performance)
    )
if show_fqi:
    performance_curves.append(
        ("FQI", iterations, fqi_v_mean_performance, fqi_v_confidence_interval_performance)
    )

for curve_label, curve_x, curve_mean, curve_interval in performance_curves:
    plt.plot(curve_x, curve_mean, label=curve_label, color=colors[curve_label])
    plt.fill_between(curve_x, curve_interval[0], curve_interval[1], color=colors[curve_label], alpha=0.3)

# Vertical marker at iteration `bellman_iterations_scope`.
plt.axvline(bellman_iterations_scope, color="black", linestyle="--", zorder=2)
plt.xticks(range(0, bellman_iterations_scope + additional_iterations + 1, 3))
plt.xlabel("#Iterations")
# NOTE(review): the title renders as "Ji"; possibly "$J_i$" was intended — confirm.
plt.title(r"$ Ji $")
plt.legend()
plt.grid(zorder=0)
_ = plt.savefig(f"figures/{experiment_name}/V_{bellman_iterations_scope}.pdf", bbox_inches='tight')

### Visualization of the last value functions

In [None]:
from pbo.utils.two_dimesions_mesh import TwoDimesionsMesh

# Bounds of the state grid used for the mesh axes.
max_pos = 1.0
max_velocity = 3.0

# Evenly spaced grid coordinates, sized from the experiment parameters.
states_x = np.linspace(-max_pos, max_pos, p["n_states_x"])
states_v = np.linspace(-max_velocity, max_velocity, p["n_states_v"])

q_visu_mesh = TwoDimesionsMesh(states_x, states_v, sleeping_time=0, axis_equal=False, zero_centered=True)

# Optimal V weighted element-wise by the sample mask.
optimal_v_weighted_average = optimal_v * samples_mask
# The figure title reports the mean of the weighted values, rounded to 2 decimals.
optimal_v_title = (
    r"$V^{\pi^*}, \mathbb{E}\left[ V^{\pi^*} \right] =$"
    + str(np.round(np.mean(optimal_v_weighted_average), 2))
)

q_visu_mesh.set_values(optimal_v_weighted_average)
q_visu_mesh.show(optimal_v_title, xlabel="x", ylabel="v", plot=False)
_ = plt.savefig(
    f"figures/{experiment_name}/optimal_V_{bellman_iterations_scope}.pdf",
    bbox_inches='tight',
)

In [None]:
if show_fqi:
    # Seed-averaged V at the last stored FQI iteration, weighted by the sample mask.
    fqi_last_v = fqi_v[:, -1]
    fqi_v_weighted_average = fqi_last_v.mean(axis=0) * samples_mask
    # The figure title reports the mean of the weighted values, rounded to 4 decimals.
    fqi_v_title = (
        r"$V^{\pi_{fqi}}, \mathbb{E}\left[ V^{\pi_{fqi}} \right] =$"
        + f"{np.round(np.mean(fqi_v_weighted_average), 4)}"
    )

    q_visu_mesh.set_values(fqi_v_weighted_average)
    q_visu_mesh.show(fqi_v_title, xlabel="x", ylabel="v", plot=False)
    _ = plt.savefig(
        f"figures/{experiment_name}/fqi_V_{bellman_iterations_scope}.pdf",
        bbox_inches='tight',
    )

In [None]:
if show_profqi:
    # Seed-averaged V at iteration `bellman_iterations_scope`, weighted by the
    # sample mask.
    profqi_v_weighted_average = profqi_v[:, bellman_iterations_scope].mean(axis=0) * samples_mask
    # The title previously mixed two stale policy names ("pbo" and "pbo_linear")
    # for the same quantity; both now name the ProFQI policy, mirroring the FQI
    # figure. The reported value is the mean of the weighted values, rounded to
    # 4 decimals.
    profqi_v_title = (
        r"$V^{\pi_{profqi}}, \mathbb{E}\left[ V^{\pi_{profqi}} \right] =$"
        + str(np.round(np.mean(profqi_v_weighted_average), 4))
    )

    q_visu_mesh.set_values(profqi_v_weighted_average)
    q_visu_mesh.show(profqi_v_title, xlabel="x", ylabel="v", plot=False)
    _ = plt.savefig(f"figures/{experiment_name}/profqi_V_{bellman_iterations_scope}.pdf", bbox_inches='tight')