In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Environment whose outputs are analysed below.
# Supported values: cartpole, acrobot, mountain_car
ENV = "cartpole"

# Per-environment sample counts, keyed by environment name.
# They are only used to annotate plot legends ("N=...") further down.
n_dqn_samples = dict(acrobot=100_000, cartpole=5_000, mountain_car=200_000)
n_rrt_samples = dict(acrobot=10_000, cartpole=1_000, mountain_car=50_000)

## Comparative convergence

In [None]:
# Best step
# Mountain Car: 641 (RRT), 1920 (DQN)
# Acrobot: 955 (RRT), 484 (DQN)
# Cartpole: 570 (RRT), 388 (DQN)
# np.argmax(r_dqn)

In [None]:
# Compare convergence of the two agents: raw validation rewards (faint lines)
# and the running best reward so far (solid lines), saved as convergence.png.
r_dqn = np.load(os.path.join("output", ENV, "rewards.dqn.npy"))
r_rrt = np.load(os.path.join("output", ENV, "rewards.rrt.npy")) # Until best step + 100

# Build the human-readable environment name outside the f-string: reusing
# double quotes inside a replacement field is a SyntaxError before Python 3.12.
env_title = ENV.capitalize().replace("_", " ")

# np.maximum.accumulate yields the cumulative maximum, i.e. best reward so far.
plt.plot(np.maximum.accumulate(r_dqn), color="b", label=f"baseline (N={n_dqn_samples[ENV]})")
plt.plot(np.maximum.accumulate(r_rrt), color="r", label=f"RRT (N={n_rrt_samples[ENV]})")
plt.plot(r_dqn, color="b", alpha=0.2)
plt.plot(r_rrt, color="r", alpha=0.2)
plt.legend()

plt.title(f"{env_title}: Validation rewards and cumulative best results")
# Cap the y-axis at 0 for these two environments.
# NOTE(review): presumably because their rewards are non-positive — confirm.
if ENV in {"acrobot", "mountain_car"}:
    plt.ylim(top=0)
plt.ylabel("validation reward")
plt.xlabel("step")

plt.savefig(os.path.join("output", ENV, "convergence.png"), dpi=200)

## Proportion of useful samples (relevant for Acrobot)

In [None]:
# Report how many collected samples have a reward above the threshold,
# for the baseline and the RRT sample sets.
base_samples = pd.read_csv(os.path.join("output", ENV, "samples.baseline.csv"))
rrt_samples = pd.read_csv(os.path.join("output", ENV, "samples.rrt.csv"))

# Samples with reward above this value are counted.
# NOTE(review): the threshold's meaning per environment is not visible here — confirm.
reward_threshold = -0.5

# Series.sum() counts the True entries in one vectorised pass; the builtin
# sum() would iterate the Series element by element in Python.
n_base = (base_samples["reward"] > reward_threshold).sum()
n_rrt = (rrt_samples["reward"] > reward_threshold).sum()

print("All base rewards with positive terminal outcome: ")
print(f"base:\t{n_base} ({n_base/len(base_samples)*100:.2f} %)")
print(f"RRT:\t{n_rrt} ({n_rrt/len(rrt_samples)*100:.2f} %)")

## State space coverage (best with Mountain Car)

## Other

In [None]:
# Scatter the first two state dimensions visited by each method and save the
# figure as coverage.png in the environment's output directory.
states_b = np.load(os.path.join("output", ENV, "states.baseline.npy"))
# states_e = np.load(os.path.join("output", ENV, "states.baseline.equivalent.npy"))
states_r = np.load(os.path.join("output", ENV, "states.rrt.npy"))

# Build the human-readable environment name outside the f-string: reusing
# double quotes inside a replacement field is a SyntaxError before Python 3.12.
env_title = ENV.capitalize().replace("_", " ")

# RRT is drawn first, so the baseline points are layered on top of it.
plt.scatter(states_r[:,0], states_r[:,1], alpha=0.2, label=f"RRT (N={n_rrt_samples[ENV]})", c="tab:orange", marker="o")
plt.scatter(states_b[:,0], states_b[:,1], alpha=0.2, label=f"baseline (N={n_dqn_samples[ENV]})", c="tab:blue", marker="o")
plt.title(f"{env_title}: State space coverage")
plt.xlabel("$x$")
plt.ylabel("$x'$")
plt.legend()
plt.savefig(os.path.join("output", ENV, "coverage.png"), dpi=200)

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Side-by-side kernel-density plots of the first two state dimensions for the
# baseline and RRT state sets, saved as coverage_density.png.

# Load state data
states_b = np.load(os.path.join("output", ENV, "states.baseline.npy"))
states_r = np.load(os.path.join("output", ENV, "states.rrt.npy"))

# Derive the task name from ENV so the title matches the data being plotted
# ("mountain_car" -> "Mountain Car", "cartpole" -> "Cartpole").
env_title = ENV.replace("_", " ").title()

# Fixed (x, y) axis limits per environment. Only Mountain Car has known limits;
# other environments fall back to matplotlib's autoscaling instead of being
# clipped to a range chosen for a different task.
axis_limits = {
    "mountain_car": ((-1.25, 0.5), (-0.07, 0.07)),
}
limits = axis_limits.get(ENV)

# Set up figure with two subplots in a single row
fig, axs = plt.subplots(1, 2, figsize=(10, 6), sharex=True, sharey=True)
fig.suptitle(f"State space coverage on the {env_title} task")

# One panel per method: (axes, states, panel title, colormap)
panels = (
    (axs[0], states_b, "Baseline", "Blues"),
    (axs[1], states_r, "RRT", "Oranges"),
)
for ax, states, panel_title, cmap in panels:
    # Density-based coloring of the first two state dimensions
    sns.kdeplot(x=states[:,0], y=states[:,1], fill=True, cmap=cmap, ax=ax, thresh=0.05)
    ax.set_title(panel_title)
    if limits is not None:
        x_lim, y_lim = limits
        ax.set_xlim(x_lim)
        ax.set_ylim(y_lim)
    ax.set_xlabel("$x$")

# The y-axis is shared, so only the left panel carries the label.
axs[0].set_ylabel("$x'$")

# Adjust layout and save the figure
plt.tight_layout()
plt.savefig(os.path.join("output", ENV, "coverage_density.png"), dpi=200)
plt.show()


In [None]:
# Display the baseline states whose first component is at least -0.1.
first_component = states_b[:, 0]
states_b[first_component >= -0.1]

In [None]:
# Quick look at the raw DQN validation rewards for the selected environment.
dqn_rewards_path = os.path.join("output", ENV, "rewards.dqn.npy")
plt.plot(np.load(dqn_rewards_path))

In [None]:
# Load the three saved state arrays (baseline, baseline-equivalent, RRT)
# from the selected environment's output directory.
states_b, states_e, states_r = (
    np.load(os.path.join("output", ENV, file_name))
    for file_name in (
        "states.baseline.npy",
        "states.baseline.equivalent.npy",
        "states.rrt.npy",
    )
)

In [None]:
# Scatter state columns 4 and 5 of the RRT states against each other.
# NOTE(review): these columns only exist for 6-dimensional states (labels
# suggest Acrobot angular velocities) — confirm ENV before running.
rrt_col4, rrt_col5 = states_r[:, 4], states_r[:, 5]
plt.scatter(rrt_col4, rrt_col5)
plt.xlabel("$\\theta_1'$")
plt.ylabel("$\\theta_2'$")

In [None]:
# Overlay baseline-equivalent and RRT states on state columns X and Y,
# then save the figure as cov.png in the working directory.
X, Y = 4, 5
for states, colour, name in ((states_e, 'b', "baseline"), (states_r, 'r', "RRT")):
    plt.scatter(states[:, X], states[:, Y], c=colour, label=name, alpha=0.3)
plt.legend()
plt.title("Acrobot: state space coverage with 10000 samples")
plt.xlabel("$\\theta_1'$")
plt.ylabel("$\\theta_2'$")
plt.savefig("cov.png", dpi=400)

In [None]:
# Overlay baseline-equivalent and RRT states on state columns X and Y,
# then save the figure as cov2.png in the working directory.
X, Y = 1, 3
for states, colour, name in ((states_e, 'b', "baseline"), (states_r, 'r', "RRT")):
    plt.scatter(states[:, X], states[:, Y], c=colour, label=name, alpha=0.3)
plt.legend()
plt.title("Acrobot: state space coverage with 10000 samples")
plt.xlabel("$sin(\\theta_1)$")
plt.ylabel("$sin(\\theta_2)$")
plt.savefig("cov2.png", dpi=400)

In [None]:
# Show the shapes of the baseline-equivalent and RRT state arrays side by side.
tuple(arr.shape for arr in (states_e, states_r))