# Robot Learning Visualizations

#### Basic Setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

# Change some settings for better aesthetics
mpl.rcParams.update({
    "font.family": "serif",
    "mathtext.fontset": "cm",
    "axes.labelsize": 14,
    "axes.titlesize": 16,
    "legend.fontsize": 12,
    "xtick.labelsize": 12,
    "ytick.labelsize": 12,
    "lines.linewidth": 2,
})

# Enable high-definition inline plotting
%matplotlib inline
%config InlineBackend.figure_format = 'retina'  # or 'svg' for vector


# Optional: other style
sns.set_theme(style="darkgrid")


#### Load Evaluation Log

In [None]:
# Evaluation log to visualize. Change this single path to inspect another run
# instead of toggling commented-out read_csv calls.
# Alternative log files that were listed in this cell:
#   eval_logs/PPO_10deg_rand_force.csv
#   eval_logs/PPO_10deg_rand_force_4.csv
#   eval_logs/PPO_10deg_rand_force_4_0force.csv
#   eval_logs/PPO_10deg_rand_force_5.csv
EVAL_LOG_PATH = "eval_logs/eval_log.csv"

# `df` is the frame every plotting cell below reads from.
df = pd.read_csv(EVAL_LOG_PATH)

# Quick sanity check: (rows, columns) plus a preview of the first rows.
print("Data loaded:", df.shape)
df.head()


#### Reward per Step

In [None]:
# Sum the reward over all rows that share the same step and plot the result.
# pandas returns the Axes it drew on; keeping it lets every following call
# target this plot explicitly instead of relying on pyplot's "current axes".
ax = df.groupby("step")["reward"].sum().plot(title="Reward per Step", figsize=(10, 4))
ax.set_ylabel("Reward")
ax.set_xlabel("Step")
# Cap the y-axis just above 1 so the reference line below is not clipped
# (presumably 1.0 is the maximum attainable reward -- TODO confirm).
ax.set_ylim(top=1.005)

# Dashed reference line at y=1
ax.axhline(y=1.0, color="black", linestyle="--", linewidth=1)

# End with show() so the Line2D repr is not echoed as cell output.
plt.show()

#### Physical Parameters over Time (in one Plot)

In [None]:
# Three stacked panels (position, angle, x-velocity) sharing one x-axis.
fig, axes = plt.subplots(3, 1, figsize=(10, 7), sharex=True)

# One entry per panel, top to bottom:
# (title, y-axis label, [(dataframe column, legend label), ...])
panels = [
    (
        r"Position over Time",
        r"Position $\mathrm{[m]}$",
        [
            ("x", r"$x$ position"),
            ("y", r"$y$ position"),
            ("z", r"$z$ position"),
        ],
    ),
    (
        r"Angle over Time",
        r"Angle $\mathrm{[deg]}$",
        [
            ("x_angle", r"$\theta_x$ angle"),
            ("y_angle", r"$\theta_y$ angle"),
            ("z_angle", r"$\theta_z$ angle"),
        ],
    ),
    (
        r"Velocity in $x$ direction over Time",
        r"$\dot{x}$ $[\mathrm{m/s}]$",
        [
            ("x_vel", r"$x$ velocity"),
        ],
    ),
]

for panel_ax, (panel_title, panel_ylabel, series) in zip(axes, panels):
    for column, legend_label in series:
        panel_ax.plot(df[column], label=legend_label)
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel(panel_ylabel)
    # Anchor the legend just outside the right edge of its panel
    panel_ax.legend(loc="center left", bbox_to_anchor=(1.02, 0.5), borderaxespad=0)

# Only the bottom panel carries the shared x-axis label
axes[-1].set_xlabel("Time step")

# plt.tight_layout() is left disabled; the right margin is set by hand instead
fig.subplots_adjust(right=0.8)  # leave space for legends on the right
plt.show()

### Action over Time

In [None]:
# Y-axis label per known column; any other column is labelled with its own name.
# NOTE(review): these labels use \text{...} inside mathtext, which older
# Matplotlib releases may not support -- confirm the minimum version in use.
labels = {
    "action_0": r"Action $\text{[Nm]}$",
    "action_1": r"Action $\text{[Nm]}$",
    "wheel_speed_l": r"$\dot\theta_\text{left wheel} [\text{deg/s}]$",
    "wheel_speed_r": r" $\dot\theta_\text{right wheel} [\text{deg/s}]$",
}

# Every column whose name mentions an action or a wheel speed gets its own subplot
action_cols = [
    name
    for name in df.columns
    if any(keyword in name for keyword in ("action", "wheel_speed"))
]
axes = df[action_cols].plot(
    subplots=True,
    figsize=(10, 6),
    title="Action Over Time",
)

for col, ax in zip(action_cols, axes):
    ax.set_ylabel(labels.get(col, col))
    ax.legend(loc="upper right")

# Trailing semicolon suppresses the Text repr in the cell output
axes[-1].set_xlabel("Time step");

### Individual Penalties over Time

#### First, just the bounded sensor data *without* penalty factors $\lambda_{(i)}$

The bounded sensor data lie in $[0, 1]$ (plotted here in $[-1, 0]$).

In [None]:
# One subplot per column whose name contains "bounded"
info_cols = [col for col in df.columns if "bounded" in col]
axes = df[info_cols].plot(
    subplots=True,
    figsize=(10, 12),
    # Raw string: "\l" is not a valid escape sequence, so a plain string
    # literal triggers an invalid-escape warning (SyntaxWarning on Python 3.12+).
    title=r"Sensor Data without penalty factors $\lambda_{(i)}$",
)

# Y-axis label per known column
labels = {
    "bounded_dist": r"$f(x)$",
    "bounded_y_angle": r"$f(\theta_y)$",
    "bounded_wheel_l": r"$f\left( \dot \theta_\text{left wheel}\right)$",
    "bounded_wheel_r": r"$f\left( \dot \theta_\text{right wheel}\right)$",
    "bounded_z_angle": r"$f(\theta_z)$",
    "bounded_x_vel": r"$f(\dot x)$",
    "bounded_y_angle_vel": r"$f(\dot \theta_y)$",
}
for ax, col in zip(axes, info_cols):
    ax.set_ylabel(labels.get(col, col))  # Fallback to column name if label missing
    ax.legend(loc="lower right")
    # Same y-range on every subplot, with a small margin around [-1, 0]
    ax.set_ylim(-1.025, 0.025)  # Adjust limits as needed

# Trailing semicolon suppresses the Text repr in the cell output
axes[-1].set_xlabel("Time step");

#### Now, *with* penalty factors $\lambda_{(i)}$

Now the penalties are scaled by their respective penalty factors $\lambda_{(i)}$ (but still lie in $[0, 1]$, or $[-1, 0]$ for plotting).
This shows how each individual penalty influences the final reward.

In [None]:
# One subplot per column whose name contains "penalty" or "info_"
info_cols = [col for col in df.columns if "penalty" in col or "info_" in col]
axes = df[info_cols].plot(
    subplots=True,
    figsize=(10, 12),
    # Raw string: "\l" is not a valid escape sequence, so a plain string
    # literal triggers an invalid-escape warning (SyntaxWarning on Python 3.12+).
    # NOTE(review): the wording "Penalties' penalty factors" looks unintended
    # (the plot shows penalties scaled by their factors) -- confirm the title.
    title=r"Penalties' penalty factors $\lambda_{(i)}$",
)

# Y-axis label per known column
labels = {
    "distance_penalty": r"$\lambda_x f(x)$",
    "y_angle_penalty": r"$\lambda_{\theta_y} f(\theta_y)$",
    "z_angle_penalty": r"$\lambda_{\theta_z} f(\theta_z)$",
    "wheel_l_penalty": r"$\lambda_{\dot \theta_\text{left wheel}} f\left( \dot \theta_\text{left wheel}\right)$",
    "wheel_r_penalty": r"$\lambda_{\dot \theta_\text{right wheel}} f\left( \dot \theta_\text{right wheel}\right)$",
    "x_vel_penalty": r"$\lambda_{\dot x} f(\dot x)$",
    "y_angle_vel_penalty": r"$\lambda_{\dot \theta_y} f(\dot \theta_y)$",
}
for ax, col in zip(axes, info_cols):
    ax.set_ylabel(labels.get(col, col))  # Fallback to column name if label missing
    ax.legend(loc="lower right")
    # Same y-range on every subplot so the panels are directly comparable
    ax.set_ylim(-0.5, 0.025)  # Adjust limits as needed

# Trailing semicolon suppresses the Text repr in the cell output
axes[-1].set_xlabel("Time step");
