## Import Packages

In [9]:
import numpy as np
import matplotlib.pyplot as plt
from gymnasium.wrappers import TimeLimit

from common import *
from hyperparameters import *
from plotting import *

from dp import value_iteration, compute_policy
from mpc import CartpoleMPC
from q_learning import run_q_learning

## Evaluator Function (as a Helper)

In [10]:
def evaluate_method(name, returns):
    """Print a short summary of a method's episode returns.

    Writes a header line containing ``name`` followed by the mean, standard
    deviation, minimum and maximum of ``returns``, each formatted to four
    decimal places.

    Args:
        name: Label shown in the header line.
        returns: Sequence or array of numeric returns, passed as-is to the
            NumPy reductions.

    Returns:
        None. The summary is written to standard output.
    """
    # (label, reduction) pairs, listed in the order they are printed.
    statistics = (
        ("Mean", np.mean),
        ("Std", np.std),
        ("Min", np.min),
        ("Max", np.max),
    )

    print(f"\n===== {name} =====")
    for label, reduce_fn in statistics:
        # Labels are left-aligned in an 11-character column so the colons line up.
        print(f"{label:<11}: {reduce_fn(returns):.4f}")

## 1. DP Simulation

In [12]:
# CartPole with the configured position/angle thresholds (no continuous_action
# flag is passed here), wrapped so episodes are capped at MAX_EPISODE_STEPS.
env = TimeLimit(
    CartPoleEnv(x_threshold=X_LIMIT, theta_threshold_radians=THETA_LIMIT),
    max_episode_steps=MAX_EPISODE_STEPS,
)

print("Running Value Iteration...")
# Solve for the value function first, then derive the policy from it.
V = value_iteration()
policy_dp = compute_policy(V)

# Roll out the DP policy and print summary statistics of the returns.
returns_dp = evaluate_agent(
    env,
    type="DP",
    policy=policy_dp,
)
evaluate_method("DP", returns_dp)

Running Value Iteration...


NotImplementedError: Quadratic cost function not implemented

## 2. MPC Simulation

In [13]:
# Same thresholds as the other experiments, but with continuous_action enabled
# for the MPC controller; episodes are capped at MAX_EPISODE_STEPS.
env = TimeLimit(
    CartPoleEnv(
        x_threshold=X_LIMIT,
        theta_threshold_radians=THETA_LIMIT,
        continuous_action=True,
    ),
    max_episode_steps=MAX_EPISODE_STEPS,
)

print("Running MPC...")
# NOTE(review): H is presumably the planning horizon and max_iters the
# optimiser iteration cap per step; confirm against mpc.py.
mpc = CartpoleMPC(H=5, max_iters=5)

# Roll out the MPC controller and print summary statistics of the returns.
returns_mpc = evaluate_agent(
    env,
    type="MPC",
    policy=mpc,
)
evaluate_method("MPC", returns_mpc)

Running MPC...


NotImplementedError: Quadratic cost function not implemented

## 3. Q-Learning Simulation

In [14]:
# CartPole with the configured thresholds, capped at MAX_EPISODE_STEPS per episode.
train_env = TimeLimit(
    CartPoleEnv(x_threshold=X_LIMIT, theta_threshold_radians=THETA_LIMIT),
    max_episode_steps=MAX_EPISODE_STEPS,
)

print("Training Q-Learning...")
Q_learning = run_q_learning(train_env, TRAIN_TIMESTEPS_M)

# The result of training is evaluated on the same environment instance that
# was used for training.
returns_q = evaluate_agent(
    train_env,
    type="Q",
    Q=Q_learning,
)
evaluate_method("Q-Learning", returns_q)

Training Q-Learning...


NotImplementedError: Quadratic cost function not implemented

## 4. Visualization

In [15]:
# Side-by-side box plots of the returns collected for each control method.
fig, ax = plt.subplots(figsize=(7, 5))

method_names = ["DP", "MPC", "Q-Learning"]
ax.boxplot([returns_dp, returns_mpc, returns_q])

# Boxes are drawn at x = 1..N by default. Tick labels are set explicitly rather
# than through boxplot's `labels` keyword, which newer Matplotlib releases
# deprecate in favour of `tick_labels`; this form works on both.
ax.set_xticks(list(range(1, len(method_names) + 1)))
ax.set_xticklabels(method_names)

ax.set_ylabel("Return")
ax.set_title("Performance Comparison")
ax.grid(True)
# No legend is drawn: a boxplot has no labelled artists, and the method names
# already appear on the x axis.
plt.show()

NameError: name 'returns_dp' is not defined

<Figure size 700x500 with 0 Axes>