<a href="https://colab.research.google.com/github/nicoRomeroCuruchet/DynamicProgramming/blob/main/testing_bary.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pickle
import numpy as np
from pathlib import Path
from utils.utils import test_enviroment
from PolicyIteration import PolicyIteration

# CartPoleEnv 

### Observation Space

The observation is a `ndarray` with shape `(4,)` with the values corresponding to the following positions and velocities:

| Num | Observation           | Min                 | Max               |
|-----|-----------------------|---------------------|-------------------|
| 0   | Cart Position         | -4.8                | 4.8               |
| 1   | Cart Velocity         | -Inf                | Inf               |
| 2   | Pole Angle            | ~ -0.418 rad (-24°) | ~ 0.418 rad (24°) |
| 3   | Pole Angular Velocity | -Inf                | Inf               |

### Action Space

The action is a `ndarray` with shape `(1,)` which can take values `{0, 1}` indicating the direction
of the fixed force the cart is pushed with.

- 0: Push cart to the left
- 1: Push cart to the right

In [None]:

from classic_control.cartpole import CartPoleEnv

# Environment handed to the solver.
env = CartPoleEnv()

# Half-widths of the symmetric grid built for each state dimension.
x_lim, theta_lim = 2.4, 0.418          # position thresholds (cart position, pole angle)
x_dot_lim, theta_dot_lim = 3.1, 3.1    # velocity thresholds (cart velocity, pole angular velocity)

# Every dimension is sampled at 10 evenly spaced float32 points in [-lim, +lim].
# Key order follows the observation layout:
#   (0) position, (1) velocity, (2) angle, (3) angular velocity.
bins_space = {
    name: np.linspace(-lim, lim, 10, dtype=np.float32)
    for name, lim in (
        ("x_space", x_lim),
        ("x_dot_space", x_dot_lim),
        ("theta_space", theta_lim),
        ("theta_dot_space", theta_dot_lim),
    )
}

# The two discrete CartPole actions: 0 pushes the cart left, 1 pushes it right.
pi = PolicyIteration(
    env=env,
    bins_space=bins_space,
    action_space=np.array([0, 1], dtype=np.int32),
)
pi.run()

In [None]:
# Evaluate the CartPole policy stored on disk in a rendered environment.
# NOTE(review): the ".pkl" suffix suggests `PolicyIteration.load` unpickles the
# file - only load policy files that come from a trusted source.
pi = PolicyIteration.load(Path("CartPoleEnv_policy.pkl"))
test_enviroment(
    CartPoleEnv(sutton_barto_reward=True, render_mode="human"),
    pi,
)

# Continuous_MountainCarEnv

## Observation Space

The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:

| Num | Observation                          | Min   | Max  | Unit         |
|-----|--------------------------------------|-------|------|--------------|
| 0   | position of the car along the x-axis | -1.2  | 0.6  | position (m) |
| 1   | velocity of the car                  | -0.07 | 0.07 | velocity (v) |

## Action Space

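**Note:** the next cell uses `Continuous_MountainCarEnv` and gives the solver 9 evenly spaced action values in `[-1, 1]` rather than the discrete actions listed here.

The discrete Mountain Car has 3 deterministic actions: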

- 0: Accelerate to the left
- 1: Don't accelerate
- 2: Accelerate to the right


In [2]:
from classic_control.continuous_mountain_car import Continuous_MountainCarEnv

# Environment handed to the solver; the grid bounds below are read from it.
env = Continuous_MountainCarEnv()

bins_space = dict(
    # (0) position: 100 float32 points between the environment's position limits
    x_space=np.linspace(env.min_position, env.max_position, 100, dtype=np.float32),
    # (1) velocity: 100 float32 points on the symmetric range [-|max_speed|, +|max_speed|]
    x_dot_space=np.linspace(-abs(env.max_speed), abs(env.max_speed), 100, dtype=np.float32),
)

pi = PolicyIteration(
    env=env,
    bins_space=bins_space,
    # 9 evenly spaced float32 action values covering [-1, 1]
    action_space=np.linspace(-1.0, +1.0, 9, dtype=np.float32),
)
pi.run()

[32m2025-03-13 17:25:44.834[0m | [1mINFO    [0m | [36mPolicyIteration[0m:[36m_initialize_state_space[0m:[36m91[0m - [1mLower bounds: [-1.2  -0.07][0m
[32m2025-03-13 17:25:44.835[0m | [1mINFO    [0m | [36mPolicyIteration[0m:[36m_initialize_state_space[0m:[36m92[0m - [1mUpper bounds: [0.6  0.07][0m
[32m2025-03-13 17:25:44.836[0m | [1mINFO    [0m | [36mPolicyIteration[0m:[36m__post_init__[0m:[36m61[0m - [1mInitialized Policy Iteration for Continuous_MountainCarEnv[0m
[32m2025-03-13 17:25:44.837[0m | [34m[1mDEBUG   [0m | [36mPolicyIteration[0m:[36m__post_init__[0m:[36m62[0m - [34m[1mState space shape: (10000, 2)[0m
[32m2025-03-13 17:25:44.837[0m | [1mINFO    [0m | [36mPolicyIteration[0m:[36mrun[0m:[36m315[0m - [1mCreating Delaunay triangulation over the state space...[0m
[32m2025-03-13 17:25:44.941[0m | [1mINFO    [0m | [36mPolicyIteration[0m:[36mrun[0m:[36m317[0m - [1mDelaunay triangulation created.[0m
[32m2025-03-

In [3]:
# Evaluate the Mountain Car policy stored on disk in a rendered environment.
# NOTE(review): the ".pkl" suffix suggests `PolicyIteration.load` unpickles the
# file - only load policy files that come from a trusted source.
pi = PolicyIteration.load(Path("Continuous_MountainCarEnv_policy.pkl"))
test_enviroment(
    Continuous_MountainCarEnv(render_mode="human"),
    pi,
)

Episode 0 finished after 122 timesteps
Total reward: 88.52500000000002
Episode 1 finished after 119 timesteps
Total reward: 88.93125000000002


KeyboardInterrupt: 

In [None]:
import pstats
from pstats import SortKey

# Load the saved profiler dump and report the 20 most expensive entries,
# ranked by cumulative time.
p = pstats.Stats("profile-results.prof")
p.strip_dirs()                    # drop leading directory components from file names
p.sort_stats(SortKey.CUMULATIVE)  # order entries by cumulative time
p.print_stats(20)                 # kept as the final expression, as in the chained form