<a href="https://colab.research.google.com/github/nicoRomeroCuruchet/DynamicProgramming/blob/main/testing_bary.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pickle
import numpy as np
from PolicyIteration import PolicyIteration
from utils.utils import plot_2D_value_function,\
                        plot_3D_value_function,\
                        test_enviroment

# CartPoleEnv 

### Observation Space

The observation is an `ndarray` with shape `(4,)` whose values correspond to the following positions and velocities:

| Num | Observation           | Min                 | Max               |
|-----|-----------------------|---------------------|-------------------|
| 0   | Cart Position         | -4.8                | 4.8               |
| 1   | Cart Velocity         | -Inf                | Inf               |
| 2   | Pole Angle            | ~ -0.418 rad (-24°) | ~ 0.418 rad (24°) |
| 3   | Pole Angular Velocity | -Inf                | Inf               |

### Action Space

The action is an `ndarray` with shape `(1,)` that takes a value in `{0, 1}`, indicating the direction
of the fixed force with which the cart is pushed.

- 0: Push cart to the left
- 1: Push cart to the right

In [None]:
# Train cartpole environment:
from classic_control.cartpole import CartPoleEnv

# Instantiate the cart-pole environment that policy iteration will be run on.
env = CartPoleEnv(sutton_barto_reward=True)

# Half-widths of the symmetric grid used to discretise each state dimension.
x_lim, theta_lim = 2.4, 0.418            # position / angle thresholds
x_dot_lim, theta_dot_lim = 3.1, 3.1      # linear / angular velocity thresholds

# One evenly spaced float32 axis per state dimension; the trailing index is the
# position of that dimension inside the observation vector.
bins_space = dict(
    x_space=np.linspace(-x_lim, x_lim, num=10, dtype=np.float32),                          # (0) position
    x_dot_space=np.linspace(-x_dot_lim, x_dot_lim, num=7, dtype=np.float32),               # (1) velocity
    theta_space=np.linspace(-theta_lim, theta_lim, num=10, dtype=np.float32),              # (2) angle
    theta_dot_space=np.linspace(-theta_dot_lim, theta_dot_lim, num=7, dtype=np.float32),   # (3) angular velocity
)

# The two discrete actions {0, 1} described in the markdown above.
# NOTE(review): `theta` is presumably the convergence tolerance of the
# evaluation step -- confirm against PolicyIteration.
pi = PolicyIteration(
    env=env,
    bins_space=bins_space,
    action_space=np.array([0, 1], dtype=np.int32),
    gamma=0.99,
    theta=1e-3,
)

pi.run()

In [None]:
# Test cartpole environment:
#
# The checkpoint file name is built from the CartPoleEnv class itself instead of
# the global `env`: another cell in this notebook rebinds `env` to a different
# environment, so relying on it would load the wrong pickle whenever the cells
# are executed out of order.
# NOTE(review): pickle.load can execute arbitrary code contained in the file --
# only load checkpoints that were produced locally (presumably by `pi.run()`).
with open(CartPoleEnv.__name__ + ".pkl", "rb") as f:
    pi = pickle.load(f)

# Roll out the loaded policy in a rendered instance of the environment.
test_enviroment(CartPoleEnv(sutton_barto_reward=True, render_mode="human"), pi)

## Observation Space

The observation is an `ndarray` with shape `(2,)` whose elements correspond to the following:

| Num | Observation                          | Min   | Max  | Unit         |
|-----|--------------------------------------|-------|------|--------------|
| 0   | position of the car along the x-axis | -1.2  | 0.6  | position (m) |
| 1   | velocity of the car                  | -0.07 | 0.07 | velocity (v) |

## Action Space

There are 3 discrete deterministic actions:

- 0: Accelerate to the left
- 1: Don't accelerate
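- 2: Accelerate to the right

**Note:** the code cell below uses `Continuous_MountainCarEnv` and passes `PolicyIteration` an `action_space` of 9 evenly spaced values in `[-1.0, 1.0]`, not the three discrete actions listed above.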


In [None]:
from classic_control.continuous_mountain_car import Continuous_MountainCarEnv

# Continuous mountain-car environment; its position and speed limits define the grid.
env = Continuous_MountainCarEnv()

# 100 x 100 float32 grid: position spans the environment's own bounds, velocity
# spans a range symmetric around zero with half-width |max_speed|.
bins_space = dict(
    x_space=np.linspace(env.min_position, env.max_position, num=100, dtype=np.float32),             # (0) position
    x_dot_space=np.linspace(-abs(env.max_speed), abs(env.max_speed), num=100, dtype=np.float32),    # (1) velocity
)

# The action passed to the environment is sampled at 9 evenly spaced values in [-1, 1].
pi = PolicyIteration(
    env=env,
    bins_space=bins_space,
    action_space=np.linspace(-1.0, +1.0, num=9, dtype=np.float32),
    gamma=0.99,
    theta=1e-3,
)
# Training is left disabled in this cell.
#pi.run() # 250*

In [None]:
# Test mountain car environment:
#
# The checkpoint file name is built from the Continuous_MountainCarEnv class
# itself instead of the global `env`: another cell in this notebook binds `env`
# to a different environment, so relying on it would load the wrong pickle
# whenever the cells are executed out of order.
# NOTE(review): pickle.load can execute arbitrary code contained in the file --
# only load checkpoints that were produced locally.
with open(Continuous_MountainCarEnv.__name__ + ".pkl", "rb") as f:
    pi: PolicyIteration = pickle.load(f)

# Roll out the loaded policy in a rendered instance of the environment.
test_enviroment(Continuous_MountainCarEnv(render_mode="human"), pi)

In [None]:
# Draw the value function of the currently loaded policy, first as a 3D plot
# and then as a 2D plot.
for _plot_fn in (plot_3D_value_function, plot_2D_value_function):
    _plot_fn(pi.value_function)

In [None]:
import cProfile, pstats, io
from pstats import SortKey
# Profile one full `pi.run()` call and print the collected statistics ordered
# by cumulative time.
pr = cProfile.Profile()
pr.enable()
try:
    pi.run()
finally:
    # Always stop the profiler, even when run() raises; otherwise it stays
    # enabled and keeps instrumenting every cell executed afterwards.
    pr.disable()
# pstats writes its report into this in-memory buffer rather than to stdout.
s = io.StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

In [None]:
import cProfile, pstats, io
from pstats import SortKey
# Profile one full `pi.run()` call and print the collected statistics ordered
# by cumulative time.
# NOTE(review): this cell repeats the profiling run of the preceding cell;
# consider keeping only one of them.
pr = cProfile.Profile()
pr.enable()
try:
    pi.run()
finally:
    # Always stop the profiler, even when run() raises; otherwise it stays
    # enabled and keeps instrumenting every cell executed afterwards.
    pr.disable()
# pstats writes its report into this in-memory buffer rather than to stdout.
s = io.StringIO()
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

In [2]:
import numpy as np
from PolicyIteration import PolicyIteration
from src.reduced_symmetric_glider_pullout import ReducedSymmetricGliderPullout


# Environment on which policy iteration is run in this cell.
glider = ReducedSymmetricGliderPullout()

# 50 x 50 float32 grid over the two state dimensions; the trailing index is the
# position of that dimension inside the state vector.
bins_space = dict(
    flight_path_angle=np.linspace(-np.pi, .001, num=50, dtype=np.float32),   # (0) flight path angle (γ)
    airspeed_norm=np.linspace(0.7, 4, num=50, dtype=np.float32),             # (1) air speed (V)
)

# The action is sampled at 15 evenly spaced values in [-0.5, 1.0].
pi = PolicyIteration(
    env=glider,
    bins_space=bins_space,
    action_space=np.linspace(-0.5, 1.0, num=15, dtype=np.float32),
    gamma=0.99,
    theta=1e-3,
)

pi.run()

[32m2024-09-13 19:44:21.840[0m | [1mINFO    [0m | [36mPolicyIteration[0m:[36m__init__[0m:[36m130[0m - [1mLower bounds: [-3.1415927  0.7      ][0m
[32m2024-09-13 19:44:21.841[0m | [1mINFO    [0m | [36mPolicyIteration[0m:[36m__init__[0m:[36m131[0m - [1mUpper bounds: [1.e-03 4.e+00][0m
[32m2024-09-13 19:44:21.842[0m | [1mINFO    [0m | [36mPolicyIteration[0m:[36m__init__[0m:[36m137[0m - [1mCreating Delaunay triangulation...[0m
[32m2024-09-13 19:44:21.872[0m | [1mINFO    [0m | [36mPolicyIteration[0m:[36m__init__[0m:[36m139[0m - [1mDelaunay triangulation created.[0m
[32m2024-09-13 19:44:21.875[0m | [1mINFO    [0m | [36mPolicyIteration[0m:[36m__init__[0m:[36m166[0m - [1mPolicy Iteration was correctly initialized.[0m
[32m2024-09-13 19:44:21.875[0m | [1mINFO    [0m | [36mPolicyIteration[0m:[36m__init__[0m:[36m167[0m - [1mThe enviroment name is: ReducedSymmetricGliderPullout[0m
[32m2024-09-13 19:44:21.876[0m | [1mINFO    

In [None]:
# Pull the first two columns out of the policy's state grid.
# NOTE(review): assumes the columns follow the order of `bins_space`
# (flight path angle, then airspeed) -- confirm against PolicyIteration.
flight_path_angle, airspeed_norm = pi.states_space[:, 0], pi.states_space[:, 1]

In [None]:
# Display the airspeed values taken from column 1 of `pi.states_space`.
airspeed_norm