<h1 style="text-align:center;"><span style="color:blue;">Reinforcement Learning with OpenAI Gym</span></h1><br />

<center><img src ="area-51.jpg" width="500" /></center>

- **A** - Action
- **R** - Reward
- **E** - Environment
- **A** - Agent

<img src="RL_illustration.png" />

- [Cart Pole Control Environment in OpenAI Gym - Introduction to OpenAI Gym](https://aleksandarhaber.com/cart-pole-control-environment-in-openai-gym-gymnasium-introduction-to-openai-gym/)


In [22]:
!pip install pygame



In [23]:
# Import the libraries {!pip install as needed}
import gym
import numpy as np
import time
from pygame import gfxdraw

## Create an Environment for our Cart Pole

[gymnasium.Env](https://gymnasium.farama.org/api/env/)

In [24]:
env = gym.make('CartPole-v1', render_mode='human')

(state,_) = env.reset()

### The States - what is happening in each frame

<img src="cart-states.png" />

#### Four States

1. x Position of the cart
2. &#7819; Velocity of the cart
3. &#952; Pole angle
4. &#952; Angular Velocity (Theta dot)

In [38]:
# Run the simulation
env.render()

In [39]:
# Push the cart in one direction (0 = left)
env.step(0)


(array([-0.00814035, -0.24315004, -0.0466187 ,  0.285461  ], dtype=float32),
 1.0,
 False,
 False,
 {})

In [40]:
# Observation space limits
env.observation_space

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)

In [41]:
# upper limit
env.observation_space.high

array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38],
      dtype=float32)

In [42]:
# lower limit
env.observation_space.low

array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38],
      dtype=float32)

In [43]:
# action space
env.action_space

Discrete(2)

In [44]:
# All specifications
env.spec

EnvSpec(id='CartPole-v1', entry_point='gym.envs.classic_control.cartpole:CartPoleEnv', reward_threshold=475.0, nondeterministic=False, max_episode_steps=500, order_enforce=True, autoreset=False, disable_env_checker=False, apply_api_compatibility=False, kwargs={'render_mode': 'human'}, namespace=None, name='CartPole', version=1)

In [45]:
# Maximum number of steps per episode
env.spec.max_episode_steps

500

In [46]:
# Reward threshold per episode
env.spec.reward_threshold

475.0

In [None]:
# Simulate the environment
episodeNumber=10000
timeSteps=100

In [None]:
# Run the simulation
for episodeIndex in range(episodeNumber):
    initial_state=env.reset()
    print(episodeIndex)
    appendedObservations = []
    for timeIndex in range(timeSteps):
        print(timeIndex)
        random_action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(random_action)
        appendedObservations.append(observation)
        time.sleep(0.01)
        if (terminated):
            time.sleep(0.1)
            break
env.close()

0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
1
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
2
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
3
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
4
0
1
2
3
4
5
6
7
8
9
10
11
12
5
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
6
0
1
2
3
4
5
6
7
8
9
10
7
0
1
2
3
4
5
6
7
8
9
10
11
12
13
8
0
1
2
3
4
5
6
7
8
9
10
9
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
10
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
11
0
1
2
3
4
5
6
7
8
9
10
11
12
12
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
13
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
14
0
1
2
3
4
5
6
7
8
9
10
11
12
13
15
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
17
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
18
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
19
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
1

11
12
13
14
15
16
17
18
19
20
21
22
23
24
138
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
139
0
1
2
3
4
5
6
7
8
9
10
140
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
141
0
1
2
3
4
5
6
7
8
9
142
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
143
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
144
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
145
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
146
0
1
2
3
4
5
6
7
147
0
1
2
3
4
5
6
7
8
9
10
148
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
149
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
150
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
151
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
152
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
153
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
154
0
1
2
3
4
5
6
7
8
9
10
11
155
0
1
2
3

In [None]:
env.close()