# Introduction to Cart Pole Problem/env

### Dependencies

In [2]:
import gymnasium as gym

In [3]:
env = gym.make('CartPole-v1',render_mode="human")

![title](images/CartPole.png)

In [6]:
env.reset()

(array([-0.02881987, -0.01002407,  0.02031628, -0.0449642 ], dtype=float32),
 {})

In [16]:
env.step(0)

(array([-0.12894322, -0.4109334 ,  0.17053409,  0.78604597], dtype=float32),
 1.0,
 False,
 False,
 {})

![title](images/observationSpace.png)

### Episode End:- 
The episode ends if any one of the following occurs:
1. Termination: Pole Angle is greater than ±12°
2. Termination: Cart Position is greater than ±2.4 (center of the cart reaches the edge of the display)
3. Truncation: Episode length is greater than 500 (200 for v0)

In [85]:
action = 0 #left
action = 1 #right
observation, reward, terminated, truncated, info = env.step(action)

In [86]:
#object contains the observation space.
#cart position
#cart velocity
#pole angle
#pole angular velocity
observation

array([-1.2891004, -4.247897 ,  2.3218572,  9.060686 ], dtype=float32)

In [88]:
#if the pole is still balanced and truncated or terminated is not True
#reward of 1.0 is returned else it is zero
reward

0.0

In [89]:
#if the agent crashes or succeeds in completing a task, env is terminated
terminated

True

In [90]:
#if the environment reaches its end state (fixed number of timesteps)
truncated

False

![title](images/EnvInteraction.png)

### Basic Policy

In [17]:
#move left if pole is tilted towards left 
#or else move to right if pole is tilted towards right
def basic_policy(obs):
    angle = obs[2]
    return 0 if angle < 0 else 1
    
env = gym.make("CartPole-v1",render_mode="human")
totals = []
with env:
    for episode in range(5):
        print(f"Game:{episode}")
        episode_rewards = 0
        obs = env.reset()[0]
        for step in range(200):
            #print(f"Step Num: {step}")
            action = basic_policy(obs)
            obs, reward, done, state, info = env.step(action)
            env.render()
            episode_rewards += reward
            if done:
                break
        totals.append(episode_rewards)
        print(f"Steps: {step}")
    env.close()

Game:0
Steps: 51
Game:1
Steps: 56
Game:2
Steps: 39
Game:3
Steps: 61
Game:4
Steps: 24


In [18]:
import numpy as np
np.mean(totals), np.std(totals), np.min(totals), np.max(totals)

(47.2, 13.287588193498472, 25.0, 62.0)