In [1]:
import gymnasium as gym

In [2]:
# Create and load the CartPole-v1 simulation environment.
# render_mode="human" asks Gymnasium to draw the simulation on screen while it runs.
env = gym.make("CartPole-v1", render_mode="human")

In [None]:
# Environment: CartPole
# Documentation Link: https://gymnasium.farama.org/environments/classic_control/cart_pole/
#
# Goal: Keep the pole balanced upright on the cart for as long as possible during an episode by pushing the cart left or right
#
# Agent: Cart
#
# Actions: 0 ---- Left
#          1 ---- Right
#
# State: ["Cart Position","Cart Velocity","Pole Angle","Pole Angular Velocity"]
#
# Reward: +1 for every step taken (including the final step), so the longer the pole stays balanced, the higher the total reward.
#
# Episode End:
# Termination (terminated = True):
# 1. Pole angle goes beyond +-12 degrees
# 2. Cart position goes beyond +-2.4 (the cart reaches the edge of the display)
# Truncation (truncated = True):
# 3. Episode length exceeds 500 steps

# Common Functions used in Gymnasium Env

In [4]:
# How to get the initial state?
# reset() brings the agent back to the start of the environment and returns
# the initial observation together with an auxiliary info dict.

observation,info = env.reset()

In [None]:
observation
# State layout: ["Cart Position", "Cart Velocity", "Pole Angle", "Pole Angular Velocity"]

array([-0.01543725, -0.0290997 ,  0.03162589,  0.0208516 ], dtype=float32)

In [6]:
# Inspect the set of valid actions; CartPole has two discrete actions (0 = left, 1 = right).
env.action_space

Discrete(2)

In [25]:
# How to generate a random action?
# sample() draws a random valid action from the action space.
action = env.action_space.sample()
action

np.int64(1)

In [26]:
# How to submit an action to the environment

observation, reward, terminatedStatus, truncatedStatus, info = env.step(action)

# observation ------- the new state of the environment after the action was applied
# reward ------------ the reward earned for this step (CartPole gives +1 for every step by default)
# terminatedStatus -- whether a termination condition was satisfied (boolean), e.g. the pole tilted too far
# truncatedStatus --- whether the episode was cut off by the step limit (boolean)
# info -------------- additional diagnostic information from the environment (an empty dict here)

observation, reward, terminatedStatus, truncatedStatus, info 

(array([-0.01601925,  0.16555476,  0.03204293, -0.26168764], dtype=float32),
 1.0,
 False,
 False,
 {})

# Run a Single Episode

In [3]:
# Run a short single episode with a random policy.
# Relies on `env` created in an earlier cell; the environment is closed once at the end.

# Initialize the state: reset() returns the first observation and an info dict
observation, info = env.reset()

# Define the end-of-episode flags up front so the checks after the loop are
# always valid, even if the loop body never executes.
isTerminated = False
isTruncated = False

for episodeStep in range(10):
    # Choose a random action
    action = env.action_space.sample()

    # Supply the action to the env and collect the transition
    newState, reward, isTerminated, isTruncated, info = env.step(action)

    # Print info
    print(f"Episode Step {episodeStep} Given Action {action} I got reward {reward} and next state {newState}")

    # Stop stepping as soon as the episode has ended for either reason;
    # stepping an environment after it has ended is not valid without a reset().
    if isTerminated or isTruncated:
        break

# Report why the episode ended (nothing is printed if the 10 steps simply ran out)
if isTerminated:
    print("GAME OVER --- Terminated!!!")

if isTruncated:
    print("Episode Over. Total Allowed Steps Done. Agent was able to balance pole successfully :)")

# Release the environment exactly once, after all stepping is finished
env.close()



Episode Step 0 Given Action 1 I got reward 1.0 and next state [-0.01839833  0.23026982 -0.00155256 -0.25890252]
Episode Step 1 Given Action 0 I got reward 1.0 and next state [-0.01379293  0.03517007 -0.00673061  0.03329031]
Episode Step 2 Given Action 1 I got reward 1.0 and next state [-0.01308953  0.2303879  -0.0060648  -0.26150858]
Episode Step 3 Given Action 1 I got reward 1.0 and next state [-0.00848177  0.42559588 -0.01129497 -0.5560982 ]
Episode Step 4 Given Action 1 I got reward 1.0 and next state [ 3.0144049e-05  6.2087458e-01 -2.2416936e-02 -8.5231823e-01]
Episode Step 5 Given Action 0 I got reward 1.0 and next state [ 0.01244764  0.4260653  -0.0394633  -0.56676775]
Episode Step 6 Given Action 0 I got reward 1.0 and next state [ 0.02096894  0.2315185  -0.05079865 -0.28677395]
Episode Step 7 Given Action 0 I got reward 1.0 and next state [ 0.02559931  0.03715643 -0.05653413 -0.0105353 ]
Episode Step 8 Given Action 1 I got reward 1.0 and next state [ 0.02634244  0.23304166 -0.05

In [30]:
# Release the environment's resources once we are done with it.
env.close()

In [6]:
import time
import gymnasium as gym

# Run several episodes with a random policy and watch them on screen.

NUM_EPISODES = 10
# Per-episode step cap for this demo. NOTE: this is below CartPole-v1's 500-step
# limit, so truncation cannot occur unless this value is raised to 500 or more.
MAX_STEPS_PER_EPISODE = 400
STEP_DELAY_SECONDS = 0.02  # 20ms delay so the animation is easy to follow

# Create the environment once; reset() is enough to start every new episode.
env = gym.make("CartPole-v1", render_mode="human")

try:
    for episodeCount in range(1, NUM_EPISODES + 1):
        # Initialize the state for this episode
        observation, info = env.reset()

        # Defined up front so the checks after the inner loop are always valid
        isTerminated = False
        isTruncated = False

        for episodeStep in range(MAX_STEPS_PER_EPISODE):
            # Choose a random action
            action = env.action_space.sample()

            # Supply the action to the env. With render_mode="human" the frame is
            # drawn automatically by step(), so no explicit render() call is needed.
            newState, reward, isTerminated, isTruncated, info = env.step(action)

            # Add a small delay to see the game in execution
            time.sleep(STEP_DELAY_SECONDS)

            # Print info
            print(f"Episode Step {episodeStep} Given Action {action} I got reward {reward} and next state {newState}")

            # Stop this episode as soon as it has ended for either reason
            if isTerminated or isTruncated:
                break

        # Report why the episode ended
        if isTerminated:
            print("GAME OVER --- Terminated!!!")

        if isTruncated:
            print("Episode Over. Total Allowed Steps Done. Agent was able to balance pole successfully :)")
finally:
    # Always release the render window, even if the run is interrupted
    env.close()

Episode Step 0 Given Action 0 I got reward 1.0 and next state [-0.00258496 -0.22707945  0.03346527  0.32298362]
Episode Step 1 Given Action 1 I got reward 1.0 and next state [-0.00712655 -0.03244961  0.03992495  0.0410393 ]
Episode Step 2 Given Action 1 I got reward 1.0 and next state [-0.00777555  0.16207775  0.04074573 -0.23878442]
Episode Step 3 Given Action 1 I got reward 1.0 and next state [-0.00453399  0.35659465  0.03597004 -0.51834166]
Episode Step 4 Given Action 0 I got reward 1.0 and next state [ 0.0025979   0.16098522  0.02560321 -0.21454439]
Episode Step 5 Given Action 0 I got reward 1.0 and next state [ 0.00581761 -0.03449324  0.02131232  0.08610372]
Episode Step 6 Given Action 0 I got reward 1.0 and next state [ 0.00512774 -0.2299141   0.0230344   0.3854338 ]
Episode Step 7 Given Action 0 I got reward 1.0 and next state [ 5.2945939e-04 -4.2535535e-01  3.0743072e-02  6.8528944e-01]
Episode Step 8 Given Action 1 I got reward 1.0 and next state [-0.00797765 -0.2306734   0.04

# Run Multiple Episodes