In [1]:
import gymnasium as gym

In [2]:
# Build the CartPole-v1 simulation environment.
# render_mode="human" asks the environment to display the simulation while it runs.
env = gym.make(
    id="CartPole-v1",
    render_mode="human",
)

In [3]:
# Environment: CartPole
# Documentation Link: https://gymnasium.farama.org/environments/classic_control/cart_pole/
#
# Goal: Keep the pole balanced on the cart for the whole episode by moving the cart left or right
#
# Agent: Cart
#
# Actions: 0 ---- Left
#          1 ---- Right
#
# State: ["Cart Position","Cart Velocity","Pole Angle","Pole Angular Velocity"]
#
# Reward: 1 for every step taken such that the pole is balanced successfully.
#
# Episode End Conditions:
# 1. Termination: Pole Angle goes beyond +-12 degrees
# 2. Termination: Cart Position goes beyond +-2.4
# 3. Truncation: Episode length is greater than 500 steps

# Common Functions used in Gymnasium Env

In [4]:
# How do we get the initial state?
# reset() brings the agent back to the start of the environment.
# Its result is unpacked into two values: the initial observation and an info object.

observation,info = env.reset()

  from pkg_resources import resource_stream, resource_exists


In [5]:
# Display the observation obtained from reset(); a bare name on the last line becomes the cell output.
observation
# State: ["Cart Position","Cart Velocity","Pole Angle","Pole Angular Velocity"]

array([ 0.04899946,  0.01961563,  0.00404369, -0.00771572], dtype=float32)

In [6]:
# Inspect the set of actions the environment accepts (0 = Left, 1 = Right for CartPole).
env.action_space

Discrete(2)

In [7]:
# How do we generate a random action?
# sample() draws one action from the environment's action space.
action = env.action_space.sample()
# Bare name on the last line displays the sampled action as the cell output.
action

np.int64(1)

In [8]:
# How do we submit an action to the environment?
# step() applies the action and returns five values, unpacked below.

observation, reward, terminatedStatus, truncatedStatus, info = env.step(action)

# observation ------- the new state of the environment after the action was applied
# reward ------------ the reward earned for this step (CartPole gives +1 for every step taken)
# terminatedStatus -- whether a termination condition (pole angle / cart position limit) was met
# truncatedStatus --- whether the episode was cut off by the maximum episode length
# info -------------- additional relevant info of the environment

# Display all five returned values as the cell output.
observation, reward, terminatedStatus, truncatedStatus, info 

(array([ 0.04939177,  0.21467936,  0.00388937, -0.29912007], dtype=float32),
 1.0,
 False,
 False,
 {})

# Run a Single Episode

In [9]:
#Run one short episode with random actions and report how it ended.

#initialize the state
observation,info = env.reset()

#Upper bound on steps for this short demo (the environment itself truncates at 500 steps)
maxSteps = 10

#Defined up front so the checks after the loop are valid even if the loop body never runs
isTerminated = False
isTruncated = False

try:
    for episodeStep in range(maxSteps):
        #Choose a random action
        action = env.action_space.sample()

        #Supply action to the env
        newState,reward,isTerminated,isTruncated,info = env.step(action)

        #Print info
        print(f"Episode Step {episodeStep} Given Action {action} I got reward {reward} and next state {newState}")

        #Stop as soon as the episode ends for either reason;
        #a finished episode must not be stepped again before the next reset()
        if isTerminated or isTruncated:
            break

    #Report why the episode ended. Termination takes priority: if the pole fell on the
    #very last allowed step, the "balanced successfully" message would be misleading.
    if isTerminated:
        print("GAME OVER --- Terminated!!!")
    elif isTruncated:
        print("Episode Over. Total Allowed Steps Done. Agent was able to balance pole successfully :)")
finally:
    #Release the environment exactly once, even if a step raised an exception
    env.close()



Episode Step 0 Given Action 0 I got reward 1.0 and next state [-0.01756928 -0.17010969 -0.01793873  0.29960847]
Episode Step 1 Given Action 1 I got reward 1.0 and next state [-0.02097147  0.0252633  -0.01194656  0.00132242]
Episode Step 2 Given Action 0 I got reward 1.0 and next state [-0.02046621 -0.1696853  -0.01192011  0.29021224]
Episode Step 3 Given Action 0 I got reward 1.0 and next state [-0.02385991 -0.36463526 -0.00611587  0.579112  ]
Episode Step 4 Given Action 1 I got reward 1.0 and next state [-0.03115262 -0.16942814  0.00546637  0.28450873]
Episode Step 5 Given Action 1 I got reward 1.0 and next state [-0.03454118  0.02561541  0.01115655 -0.00644514]
Episode Step 6 Given Action 1 I got reward 1.0 and next state [-0.03402887  0.2205756   0.01102765 -0.29558727]
Episode Step 7 Given Action 0 I got reward 1.0 and next state [-0.02961736  0.02529819  0.0051159   0.00055311]
Episode Step 8 Given Action 1 I got reward 1.0 and next state [-0.0291114   0.2203464   0.00512696 -0.29

In [10]:
# Make sure the environment is closed before new environments are created in the cells below.
env.close()

# Run Multiple Episodes

In [11]:
import time
import gymnasium as gym


#Run several episodes with random actions, reporting how each one ended.

#Demo limits: how many episodes to play and the most steps allowed in each one.
#CartPole-v1 truncates an episode at 500 steps, so with a 400-step cap the
#truncation message below only appears if maxStepsPerEpisode is raised.
numEpisodes = 10
maxStepsPerEpisode = 400

#Create the environment once and reuse it; reset() starts each new episode
env = gym.make("CartPole-v1", render_mode="human")

try:
    for episodeCount in range(1, numEpisodes + 1):

        #initialize the state
        observation,info = env.reset()

        #Defined up front so the checks after the inner loop are always valid
        isTerminated = False
        isTruncated = False

        for episodeStep in range(maxStepsPerEpisode):
            #Choose a random action
            action = env.action_space.sample()

            #Supply action to the env
            newState,reward,isTerminated,isTruncated,info = env.step(action)

            #Add small delay so the game can be watched in execution.
            #No explicit env.render() call: with render_mode="human" the
            #environment draws the frame as part of step().
            time.sleep(0.02) #20ms delay

            #Print info
            print(f"Episode Step {episodeStep} Given Action {action} I got reward {reward} and next state {newState}")

            #Stop as soon as the episode ends for either reason;
            #a finished episode must not be stepped again before the next reset()
            if isTerminated or isTruncated:
                break

        #Report why the episode ended. Termination takes priority over truncation
        #so a fallen pole is never reported as a successful balance.
        if isTerminated:
            print("GAME OVER --- Terminated!!!")
        elif isTruncated:
            print("Episode Over. Total Allowed Steps Done. Agent was able to balance pole successfully :)")
finally:
    #Release the environment exactly once, even if an episode raised an exception
    env.close()

Episode Step 0 Given Action 1 I got reward 1.0 and next state [ 0.03702192  0.15751     0.0263678  -0.2640338 ]
Episode Step 1 Given Action 0 I got reward 1.0 and next state [ 0.04017212 -0.0379782   0.02108712  0.0368478 ]
Episode Step 2 Given Action 0 I got reward 1.0 and next state [ 0.03941255 -0.2333961   0.02182408  0.33610862]
Episode Step 3 Given Action 1 I got reward 1.0 and next state [ 0.03474463 -0.03859142  0.02854625  0.05038697]
Episode Step 4 Given Action 0 I got reward 1.0 and next state [ 0.0339728  -0.23411082  0.02955399  0.35193804]
Episode Step 5 Given Action 0 I got reward 1.0 and next state [ 0.02929059 -0.4296403   0.03659275  0.6537918 ]
Episode Step 6 Given Action 1 I got reward 1.0 and next state [ 0.02069778 -0.23504648  0.04966859  0.37285233]
Episode Step 7 Given Action 1 I got reward 1.0 and next state [ 0.01599685 -0.040664    0.05712564  0.09623476]
Episode Step 8 Given Action 0 I got reward 1.0 and next state [ 0.01518357 -0.2365562   0.05905033  0.40