In [13]:
import gym
import matplotlib.pyplot as plt
import numpy as np
from pprint import pprint

# Show which Gym release is installed so the outputs recorded below can be
# tied to a specific library version.
print(f"OpenAI Gym version: {gym.__version__}")

OpenAI Gym version: 0.17.2


In [14]:
# Ask the registry for everything it has registered and keep each entry's id.
# The comprehension variable is deliberately not called `env`, so it cannot be
# confused with the environment instance created later in the notebook.
all_envs = gym.envs.registry.all()
env_ids = [entry.id for entry in all_envs]

# One print call, one line per item: total count first, then a header for the
# sample of ids that pprint shows underneath.
print(
    f"Number of envs: {len(all_envs)}",
    "Examples of envs:",
    sep="\n",
)
pprint(env_ids[:10])

Number of envs: 859
Examples of envs:
['Copy-v0',
 'RepeatCopy-v0',
 'ReversedAddition-v0',
 'ReversedAddition3-v0',
 'DuplicatedInput-v0',
 'Reverse-v0',
 'CartPole-v0',
 'CartPole-v1',
 'MountainCar-v0',
 'MountainCarContinuous-v0']


In [16]:
# Create the environment that the rest of the notebook works with.
env_name = "CartPole-v1"
env = gym.make(env_name)

print(f"Env: {env_name}")

# Observation space: its shape and the per-dimension upper/lower bounds.
print(f"Observation space: {env.observation_space}")
print(f"Observation space shape: {env.observation_space.shape}")
print(f"Observation space high values: {env.observation_space.high}")
print(f"Observation space low values: {env.observation_space.low}")

# Action space: for a Discrete space, `.n` is the number of available actions
# (it is not a shape), so the label says exactly that.
print(f"Action space: {env.action_space}")
print(f"Number of actions: {env.action_space.n}")

# Metadata from the environment's registration spec.
print(f"Max episode steps: {env.spec.max_episode_steps}")
print(f"Reward threshold: {env.spec.reward_threshold}")
print(f"Nondeterminicity: {env.spec.nondeterministic}")

Env: CartPole-v1
Observation space: Box(4,)
Observation space shape: (4,)
Observation space high values: [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]
Observation space low values: [-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]
Action space: Discrete(2)
Action space shape: 2
Max episode steps: 500
Reward threshold: 475.0
Nondeterminicity: False


In [17]:
# Seed every random source the notebook draws from so that
# "Restart Kernel -> Run All" replays the same trajectory:
#   * env.seed(...) fixes the environment's own generator (initial states),
#   * env.action_space.seed(...) fixes the separate generator behind
#     env.action_space.sample(), which env.seed(...) does not touch.
SEED = 42
env.seed(SEED)
env.action_space.seed(SEED)

# reset() starts a new episode and returns the initial observation.
state = env.reset()

print(f"State type: {type(state)}")
print(f"State shape: {state.shape}")
print(f"State: {state}")

State type: <class 'numpy.ndarray'>
State shape: (4,)
State: [-0.01258566 -0.00156614  0.04207708 -0.00180545]


In [19]:
# Draw one random action from the environment's action space; the next cell
# feeds it to env.step().
action = env.action_space.sample()  # 0 = left, 1 = right

print(f"Sampled action: {action}")

Sampled action: 1


In [20]:
# Advance the environment by one step with the previously sampled action.
# step() hands back a 4-tuple: next observation, reward, episode-finished
# flag, and an info object.
state, reward, done, info = env.step(action)

# Emit all four results in a single call, one per line.
print(
    f"State: {state}",
    f"Reward: {reward}",
    f"Done: {done}",
    f"Info: {info}",
    sep="\n",
)

State: [-0.01261699  0.19292789  0.04204097 -0.28092127]
Reward: 1.0
Done: False
Info: {}


In [24]:
num_episodes = 10

# Random-policy baseline: in every episode keep sampling arbitrary actions
# until the environment reports that the episode is over, accumulating the
# per-step rewards along the way.
for episode in range(num_episodes):
    state = env.reset()
    done = False
    episode_reward = 0

    while True:
        action = env.action_space.sample()
        state, reward, done, _ = env.step(action)
        episode_reward += reward
        if done:
            break

    # Episodes are reported 1-based.
    print(f"Episode: {episode + 1}, reward: {episode_reward}")

Episode: 1, reward: 16.0
Episode: 2, reward: 21.0
Episode: 3, reward: 30.0
Episode: 4, reward: 23.0
Episode: 5, reward: 17.0
Episode: 6, reward: 13.0
Episode: 7, reward: 13.0
Episode: 8, reward: 14.0
Episode: 9, reward: 47.0
Episode: 10, reward: 13.0
