In [1]:
import gym
import matplotlib.pyplot as plt
%matplotlib inline

# Roll out a uniformly random policy on CartPole to exercise the gym API:
# reset the environment, then repeatedly render, sample an action, and step.
NUM_EPISODES = 20             # how many episodes to run
MAX_STEPS_PER_EPISODE = 100   # upper bound on steps taken within one episode

env = gym.make('CartPole-v0')
try:
    for i_episode in range(NUM_EPISODES):
        # reset() begins a new episode; its return value is the first observation.
        observation = env.reset()
        for t in range(MAX_STEPS_PER_EPISODE):
            # The `close` keyword is intentionally omitted: it defaulted to
            # False in the gym versions that had it, and later gym releases
            # reject it as an unexpected keyword argument.
            env.render(mode='rgb_array')
            # print(observation)
            action = env.action_space.sample()  # random action from the action space
            observation, reward, done, info = env.step(action)
            if done:
                # print("Episode finished after {} timesteps".format(t+1))
                break
finally:
    # Always release the environment (and any render viewer it opened),
    # even if an episode raises or the cell is interrupted.
    env.close()

In [None]:
# Understanding the env "step" call and what it has
# link: https://gym.openai.com/docs/
# The step function returns 4 pieces of information
# 1. observation (object) - environment-specific object representing
#    your observation of the environment. For example: pixel data from
#    a camera, joint angles and joint velocities of a robot, or the board
#    state in a board game.
# 2. reward (float) - amount of reward achieved by the previous action.
#    The scale varies between environments, but the goal is always to
#    increase your total reward.
# 3. done (bool) - whether it's time to "reset" the environment again.
#    Most (but not all) tasks are divided up into well-defined episodes,
#    and done being "True" indicates the episode has terminated.
#    For example, perhaps the pole tipped too far, or you lost
#    your last life.
# 4. info (dict) - diagnostic information useful for debugging. It can
#    sometimes be useful for learning (for example, it might contain the
#    raw probabilities behind the environment's last state change).
#    However, official evaluations of your agent are not allowed
#    to use this for learning.

In [None]:
# Some Notes
# Every environment comes with "Space" objects that describe the
# valid actions and observations.
# e.g.:
# import gym
# env = gym.make('CartPole-v0')
# print(env.action_space)
# > Discrete(2)
# print(env.observation_space)
# > Box(4,)

# The Discrete space allows for a fixed range of non-negative numbers,
# so in this case valid "action"s are either 0 or 1. The Box space
# represents an n-dimensional box, so valid observations will be an
# array of 4 numbers. We can also check the Box bounds:

# print(env.observation_space.high)
# > array([2.4, inf, 0.2094, inf])
# print(env.observation_space.low)
# > array([-2.4, -inf, -0.2094, -inf])

# this introspection can be helpful to write generic code that works
# for many different environments. Box and Discrete are the most common
# "Space"s. You can sample from a "Space" or check that something
# belongs to it:

# e.g.:
# from gym import spaces
# space = spaces.Discrete(8) # Set with 8 elements {0, 1, 2, ..., 7}
# x = space.sample()
# assert space.contains(x)
# assert space.n == 8

In [None]:
# For the CartPole-v0 one of the actions applies force to the
# left, and one of them applies force to the right.