In [12]:
import gym
import random

#### MountainCar

In [15]:
env_name = "MountainCar-v0"
env = gym.make(env_name)
print("Observation space:", env.observation_space)
print("Action space:", env.action_space)

# Check that actions are discrete (like they were for CartPole)

Observation space: Box(-1.2000000476837158, 0.6000000238418579, (2,), float32)
Action space: Discrete(3)


In [16]:
type(env.action_space)

gym.spaces.discrete.Discrete

In [17]:
class Agent():
    def __init__(self, env):
        self.action_size = env.action_space.n
        print("action size:", self.action_size)
        
    def get_action(self, state):
        action = random.choice(range(self.action_size))
        return action

In [18]:
# reset the state
agent = Agent(env)
state = env.reset()

# loop for 200 timesteps, taking a random action each time
for i in range(200):
    action = agent.get_action(state)
    state, reward, done, info = env.step(action)
    env.step(action)
    env.render()

action size: 3


### MountainCarContinuous

In [19]:
env_name = "MountainCarContinuous-v0"
env = gym.make(env_name)
print("Observation space:", env.observation_space)
print("Action space:", env.action_space)

# Action space is now continuous

Observation space: Box(-1.2000000476837158, 0.6000000238418579, (2,), float32)
Action space: Box(-1.0, 1.0, (1,), float32)


We want to print out the env.action_space attributes:

In [20]:
env.action_space.low, env.action_space.high

(array([-1.], dtype=float32), array([1.], dtype=float32))

They are both single element arrays, low and high limits.

Need numpys random continuous distribution function

In [8]:
import numpy as np

In [9]:
class Agent():
    def __init__(self, env):
        self.action_low = env.action_space.low
        self.action_high = env.action_space.high
        self.action_shape = env.action_space.shape
        
    def get_action(self, state):
        action = np.random.uniform(self.action_low,
                                  self.action_high,
                                  self.action_shape)
        return action

In [10]:
# reset the state
agent = Agent(env)
state = env.reset()

# loop for 200 timesteps, taking a random action each time
for i in range(200):
    action = agent.get_action(state)
    state, reward, done, info = env.step(action)
    env.step(action)
    env.render()

In [11]:
state = env.reset()

### Finale

In [13]:
#env_name = "CartPole-v1"
#env_name = "MountainCar-v0"
#env_name = "MountainCarContinuous-v0"
#env_name = "Acrobot-v1"
#env_name = "Pendulum-v0"
env_name = "FrozenLake-v0"

env = gym.make(env_name)
print("Observation space:", env.observation_space)
print("Action space:", env.action_space)

type(env.action_space)

Observation space: Discrete(16)
Action space: Discrete(4)


gym.spaces.discrete.Discrete

In [14]:
class Agent():
    def __init__(self, env):
        self.is_discrete = \
            type(env.action_space) == gym.spaces.discrete.Discrete
        
        if self.is_discrete:
            self.action_size = env.action_space.n
            print("Action size:", self.action_size)
            
        else:
            self.action_low = env.action_space.low
            self.action_high = env.action_space.high
            self.action_shape = env.action_space.shape
            print("Action range:", self.action_low, self.action_high)
        
    def get_action(self, state):
        if self.is_discrete:
            action = random.choice(range(self.action_size))
        else:
            action = np.random.uniform(self.action_low,
                                      self.action_high,
                                      self.action_shape)
        return action

In [15]:
# reset the state
agent = Agent(env)
state = env.reset()

# loop for 200 timesteps, taking a random action each time
for i in range(200):
    action = agent.get_action(state)
    state, reward, done, info = env.step(action)
    env.step(action)
    env.render()

Action size: 4
  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG
  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Up)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Up)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Up)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Up)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Left)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Up)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Up)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Left)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (Up)
SFFF
F[41mH[0mFH
FFFH
HFFG
  (