Skip to content

Commit

Permalink
Switch to a global PRNG for action/observation spaces (#144)
Browse files (browse the repository at this point in the history)
  • Loading branch information
gdb committed May 31, 2016
1 parent 5bb2337 commit 8a535ca
Show file tree
Hide file tree
Showing 32 changed files with 198 additions and 179 deletions.
6 changes: 3 additions & 3 deletions gym/envs/algorithmic/algorithmic_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ def __init__(self, inp_dim=1, base=10, chars=False):
AlgorithmicEnv.current_length = 2
tape_control = []

self.action_space = Tuple(([Discrete(2 * self.inp_dim), Discrete(2), Discrete(self.base)]))
self.observation_space = Discrete(self.base + 1)

self._seed()
self.reset()

def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)

self.action_space = Tuple(([Discrete(2 * self.inp_dim, np_random=self.np_random), Discrete(2, np_random=self.np_random), Discrete(self.base, np_random=self.np_random)]))
self.observation_space = Discrete(self.base + 1, np_random=self.np_random)
return [seed]

def _get_obs(self, pos=None):
Expand Down
22 changes: 11 additions & 11 deletions gym/envs/atari/atari_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ def __init__(self, game='pong', obs_type='ram'):

self._seed()

self._action_set = self.ale.getMinimalActionSet()
self.action_space = spaces.Discrete(len(self._action_set))

(screen_width,screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))

def _seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. This gets passed as a uint, but gets
Expand All @@ -50,17 +61,6 @@ def _seed(self, seed=None):
# Empirically, we need to seed before loading the ROM.
self.ale.setInt(b'random_seed', seed2)
self.ale.loadROM(self.game_path)
self._action_set = self.ale.getMinimalActionSet()

self.action_space = spaces.Discrete(len(self._action_set), np_random=self.np_random)

(screen_width,screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255, np_random=self.np_random)
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), np_random=self.np_random)
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
return [seed1, seed2]

def _step(self, a):
Expand Down
10 changes: 5 additions & 5 deletions gym/envs/board_game/go.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,16 +159,16 @@ def __init__(self, player_color, opponent, observation_type, illegal_move_mode,
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
self.reset()

shape = pachi_py.CreateBoard(self.board_size).encode().shape
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape))
# One action for each board position, pass, and resign
self.action_space = spaces.Discrete(self.board_size**2 + 2)

def _seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
pachi_py.pachi_srand(seed2)

shape = pachi_py.CreateBoard(self.board_size).encode().shape
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape), np_random=self.np_random)
# One action for each board position, pass, and resign
self.action_space = spaces.Discrete(self.board_size**2 + 2, np_random=self.np_random)
return [seed1, seed2]

def _reset(self):
Expand Down
11 changes: 6 additions & 5 deletions gym/envs/board_game/hex.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,17 @@ def __init__(self, player_color, opponent, observation_type, illegal_move_mode,

if self.observation_type != 'numpy3c':
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))

# One action for each board position and resign
self.action_space = spaces.Discrete(self.board_size ** 2 + 1)
observation = self.reset()
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape))

self._seed()

def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)

# One action for each board position and resign
self.action_space = spaces.Discrete(self.board_size ** 2 + 1, np_random=self.np_random)
observation = self.reset()
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape), np_random=self.np_random)

# Update the random policy if needed
if isinstance(self.opponent, str):
if self.opponent == 'random':
Expand Down
7 changes: 4 additions & 3 deletions gym/envs/box2d/bipedal_walker.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,12 @@ def __init__(self):
self.prev_shaping = None
self._reset()

high = np.array([np.inf]*24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
self.observation_space = spaces.Box(-high, high)

def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
high = np.array([np.inf]*24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]), np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed]

def _destroy(self):
Expand Down
5 changes: 3 additions & 2 deletions gym/envs/box2d/car_racing.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,11 @@ def __init__(self):
self.reward = 0.0
self.prev_reward = 0.0

self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1])) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))

def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), np_random=self.np_random) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), np_random=self.np_random)
return [seed]

def _destroy(self):
Expand Down
10 changes: 5 additions & 5 deletions gym/envs/box2d/lunar_lander.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,14 @@ def __init__(self):
self.prev_reward = None
self._reset()

def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)

# useful range is -1 .. +1
high = np.array([np.inf]*8)
# nop, fire left engine, main engine, right engine
self.action_space = spaces.Discrete(4, np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
self.action_space = spaces.Discrete(4)
self.observation_space = spaces.Box(-high, high)

def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]

def _destroy(self):
Expand Down
9 changes: 4 additions & 5 deletions gym/envs/classic_control/acrobot.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,14 @@ class AcrobotEnv(core.Env):

def __init__(self):
self.viewer = None
high = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low, high)
self.action_space = spaces.Discrete(3)
self._seed()

def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)

high = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
self.action_space = spaces.Discrete(3, np_random=self.np_random)
return [seed]

def _reset(self):
Expand Down
9 changes: 5 additions & 4 deletions gym/envs/classic_control/cartpole.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ def __init__(self):
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 2.4

# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
high = np.array([self.x_threshold, np.inf, self.theta_threshold_radians * 2, np.inf])
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high)

self._seed()
self.reset()
self.viewer = None
Expand All @@ -40,10 +45,6 @@ def __init__(self):

def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
high = np.array([self.x_threshold, np.inf, self.theta_threshold_radians * 2, np.inf])
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed]

def _step(self, action):
Expand Down
5 changes: 3 additions & 2 deletions gym/envs/classic_control/mountain_car.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@ def __init__(self):

self.viewer = None

self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high)

self._seed()
self.reset()

def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(3, np_random=self.np_random)
self.observation_space = spaces.Box(self.low, self.high, np_random=self.np_random)
return [seed]

def _step(self, action):
Expand Down
9 changes: 5 additions & 4 deletions gym/envs/classic_control/pendulum.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@ def __init__(self):
self.max_torque=2.
self.dt=.05
self.viewer = None

high = np.array([1., 1., self.max_speed])
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,))
self.observation_space = spaces.Box(low=-high, high=high)

self._seed()

def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)

high = np.array([1., 1., self.max_speed])
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), np_random=self.np_random)
self.observation_space = spaces.Box(low=-high, high=high, np_random=self.np_random)
return [seed]

def _step(self,u):
Expand Down
17 changes: 8 additions & 9 deletions gym/envs/doom/doom_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,14 @@ def __init__(self):
self.viewer = None
self.game.init()
self.game.new_episode()

# 3 allowed actions [0, 9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))

self._seed()

def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)

# 3 allowed actions [0, 9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]
16 changes: 7 additions & 9 deletions gym/envs/doom/doom_corridor.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,13 @@ def __init__(self):
self.game.init()
self.game.new_episode()

# action indexes are [0, 9, 10, 12, 13, 14]
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))

self._seed()

def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)

# action indexes are [0, 9, 10, 12, 13, 14]
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]
16 changes: 7 additions & 9 deletions gym/envs/doom/doom_deathmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,13 @@ def __init__(self):
self.game.init()
self.game.new_episode()

# 41 allowed actions (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))

self._seed()

def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)

# 41 allowed actions (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]
16 changes: 7 additions & 9 deletions gym/envs/doom/doom_defend_center.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,13 @@ def __init__(self):
self.game.init()
self.game.new_episode()

# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))

self._seed()

def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)

# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]
15 changes: 6 additions & 9 deletions gym/envs/doom/doom_defend_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,14 @@ def __init__(self):
self.screen_width = 640 # Must match .cfg file
self.game.set_window_visible(False)
self.viewer = None
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed()
self.game.init()
self.game.new_episode()

def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)

# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]
16 changes: 7 additions & 9 deletions gym/envs/doom/doom_health_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,13 @@ def __init__(self):
self.game.init()
self.game.new_episode()

# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))

self._seed()

def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)

# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]
16 changes: 7 additions & 9 deletions gym/envs/doom/doom_my_way_home.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,13 @@ def __init__(self):
self.game.init()
self.game.new_episode()

# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))

self._seed()

def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)

# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]
Loading

0 comments on commit 8a535ca

Please sign in to comment.