openai · jonasschneider · Apr 30, 2016 · Apr 28, 2016 · Apr 28, 2016 · Apr 28, 2016
diff --git a/.gitignore b/.gitignore
@@ -28,4 +28,6 @@ mujoco-bundle
 
 rllab_mujoco
 
-tutorial/*.html
+tutorial/*.html
+.eggs
+.tox
diff --git a/.travis.yml b/.travis.yml
@@ -4,18 +4,18 @@ cache:
   apt: true
   pip: false
 language: python
-python:
-  - "2.7"
-  # - "3.2"
-
-# Install numpy and scipy so we don't need to compile them
 addons:
-    apt:
-      packages:
-        - python-numpy
-        - python-matplotlib
-        - python-tk
+  apt:
+    sources:
+      - george-edison55-precise-backports # cmake 3.2.3
+    packages:
+      - cmake
+      - cmake-data
 
+      - python-dev
+python:
+  - "2.7"
+  - "3.5"
 before_install:
   - Xvfb :12 -screen 0 800x600x24 +extension RANDR &
   - mkdir -p ~/.mujoco
@@ -24,9 +24,8 @@ before_install:
   - '[ "${TRAVIS_PULL_REQUEST}" != "false" ] || ( curl https://openai-public.s3-us-west-2.amazonaws.com/mujoco/$MUJOCO_KEY_BUNDLE.tar.gz | tar xz -C ~/.mujoco )'
 env:
   - DISPLAY=:12
-
-install: pip install -r requirements.txt
-script: nose2
+install: pip install tox-travis
+script: tox
 
 notifications:
   slack:

diff --git a/README.rst b/README.rst
@@ -8,6 +8,16 @@ gym
 If you're not sure where to start, we recommend beginning with the
 `docs <https://gym.openai.com/docs>`_ on our site.
 
+Supported systems
+-----------------
+
+We currently support Linux and OS X running Python 2.7 or 3.5.
+Python 3 support should still be considered experimental -- if you find any bugs, please report them!
+
+We will expand support to Windows based on demand. We
+will also soon ship a Docker container exposing the environments as an API
+callable from any platform, for use with any non-Python framework, such as Torch.
+
 .. contents:: **Contents of this document**
    :depth: 2
 
@@ -36,7 +46,7 @@ Installation
 You can perform a minimal install of ``gym`` with:
 
 .. code:: shell
-	  
+
 	  git clone https://github.com/openai/gym.git
 	  cd gym
 	  pip install -e .
@@ -79,17 +89,8 @@ On OSX:
 On Ubuntu 14.04:
 
 .. code:: shell
-
-	  apt-get install -y python-numpy python-dev cmake zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev python-opengl
-
-Supported systems
------------------
-
-We currently support Python 2.7 on Linux and OSX.
 
-We will expand support to Python 3 and Windows based on demand. We
-will also soon ship a Docker container exposing OpenAI Gym as an API
-callable from any platform.
+	  apt-get install -y python-numpy python-dev cmake zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev python-opengl
 
 Pip version
 -----------

diff --git a/gym/envs/algorithmic/algorithmic_env.py b/gym/envs/algorithmic/algorithmic_env.py
@@ -3,7 +3,7 @@
 from gym.utils import colorize
 import numpy as np
 import random
-import StringIO
+from six import StringIO
 import sys
 import math
 
@@ -91,7 +91,7 @@ def _render(self, mode='human', close=False):
             # Nothing interesting to close
             return
 
-        outfile = StringIO.StringIO() if mode == 'ansi' else sys.stdout
+        outfile = StringIO() if mode == 'ansi' else sys.stdout
         inp = "Total length of input instance: %d, step: %d\n" % (self.total_len, self.time)
         outfile.write(inp)
         x, y, action = self.x, self.y, self.last_action

diff --git a/gym/envs/atari/atari_env.py b/gym/envs/atari/atari_env.py
@@ -6,8 +6,8 @@
 
 try:
     import atari_py
-except ImportError:
-    raise error.DependencyNotInstalled("{}. (HINT: you can install Atari dependencies with 'pip install gym[atari]'.)")
+except ImportError as e:  # bind the ImportError so its text is included in the message
+    raise error.DependencyNotInstalled("{}. (HINT: you can install Atari dependencies with 'pip install gym[atari]'.)".format(e))
 
 import logging
 logger = logging.getLogger(__name__)
@@ -53,7 +53,7 @@ def _step(self, a):
         reward = 0.0
         action = self._action_set[a]
         num_steps = np.random.randint(2, 5)
-        for _ in xrange(num_steps):
+        for _ in range(num_steps):
             reward += self.ale.act(action)
         ob = self._get_obs()
 
@@ -80,7 +80,7 @@ def _reset(self):
         self.ale.reset_game()
         return self._get_obs()
 
-    def _render(self, mode='human', close=False):        
+    def _render(self, mode='human', close=False):
         if close:
             if self.viewer is not None:
                 self.viewer.close()
@@ -93,7 +93,7 @@ def _render(self, mode='human', close=False):
             if self.viewer is None:
                 self.viewer = rendering.SimpleImageViewer()
             self.viewer.imshow(img)
-    
+
     def get_action_meanings(self):
         return [ACTION_MEANING[i] for i in self._action_set]
 
@@ -118,4 +118,4 @@ def get_action_meanings(self):
     15 : "UPLEFTFIRE",
     16 : "DOWNRIGHTFIRE",
     17 : "DOWNLEFTFIRE",
-}
+}
diff --git a/gym/envs/board_game/go.py b/gym/envs/board_game/go.py
@@ -8,8 +8,9 @@
 import numpy as np
 import gym
 from gym import spaces
-import StringIO
+from six import StringIO
 import sys
+import six
 
 
 # The coordinate representation of Pachi (and pachi_py) is defined on a board
@@ -66,7 +67,7 @@ def act(self, action):
             pachi_py.stone_other(self.color))
 
     def __repr__(self):
-        return 'To play: {}\n{}'.format(pachi_py.color_to_str(self.color), repr(self.board))
+        return 'To play: {}\n{}'.format(six.u(pachi_py.color_to_str(self.color)), six.u(self.board.__repr__()))
 
 
 ### Adversary policies ###
@@ -76,7 +77,7 @@ def random_policy(curr_state, prev_state, prev_action):
     return _coord_to_action(b, np.random.choice(legal_coords))
 
 def make_pachi_policy(board, engine_type='uct', threads=1, pachi_timestr=''):
-    engine = pachi_py.PyPachiEngine(board, engine_type, 'threads=%d' % threads)
+    engine = pachi_py.PyPachiEngine(board, engine_type, six.b('threads=%d' % threads))
 
     def pachi_policy(curr_state, prev_state, prev_action):
         if prev_state is not None:
@@ -182,7 +183,7 @@ def _reset(self):
     def _render(self, mode="human", close=False):
         if close:
             return
-        outfile = StringIO.StringIO() if mode == 'ansi' else sys.stdout
+        outfile = StringIO() if mode == 'ansi' else sys.stdout
         outfile.write(repr(self.state) + '\n')
         return outfile
 
@@ -204,7 +205,7 @@ def _step(self, action):
             self.state = self.state.act(action)
         except pachi_py.IllegalMove:
             if self.illegal_move_mode == 'raise':
-                raise
+                six.reraise(*sys.exc_info())
             elif self.illegal_move_mode == 'lose':
                 # Automatic loss on illegal move
                 self.done = True
@@ -250,6 +251,6 @@ def _reset_opponent(self, board):
         if self.opponent == 'random':
             self.opponent_policy = random_policy
         elif self.opponent == 'pachi:uct:_2400':
-            self.opponent_policy = make_pachi_policy(board=board, engine_type='uct', pachi_timestr='_2400') # TODO: strength as argument
+            self.opponent_policy = make_pachi_policy(board=board, engine_type=six.b('uct'), pachi_timestr=six.b('_2400')) # TODO: strength as argument
         else:
             raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))
diff --git a/gym/envs/classic_control/rendering.py b/gym/envs/classic_control/rendering.py
@@ -196,7 +196,7 @@ def render1(self):
 
 def make_circle(radius=10, res=30, filled=True):
     points = []
-    for i in xrange(res):
+    for i in range(res):
         ang = 2*math.pi*i / res
         points.append((math.cos(ang)*radius, math.sin(ang)*radius))
     if filled:

diff --git a/gym/envs/mujoco/mujoco_env.py b/gym/envs/mujoco/mujoco_env.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import gym
+import six
 from gym import error, spaces
 
 try:
@@ -84,15 +85,15 @@ def reset_viewer_if_necessary(self):
             self.viewer_setup()
 
     def get_body_com(self, body_name):
-        idx = self.model.body_names.index(body_name)
+        idx = self.model.body_names.index(six.b(body_name))
         return self.model.data.com_subtree[idx]
 
     def get_body_comvel(self, body_name):
-        idx = self.model.body_names.index(body_name)
+        idx = self.model.body_names.index(six.b(body_name))
         return self.model.body_comvels[idx]
 
     def get_body_xmat(self, body_name):
-        idx = self.model.body_names.index(body_name)
+        idx = self.model.body_names.index(six.b(body_name))
         return self.model.data.xmat[idx].reshape((3, 3))
 
     @property

diff --git a/gym/envs/registration.py b/gym/envs/registration.py
@@ -1,7 +1,6 @@
 import logging
 import pkg_resources
 import re
-import six
 import sys
 from gym import error
 
@@ -11,14 +10,8 @@
 
 def load(name):
     entry_point = pkg_resources.EntryPoint.parse('x={}'.format(name))
-    try:
-        result = entry_point.load(False)
-    except ImportError as e:
-        _, _, traceback = sys.exc_info()
-        new_e = ImportError("{} (while loading {})".format(e, name))
-        six.reraise(type(new_e), new_e, traceback)
-    else:
-        return result
+    result = entry_point.load(False)
+    return result
 
 class EnvSpec(object):
     """A specification for a particular instance of the environment. Used
@@ -56,15 +49,7 @@ def __init__(self, id, entry_point, timestep_limit=1000, trials=100, reward_thre
     def make(self):
         """Instantiates an instance of the environment with appropriate kwargs"""
         cls = load(self._entry_point)
-        try:
-            env = cls(**self._kwargs)
-        except TypeError as e:
-            type, value, traceback = sys.exc_info()
-
-            # This likely indicates unsupported kwargs
-            six.reraise(type, """Could not 'make' {} ({}): {}.
-
-(For reference, the environment was instantiated with kwargs: {}).""".format(self.id, cls, e.message, self._kwargs), traceback)
+        env = cls(**self._kwargs)
 
         # Make the enviroment aware of which spec it came from.
         env.spec = self

diff --git a/gym/envs/tests/test_envs.py b/gym/envs/tests/test_envs.py
@@ -33,7 +33,7 @@ def test_random_rollout():
     for env in [envs.make('CartPole-v0'), envs.make('FrozenLake-v0')]:
         agent = lambda ob: env.action_space.sample()
         ob = env.reset()
-        for _ in xrange(10):
+        for _ in range(10):
             assert env.observation_space.contains(ob)
             a = agent(ob)
             assert env.action_space.contains(a)

diff --git a/gym/envs/tests/test_registration.py b/gym/envs/tests/test_registration.py
@@ -30,6 +30,6 @@ def test_malformed_lookup():
     try:
         registry.spec(u'“Breakout-v0”')
     except error.Error as e:
-        assert 'malformed environment ID' in e.message, 'Unexpected message: {}'.format(e)
+        assert 'malformed environment ID' in '{}'.format(e), 'Unexpected message: {}'.format(e)
     else:
         assert False
diff --git a/gym/envs/toy_text/frozen_lake.py b/gym/envs/toy_text/frozen_lake.py
@@ -1,5 +1,6 @@
 import numpy as np
-import StringIO, sys
+import sys
+from six import StringIO, b
 
 from gym import utils
 from gym.envs.toy_text import discrete
@@ -67,10 +68,10 @@ def __init__(self, desc=None, map_name="4x4",is_slippery=True):
         nA = 4
         nS = nrow * ncol
 
-        isd = (desc == 'S').ravel().astype('float64')
+        isd = np.array(desc == 'S').astype('float64')
         isd /= isd.sum()
 
-        P = {s : {a : [] for a in xrange(nA)} for s in xrange(nS)}
+        P = {s : {a : [] for a in range(nA)} for s in range(nS)}
 
         def to_s(row, col):
             return row*ncol + col
@@ -85,24 +86,24 @@ def inc(row, col, a):
                 row = max(row-1,0)
             return (row, col)
 
-        for row in xrange(nrow):
-            for col in xrange(ncol):
+        for row in range(nrow):
+            for col in range(ncol):
                 s = to_s(row, col)
-                for a in xrange(4):
+                for a in range(4):
                     li = P[s][a]
                     if is_slippery:
                         for b in [(a-1)%4, a, (a+1)%4]:
                             newrow, newcol = inc(row, col, b)
                             newstate = to_s(newrow, newcol)
                             letter = desc[newrow, newcol]
-                            done = letter in 'GH'
+                            done = str(letter) in 'GH'
                             rew = float(letter == 'G')
                             li.append((1.0/3.0, newstate, rew, done))
                     else:
                         newrow, newcol = inc(row, col, a)
                         newstate = to_s(newrow, newcol)
                         letter = desc[newrow, newcol]
-                        done = letter in 'GH'
+                        done = str(letter) in 'GH'
                         rew = float(letter == 'G')
                         li.append((1.0/3.0, newstate, rew, done))
 
@@ -112,13 +113,13 @@ def _render(self, mode='human', close=False):
         if close:
             return
 
-        outfile = StringIO.StringIO() if mode == 'ansi' else sys.stdout
+        outfile = StringIO() if mode == 'ansi' else sys.stdout
 
         row, col = self.s // self.ncol, self.s % self.ncol
         desc = self.desc.tolist()
+        desc = [[c.decode('utf-8') for c in line] for line in desc]
         desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True)
-
-        outfile.write("\n".join("".join(row) for row in desc)+"\n")
+        outfile.write("\n".join(''.join(line) for line in desc)+"\n")
         if self.lastaction is not None:
             outfile.write("  ({})\n".format(["Left","Down","Right","Up"][self.lastaction]))
         else: