Permalink
Browse files

Initial release. Hello world :).

  • Loading branch information...
gdb committed Apr 27, 2016
0 parents commit e8f29806033861f196cbc679788b985f4b40c4b7
Showing with 6,500 additions and 0 deletions.
  1. +31 −0 .gitignore
  2. +32 −0 .travis.yml
  3. +13 −0 CODE_OF_CONDUCT.rst
  4. +35 −0 Dockerfile
  5. +7 −0 Makefile
  6. +208 −0 README.rst
  7. +12 −0 bin/docker_entrypoint
  8. +19 −0 examples/agents/_policies.py
  9. +92 −0 examples/agents/cem.py
  10. +50 −0 examples/agents/random_agent.py
  11. +44 −0 examples/agents/tabular_q_agent.py
  12. +5 −0 examples/scripts/list_envs
  13. +35 −0 examples/scripts/play_go
  14. +69 −0 examples/scripts/sim_env
  15. +44 −0 examples/scripts/upload
  16. +16 −0 gym/__init__.py
  17. +87 −0 gym/configuration.py
  18. +173 −0 gym/core.py
  19. +20 −0 gym/envs/README.md
  20. +208 −0 gym/envs/__init__.py
  21. +3 −0 gym/envs/algorithmic/README.md
  22. +5 −0 gym/envs/algorithmic/__init__.py
  23. +203 −0 gym/envs/algorithmic/algorithmic_env.py
  24. +24 −0 gym/envs/algorithmic/copy.py
  25. +27 −0 gym/envs/algorithmic/duplicated_input.py
  26. +29 −0 gym/envs/algorithmic/repeat_copy.py
  27. +27 −0 gym/envs/algorithmic/reverse.py
  28. +30 −0 gym/envs/algorithmic/reversed_addition.py
  29. +1 −0 gym/envs/atari/__init__.py
  30. +121 −0 gym/envs/atari/atari_env.py
  31. +1 −0 gym/envs/board_game/__init__.py
  32. +233 −0 gym/envs/board_game/go.py
  33. +5 −0 gym/envs/classic_control/__init__.py
  34. +288 −0 gym/envs/classic_control/acrobot.py
  35. BIN gym/envs/classic_control/assets/clockwise.png
  36. +118 −0 gym/envs/classic_control/cartpole.py
  37. +119 −0 gym/envs/classic_control/mountain_car.py
  38. +89 −0 gym/envs/classic_control/pendulum.py
  39. +292 −0 gym/envs/classic_control/rendering.py
  40. +1 −0 gym/envs/mujoco/.gitignore
  41. +12 −0 gym/envs/mujoco/__init__.py
  42. +46 −0 gym/envs/mujoco/ant.py
  43. +80 −0 gym/envs/mujoco/assets/ant.xml
  44. +95 −0 gym/envs/mujoco/assets/half_cheetah.xml
  45. +44 −0 gym/envs/mujoco/assets/hopper.xml
  46. +120 −0 gym/envs/mujoco/assets/humanoid.xml
  47. +47 −0 gym/envs/mujoco/assets/inverted_double_pendulum.xml
  48. +27 −0 gym/envs/mujoco/assets/inverted_pendulum.xml
  49. +31 −0 gym/envs/mujoco/assets/point.xml
  50. +39 −0 gym/envs/mujoco/assets/reacher.xml
  51. +38 −0 gym/envs/mujoco/assets/swimmer.xml
  52. +61 −0 gym/envs/mujoco/assets/walker2d.xml
  53. +35 −0 gym/envs/mujoco/half_cheetah.py
  54. +41 −0 gym/envs/mujoco/hopper.py
  55. +53 −0 gym/envs/mujoco/humanoid.py
  56. +43 −0 gym/envs/mujoco/inverted_double_pendulum.py
  57. +31 −0 gym/envs/mujoco/inverted_pendulum.py
  58. +109 −0 gym/envs/mujoco/mujoco_env.py
  59. +45 −0 gym/envs/mujoco/reacher.py
  60. +35 −0 gym/envs/mujoco/swimmer.py
  61. +41 −0 gym/envs/mujoco/walker2d.py
  62. +115 −0 gym/envs/registration.py
  63. +35 −0 gym/envs/tests/test_envs.py
  64. +35 −0 gym/envs/tests/test_registration.py
  65. +2 −0 gym/envs/toy_text/__init__.py
  66. +40 −0 gym/envs/toy_text/discrete.py
  67. +127 −0 gym/envs/toy_text/frozen_lake.py
  68. +40 −0 gym/envs/toy_text/roulette.py
  69. +135 −0 gym/envs/toy_text/taxi.py
  70. +97 −0 gym/error.py
  71. +3 −0 gym/monitoring/__init__.py
  72. +328 −0 gym/monitoring/monitor.py
  73. +56 −0 gym/monitoring/stats_recorder.py
  74. +67 −0 gym/monitoring/tests/test_video_recorder.py
  75. +290 −0 gym/monitoring/video_recorder.py
  76. +9 −0 gym/scoreboard/__init__.py
  77. +181 −0 gym/scoreboard/api.py
  78. +4 −0 gym/scoreboard/client/README.md
  79. +6 −0 gym/scoreboard/client/__init__.py
  80. +158 −0 gym/scoreboard/client/api_requestor.py
  81. +93 −0 gym/scoreboard/client/http_client.py
  82. +378 −0 gym/scoreboard/client/resource.py
  83. 0 gym/scoreboard/client/tests/__init__.py
  84. +32 −0 gym/scoreboard/client/tests/helper.py
  85. +16 −0 gym/scoreboard/client/tests/test_evaluation.py
  86. +15 −0 gym/scoreboard/client/tests/test_file_upload.py
  87. +14 −0 gym/scoreboard/client/util.py
  88. +123 −0 gym/scoreboard/scoring.py
  89. +5 −0 gym/spaces/__init__.py
  90. +39 −0 gym/spaces/box.py
  91. +17 −0 gym/spaces/discrete.py
  92. +30 −0 gym/spaces/tests/test_spaces.py
  93. +26 −0 gym/spaces/tuple_space.py
  94. +55 −0 gym/utils.py
  95. +1 −0 gym/version.py
  96. +5 −0 requirements.txt
  97. +34 −0 setup.py
@@ -0,0 +1,31 @@
*.swp
*.pyc
*.py~
.DS_Store
# Setuptools distribution and build folders.
/dist/
/build
# Virtualenv
/env
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
*.sublime-project
*.sublime-workspace
logs/
.ipynb_checkpoints
ghostdriver.log
junk
MUJOCO_LOG.txt
mujoco-bundle
rllab_mujoco
tutorial/*.html
@@ -0,0 +1,32 @@
dist: trusty
sudo: required
cache:
apt: true
pip: false
language: python
python:
- "2.7"
# - "3.2"
# Install numpy, matplotlib and tk from apt so we don't need to compile them
addons:
apt:
packages:
- python-numpy
- python-matplotlib
- python-tk
before_install:
- Xvfb :12 -screen 0 800x600x24 +extension RANDR &
- mkdir -p ~/.mujoco
- curl https://openai-public.s3-us-west-2.amazonaws.com/mujoco/$MUJOCO_KEY_BUNDLE.tar.gz | tar xz -C ~/.mujoco
env:
- DISPLAY=:12
install: pip install -r requirements.txt
script: nose2
notifications:
slack:
secure: h/Mxm8K+avH/2W0818zCHmLloRPMFN4NJL01+VShvAkH80/acfjeq/+mMdWXXPL/oOB6kSHDk+GDhwR6+s03ZcPMn5INTFvFYqUc6UWmT+NXtOPxGTN0xda6MdYUkWQUKaMyjFrweZQOMOASFBIzPOq4XeVbM5aB8s4EJhnfAcYZhp/idwKbToVihN4KZgxlvZIFc8iEp1o9uSl5qrsaeYYYXRkb6mauacAwOo4/Chu+cOnoLUOnvhBFE3rV3doDNrbnoalO8XiExtgx5CIAYWrlMni7r2Q+LlzgwdyTH19ZtybPxJTZIIWSBQ2UtcoYdIEDcc36GcUwz1VUGg32mLJJnY2xw80CWR4ixFPpLwwP5Y99WTn8v094B4nmFTWOwNWXp3EkqtTN9XcJoRBqXB5ArucIPqrx57dOCljSKx22gL6WaF2p3stSAxIGFektGyGnisaELrFZG1C63aHoUPicj3gUlijmAoUmYaDRf6P1wnpXqBpKDAWWhAMSatvx1ekmEJgR7OQklQnnfjx9kENDUygNUWS4IQwN2qYieuzHFL3of7/30mTM43+Vt/vWN8GI7j01BXu6FNGGloHxjH1pt3bLP/+uj5BJsT2HWF+Z8XR4VE6cyVuKsQAFgCXwOkoDHALbcwsspONDIt/9ixkesgh1oFt4CzU3UuU5wYs=
on_success: change
@@ -0,0 +1,13 @@
OpenAI Gym is dedicated to providing a harassment-free experience for
everyone, regardless of gender, gender identity and expression, sexual
orientation, disability, physical appearance, body size, age, race, or
religion. We do not tolerate harassment of participants in any form.
This code of conduct applies to all OpenAI Gym spaces (including Gist
comments) both online and off. Anyone who violates this code of
conduct may be sanctioned or expelled from these spaces at the
discretion of the OpenAI team.
We may add additional rules over time, which will be made clearly
available to participants. Participants are responsible for knowing
and abiding by these rules.
@@ -0,0 +1,35 @@
# A Dockerfile that sets up a full Gym install
FROM ubuntu:14.04
# Install the system packages Gym depends on (X11/OpenGL libraries, Xvfb,
# libav, the distro's numpy/scipy/pyglet/setuptools packages, and build
# tools), clean the apt caches in the same layer, then bootstrap pip via
# easy_install.
RUN apt-get update \
&& apt-get install -y xorg-dev \
libgl1-mesa-dev \
xvfb \
libxinerama1 \
libxcursor1 \
libglu1-mesa \
libav-tools \
python-numpy \
python-scipy \
python-pyglet \
python-setuptools \
libpq-dev \
libjpeg-dev \
curl \
cmake \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* \
&& easy_install pip
WORKDIR /usr/local/gym
# Stage a stub `gym` package plus only version.py, requirements.txt and
# setup.py before installing dependencies.
# NOTE(review): presumably this keeps the `pip install` layer cached when only
# source files change -- confirm.
RUN mkdir gym && touch gym/__init__.py
COPY ./gym/version.py ./gym
COPY ./requirements.txt .
COPY ./setup.py .
RUN pip install -r requirements.txt
# Finally, copy in our actual code!
COPY . /usr/local/gym
WORKDIR /root
# Run every container command through the entrypoint script.
ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"]
@@ -0,0 +1,7 @@
.PHONY: install test

# Install the Python dependencies listed in requirements.txt.
install:
	pip install -r requirements.txt

# Run the test suite with nose2.
test:
	nose2
@@ -0,0 +1,208 @@
gym
******
**OpenAI Gym is a toolkit for developing and comparing reinforcement learning algorithms.** This is the ``gym`` open-source library, which gives you access to an ever-growing variety of environments.
``gym`` makes no assumptions about the structure of your agent, and is compatible with any numerical computation library, such as TensorFlow or Theano. You can use it from Python code, and soon from other languages.
If you're not sure where to start, we recommend beginning with the
`docs <https://gym.openai.com/docs>`_ on our site.
.. contents:: **Contents of this document**
:depth: 2
Basics
======
There are two basic concepts in reinforcement learning: the
environment (namely, the outside world) and the agent (namely, the
algorithm you are writing). The agent sends `actions` to the
environment, and the environment replies with `observations` and
`rewards` (that is, a score).
The core `gym` interface is `Env
<https://github.com/openai/gym/blob/master/gym/core.py>`_, which is
the unified environment interface. There is no interface for agents;
that part is left to you. The following are the ``Env`` methods you
should know:
- `reset(self)`: Reset the environment's state. Returns `observation`.
- `step(self, action)`: Step the environment by one timestep. Returns `observation`, `reward`, `done`, `info`.
- `render(self, mode='human', close=False)`: Render one frame of the environment. The default mode will do something human friendly, such as pop up a window. Passing the `close` flag signals the renderer to close any such windows.
Installation
============
You can perform a minimal install of ``gym`` with:
.. code:: shell
git clone https://github.com/openai/gym
cd gym
pip install -e .
You'll be able to run a few environments right away:
- `algorithmic <https://gym.openai.com/envs#algorithmic>`_
- `toy_text <https://gym.openai.com/envs#toy_text>`_
- `classic_control <https://gym.openai.com/envs#classic_control>`_ (you'll need ``pyglet`` to render though)
We recommend playing with those environments at first, and then later
installing the dependencies for the remaining environments.
Installing everything
---------------------
Once you're ready to install everything, run ``pip install -e .[all]``.
MuJoCo has a proprietary dependency we can't set up for you. Follow
the
`instructions <https://github.com/openai/mujoco-py#obtaining-the-binaries-and-license-key>`_
in the ``mujoco-py`` package for help.
For the install to succeed, you'll need to have some system packages
installed. We'll build out the list here over time; please let us know
what you end up installing on your platform.
On Ubuntu 14.04:
.. code:: shell
apt-get install -y python-numpy python-dev cmake zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev python-opengl
Supported systems
-----------------
We currently support Python 2.7 on Linux and OSX.
We will expand support to Python 3 and Windows based on demand. We
will also soon ship a Docker container exposing OpenAI Gym as an API
callable from any platform.
Pip version
-----------
To run ``pip install -e .[all]``, you'll need a semi-recent pip.
Please make sure your pip is at least at version ``1.5.0``. You can
upgrade using the following: ``pip install --ignore-installed
pip``. Alternatively, you can open `setup.py
<https://github.com/openai/gym/blob/master/setup.py>`_ and
install the dependencies by hand.
Installing dependencies for specific environments
-------------------------------------------------
If you'd like to install the dependencies for only specific
environments, see `setup.py
<https://github.com/openai/gym/blob/master/setup.py>`_. We
maintain the lists of dependencies on a per-environment group basis.
Environments
============
The code for each environment group is housed in its own subdirectory
`gym/envs
<https://github.com/openai/gym/blob/master/gym/envs>`_. The
specification of each task is in `gym/envs/__init__.py
<https://github.com/openai/gym/blob/master/gym/envs/__init__.py>`_. It's
worth browsing through both.
Algorithmic
-----------
These are a variety of algorithmic tasks, such as learning to copy a
sequence.
.. code:: python
import gym
env = gym.make('Copy-v0')
env.reset()
env.render()
Atari
-----
The Atari environments are a variety of Atari video games. If you didn't do the full install, you can install dependencies via ``pip install -e .[atari]`` and then get started as follows:
.. code:: python
import gym
env = gym.make('SpaceInvaders-v0')
env.reset()
env.render()
This will install ``atari-py``, which automatically compiles the `Arcade Learning Environment <http://www.arcadelearningenvironment.org/>`_. This can take quite a while (a few minutes on a decent laptop), so just be prepared.
Board games
-----------
The board game environments are a variety of board games. If you didn't do the full install, you can install dependencies via ``pip install -e .[board_game]`` and then get started as follows:
.. code:: python
import gym
env = gym.make('Go9x9-v0')
env.reset()
env.render()
Classic control
---------------
These are a variety of classic control tasks, which would appear in a typical reinforcement learning textbook. If you didn't do the full install, you will need to run ``pip install -e .[classic_control]`` to enable rendering. You can get started with them via:
.. code:: python
import gym
env = gym.make('CartPole-v0')
env.reset()
env.render()
MuJoCo
------
`MuJoCo <http://www.mujoco.org/>`_ is a physics engine which can do
very detailed efficient simulations with contacts. It's not
open-source, so you'll have to follow the instructions in `mujoco-py
<https://github.com/openai/mujoco-py#obtaining-the-binaries-and-license-key>`_
to set it up. You'll have to also run ``pip install -e .[mujoco]`` if you didn't do the full install.
.. code:: python
import gym
env = gym.make('Humanoid-v0')
env.reset()
env.render()
Toy text
--------
Toy environments which are text-based. There's no extra dependency to install, so to get started, you can just do:
.. code:: python
import gym
env = gym.make('FrozenLake-v0')
env.reset()
env.render()
Examples
========
See the ``examples`` directory.
- Run `examples/agents/random_agent.py <https://github.com/openai/gym/blob/master/examples/agents/random_agent.py>`_ to run a simple random agent and upload the results to the scoreboard.
- Run `examples/agents/cem.py <https://github.com/openai/gym/blob/master/examples/agents/cem.py>`_ to run an actual learning agent (using the cross-entropy method) and upload the results to the scoreboard.
- Run `examples/scripts/list_envs <https://github.com/openai/gym/blob/master/examples/scripts/list_envs>`_ to generate a list of all environments. (You can also just `browse <https://gym.openai.com/docs>`_ the list on our site.)
- Run `examples/scripts/upload <https://github.com/openai/gym/blob/master/examples/scripts/upload>`_ to upload the recorded output from ``random_agent.py`` or ``cem.py``. Make sure to obtain an `API key <https://gym.openai.com/settings/profile>`_.
Testing
=======
We are using `nose2 <https://github.com/nose-devs/nose2>`_ for tests. You can run them via
.. code:: shell
nose2
You can also run tests in a specific directory by using the ``-s`` option, or by passing in the specific name of the test. See the `nose2 docs <http://nose2.readthedocs.org/en/latest/usage.html#naming-tests>`_ for more details.
@@ -0,0 +1,12 @@
#!/bin/sh
# This script is the entrypoint for our Docker image: it starts a virtual X
# display and then runs whatever command was passed as arguments.
# Abort immediately if any command below fails.
set -e
# Set up display; otherwise rendering will cause segfaults
# Remove the lock file for display :12 (presumably left over from a previous
# run of this container) so Xvfb can start.
rm -f /tmp/.X12-lock
# Start Xvfb in the background on display :12, discarding its stderr output.
Xvfb :12 -screen 0 800x600x24 +extension RANDR 2>/dev/null &
# Point X clients started from here on at that display.
export DISPLAY=:12
# Replace this shell with the requested command.
exec "$@"
@@ -0,0 +1,19 @@
# Support code for cem.py
class BinaryActionLinearPolicy(object):
    """Linear policy that chooses between two discrete actions (0 or 1).

    ``theta`` is a flat parameter vector: every entry except the last is a
    weight, and the last entry is the bias.
    """

    def __init__(self, theta):
        self.w = theta[:-1]
        self.b = theta[-1]

    def act(self, ob):
        """Return 1 if the affine score ``ob.dot(w) + b`` is negative, else 0.

        ``ob`` must provide a ``dot`` method (e.g. a NumPy array) and have
        one entry per weight.
        """
        y = ob.dot(self.w) + self.b
        return int(y < 0)
class ContinuousActionLinearPolicy(object):
    """Linear policy that maps an observation to a continuous action.

    ``theta`` is a flat parameter array with ``(n_in + 1) * n_out`` entries.
    The first ``n_in * n_out`` entries are reshaped into the weight matrix
    ``W`` of shape ``(n_in, n_out)``; the remaining ``n_out`` entries are
    reshaped into the bias ``b`` of shape ``(1, n_out)``. ``theta`` must
    support slicing followed by ``reshape`` (e.g. a NumPy array).
    """

    def __init__(self, theta, n_in, n_out):
        expected = (n_in + 1) * n_out
        assert len(theta) == expected, (
            "theta must have (n_in + 1) * n_out = %d entries, got %d"
            % (expected, len(theta)))
        n_weights = n_in * n_out
        self.W = theta[:n_weights].reshape(n_in, n_out)
        self.b = theta[n_weights:].reshape(1, n_out)

    def act(self, ob):
        """Return the affine action ``ob.dot(W) + b`` for observation ``ob``."""
        return ob.dot(self.W) + self.b
Oops, something went wrong.

0 comments on commit e8f2980

Please sign in to comment.