<h2>Chapter 2. OpenAI Gym</h2>

Install : https://gym.openai.com/docs/

<h3>Agent anatomy</h3>

In [1]:
import random


class Environment:
    """Toy environment that pays a random reward for a limited number of steps.

    The observation and the set of available actions never change; the only
    state is ``steps_left``, the number of actions still allowed.
    """

    def __init__(self, steps=10):
        """Create an environment whose episode lasts ``steps`` actions.

        Args:
            steps: Number of actions allowed before the episode is over.
                Defaults to 10, the value that was previously hard-coded.
        """
        self.steps_left = steps

    def get_observation(self):
        """Return the current observation (always a new list of three zeros)."""
        return [0.0, 0.0, 0.0]

    def get_actions(self):
        """Return the list of actions an agent may pick from."""
        return [0, 1]

    def is_done(self):
        """Return True once the step budget has been used up."""
        # `<=` instead of `==`: a zero or negative budget must end the episode
        # rather than let a `while not env.is_done()` loop run forever.
        return self.steps_left <= 0

    def action(self, action):
        """Consume one step and return the reward for it.

        The value of ``action`` is not used; the reward is drawn from
        ``random.random()`` regardless of the choice.

        Raises:
            Exception: if the episode is already over.
        """
        if self.is_done():
            raise Exception("Game is over")
        self.steps_left -= 1
        return random.random()


class Agent:
    """Agent that acts at random and keeps a running sum of its rewards."""

    def __init__(self):
        # Sum of every reward collected through step().
        self.total_reward = 0.0

    def step(self, env):
        """Perform one observe / choose / act cycle against ``env``.

        ``env`` must provide ``get_observation()``, ``get_actions()`` and
        ``action(a)``; the value returned by ``action`` is added to
        ``total_reward``.
        """
        # The observation is read but does not influence the random choice.
        _observation = env.get_observation()
        choices = env.get_actions()
        picked = random.choice(choices)
        gained = env.action(picked)
        self.total_reward = self.total_reward + gained

In [2]:
# Build one environment and one agent, then let the agent act until the
# environment reports that the episode is finished.
env, agent = Environment(), Agent()

episode_running = not env.is_done()
while episode_running:
    agent.step(env)
    episode_running = not env.is_done()

# Report the reward accumulated over the whole episode.
print("Total reward got: %.4f" % agent.total_reward)

Total reward got: 6.1828


<h3>CartPole random</h3>

In [3]:
import gym


def random(env_name="CartPole-v0"):
    """Play one episode with uniformly sampled actions and print a summary.

    NOTE: this function is named ``random``, so defining it rebinds the name
    of the stdlib ``random`` module in the notebook namespace. Code that
    relies on ``random.random()`` / ``random.choice()`` needs the module
    re-imported after this cell has run.

    Args:
        env_name: Id passed to ``gym.make``. Defaults to "CartPole-v0", the
            environment that was previously hard-coded.

    Returns:
        None. The step count and total reward are printed.
    """
    env = gym.make(env_name)

    total_reward = 0.0
    total_steps = 0

    try:
        env.reset()
        done = False
        while not done:
            action = env.action_space.sample()
            # Only the reward and the termination flag are needed here.
            _obs, reward, done, _info = env.step(action)
            total_reward += reward
            total_steps += 1
    finally:
        # The environment is local to this call, so nobody else can close it.
        env.close()

    print("Episode done in %d steps, total reward %.2f" % (total_steps, total_reward))

In [4]:
random()

Episode done in 28 steps, total reward 28.00


<h3>Random ActionWrapper</h3>

In [5]:
import gym
import random


class RandomActionWrapper(gym.ActionWrapper):
    """Action wrapper that sometimes swaps the agent's action for a sampled one.

    With probability ``epsilon`` the action passed in is discarded and
    replaced by ``self.env.action_space.sample()``; otherwise it is returned
    unchanged.
    """

    def __init__(self, env, epsilon=0.1):
        super(RandomActionWrapper, self).__init__(env)
        # Chance of overriding the action on any given call to action().
        self.epsilon = epsilon

    def action(self, action):
        """Return ``action``, or a sampled action when the random draw is below epsilon."""
        override = random.random() < self.epsilon
        if not override:
            return action
        # Announce each override so it is visible in the cell output.
        print("Random!")
        return self.env.action_space.sample()

In [6]:
# CartPole wrapped so that the constant action below is occasionally replaced.
env = RandomActionWrapper(gym.make("CartPole-v0"))

obs = env.reset()
total_reward = 0.0

# Always request action 0; any variation comes from the wrapper.
done = False
while not done:
    obs, reward, done, _ = env.step(0)
    total_reward += reward

print("Reward got: %.2f" % total_reward)

Random!
Reward got: 10.00


<h3>CartPole random Monitor</h3>

In [11]:
import gym
import time

# Run one random-action CartPole episode through gym's Monitor wrapper, which
# is pointed at the "recording" directory.
env = gym.make("CartPole-v0")
env = gym.wrappers.Monitor(env, "recording", force=True)

total_reward = 0.0
total_steps = 0

try:
    obs = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        obs, reward, done, _ = env.step(action)
        print(obs)
        total_reward += reward
        total_steps += 1
        # 1.0 / 60 keeps the pause at ~16 ms even where `/` is integer
        # division (Python 2), in which case 1/60 would be 0.
        time.sleep(1.0 / 60)

    print("Episode done in %d steps, total reward %.2f" % (total_steps, total_reward))
finally:
    # Close the monitor even if reset()/step() raised, so it is not left open.
    env.close()
    # Kept from the original: also close the wrapped environment explicitly.
    # NOTE(review): presumably redundant after env.close() -- confirm.
    env.env.close()

[ 0.02296346 -0.22127322  0.04820323  0.32388331]
[ 0.018538   -0.0268696   0.0546809   0.04678285]
[ 0.01800061  0.16742736  0.05561656 -0.22815881]
[ 0.02134916 -0.02844346  0.05105338  0.08153598]
[ 0.02078029  0.16591088  0.0526841  -0.19461251]
[ 0.0240985  -0.02992356  0.04879185  0.11421353]
[ 0.02350003 -0.22570945  0.05107612  0.42188198]
[ 0.01898584 -0.03134693  0.05951376  0.14572826]
[ 0.01835891  0.16287448  0.06242832 -0.12760135]
[ 0.0216164   0.35704913  0.0598763  -0.39995394]
[ 0.02875738  0.16113119  0.05187722 -0.08901056]
[ 0.03198    -0.0346945   0.05009701  0.21957786]
[ 0.03128611  0.15967688  0.05448856 -0.05689107]
[ 0.03447965  0.35397694  0.05335074 -0.33189688]
[ 0.04155919  0.54830051  0.04671281 -0.60728984]
[ 0.0525252   0.35255764  0.03456701 -0.30026724]
[0.05957635 0.15696047 0.02856166 0.00311391]
[ 0.06271556 -0.03855921  0.02862394  0.3046698 ]
[ 0.06194438 -0.23407713  0.03471734  0.60624084]
[ 0.05726283 -0.42966689  0.04684216  0.90965354]
[ 0.

<b>Last observation<br></b>
x : -0.02873893 <br>
dx/dt : -1.02766337<br>
θ : 0.24277478<br>
dθ/dt : 2.11799185<br>