### Function Approximation

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tempfile
import base64
import pprint
import json
import sys
import gym
import io

from gym import wrappers
from subprocess import check_output
from IPython.display import HTML

In [15]:
# -*- coding: utf-8 -*-
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop
from keras import backend as K

EPISODES = 5000


class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=100000)
        self.gamma = 0.9    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.e_decay = .99
        self.e_min = 0.05
        self.learning_rate = 0.0001
        self.model = self._build_model()
        self.target_model = self._build_model()

    def _huber_loss(self, target, prediction):
        # sqrt(1+error^2)-1
        error = prediction - target
        return K.mean(K.sqrt(1+K.square(error))-1, axis=-1)

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(20, input_dim=self.state_size, activation='tanh'))
        model.add(Dense(20, activation='tanh', init='uniform'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss=self._huber_loss,
                      optimizer=RMSprop(lr=self.learning_rate))
        return model

    def update_target_model(self):
        # copy weights from model to target_model
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        batch_size = min(batch_size, len(self.memory))
        minibatch = random.sample(self.memory, batch_size)
        X = np.zeros((batch_size, self.state_size))
        Y = np.zeros((batch_size, self.action_size))
        for i in range(batch_size):
            state, action, reward, next_state, done = minibatch[i]
            target = self.model.predict(state)[0]
            if done:
                target[action] = reward
            else:
                a = np.argmax(self.model.predict(next_state)[0])
                t = self.target_model.predict(next_state)[0]
                target[action] = reward + self.gamma * t[a]
            X[i], Y[i] = state, target
        self.model.fit(X, Y, epochs=1, verbose=0)
        if self.epsilon > self.e_min:
            self.epsilon *= self.e_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

In [16]:
env = gym.make('CartPole-v0')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)

for e in range(EPISODES):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    while True:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("episode: {}/{}, score: {}, e: {:.2}"
                    .format(e, EPISODES, time, agent.epsilon))
            break
    if e % 30 == 0:
        agent.update_target_model()
    agent.replay(32)

[2017-04-03 01:45:01,605] Making new env: CartPole-v0


episode: 0/5000, score: 14, e: 1.0




episode: 1/5000, score: 14, e: 0.99
episode: 2/5000, score: 14, e: 0.98
episode: 3/5000, score: 14, e: 0.97
episode: 4/5000, score: 14, e: 0.96
episode: 5/5000, score: 14, e: 0.95
episode: 6/5000, score: 14, e: 0.94
episode: 7/5000, score: 14, e: 0.93
episode: 8/5000, score: 14, e: 0.92
episode: 9/5000, score: 14, e: 0.91
episode: 10/5000, score: 14, e: 0.9
episode: 11/5000, score: 14, e: 0.9
episode: 12/5000, score: 14, e: 0.89
episode: 13/5000, score: 14, e: 0.88
episode: 14/5000, score: 14, e: 0.87
episode: 15/5000, score: 14, e: 0.86
episode: 16/5000, score: 14, e: 0.85
episode: 17/5000, score: 14, e: 0.84
episode: 18/5000, score: 14, e: 0.83
episode: 19/5000, score: 14, e: 0.83
episode: 20/5000, score: 14, e: 0.82
episode: 21/5000, score: 14, e: 0.81
episode: 22/5000, score: 14, e: 0.8
episode: 23/5000, score: 14, e: 0.79
episode: 24/5000, score: 14, e: 0.79
episode: 25/5000, score: 14, e: 0.78
episode: 26/5000, score: 14, e: 0.77
episode: 27/5000, score: 14, e: 0.76
episode: 28/5

In [17]:
agent.epsilon = 0.0
mdir = tempfile.mkdtemp()
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, mdir, force=True)

[2017-04-03 01:50:39,997] Making new env: CartPole-v0


In [18]:
state_size = env.observation_space.shape[0]
action_size = env.action_space.n

for e in range(EPISODES//2):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    while True:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        state = next_state
        if done:
            print("episode: {}/{}, score: {}, e: {:.2}"
                  .format(e, EPISODES, time, agent.epsilon))
            break

[2017-04-03 01:50:40,016] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video000000.mp4
[2017-04-03 01:50:40,665] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video000001.mp4


episode: 0/5000, score: 14, e: 0.0


[2017-04-03 01:50:41,205] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video000008.mp4


episode: 1/5000, score: 14, e: 0.0
episode: 2/5000, score: 14, e: 0.0
episode: 3/5000, score: 14, e: 0.0
episode: 4/5000, score: 14, e: 0.0
episode: 5/5000, score: 14, e: 0.0
episode: 6/5000, score: 14, e: 0.0
episode: 7/5000, score: 14, e: 0.0
episode: 8/5000, score: 14, e: 0.0
episode: 9/5000, score: 14, e: 0.0
episode: 10/5000, score: 14, e: 0.0
episode: 11/5000, score: 14, e: 0.0
episode: 12/5000, score: 14, e: 0.0
episode: 13/5000, score: 14, e: 0.0
episode: 14/5000, score: 14, e: 0.0
episode: 15/5000, score: 14, e: 0.0
episode: 16/5000, score: 14, e: 0.0
episode: 17/5000, score: 14, e: 0.0
episode: 18/5000, score: 14, e: 0.0
episode: 19/5000, score: 14, e: 0.0
episode: 20/5000, score: 14, e: 0.0
episode: 21/5000, score: 14, e: 0.0
episode: 22/5000, score: 14, e: 0.0
episode: 23/5000, score: 14, e: 0.0
episode: 24/5000, score: 14, e: 0.0
episode: 25/5000, score: 14, e: 0.0


[2017-04-03 01:50:42,176] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video000027.mp4


episode: 26/5000, score: 14, e: 0.0
episode: 27/5000, score: 14, e: 0.0
episode: 28/5000, score: 14, e: 0.0
episode: 29/5000, score: 14, e: 0.0
episode: 30/5000, score: 14, e: 0.0
episode: 31/5000, score: 14, e: 0.0
episode: 32/5000, score: 14, e: 0.0
episode: 33/5000, score: 14, e: 0.0
episode: 34/5000, score: 14, e: 0.0
episode: 35/5000, score: 14, e: 0.0
episode: 36/5000, score: 14, e: 0.0
episode: 37/5000, score: 14, e: 0.0
episode: 38/5000, score: 14, e: 0.0
episode: 39/5000, score: 14, e: 0.0
episode: 40/5000, score: 14, e: 0.0
episode: 41/5000, score: 14, e: 0.0
episode: 42/5000, score: 14, e: 0.0
episode: 43/5000, score: 14, e: 0.0
episode: 44/5000, score: 14, e: 0.0
episode: 45/5000, score: 14, e: 0.0
episode: 46/5000, score: 14, e: 0.0
episode: 47/5000, score: 14, e: 0.0
episode: 48/5000, score: 14, e: 0.0
episode: 49/5000, score: 14, e: 0.0
episode: 50/5000, score: 14, e: 0.0
episode: 51/5000, score: 14, e: 0.0
episode: 52/5000, score: 14, e: 0.0
episode: 53/5000, score: 14,

[2017-04-03 01:50:43,402] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video000064.mp4


episode: 57/5000, score: 14, e: 0.0
episode: 58/5000, score: 14, e: 0.0
episode: 59/5000, score: 14, e: 0.0
episode: 60/5000, score: 14, e: 0.0
episode: 61/5000, score: 14, e: 0.0
episode: 62/5000, score: 14, e: 0.0
episode: 63/5000, score: 14, e: 0.0
episode: 64/5000, score: 14, e: 0.0
episode: 65/5000, score: 14, e: 0.0
episode: 66/5000, score: 14, e: 0.0
episode: 67/5000, score: 14, e: 0.0
episode: 68/5000, score: 14, e: 0.0
episode: 69/5000, score: 14, e: 0.0
episode: 70/5000, score: 14, e: 0.0
episode: 71/5000, score: 14, e: 0.0
episode: 72/5000, score: 14, e: 0.0
episode: 73/5000, score: 14, e: 0.0
episode: 74/5000, score: 14, e: 0.0
episode: 75/5000, score: 14, e: 0.0
episode: 76/5000, score: 14, e: 0.0
episode: 77/5000, score: 14, e: 0.0
episode: 78/5000, score: 14, e: 0.0
episode: 79/5000, score: 14, e: 0.0
episode: 80/5000, score: 14, e: 0.0
episode: 81/5000, score: 14, e: 0.0
episode: 82/5000, score: 14, e: 0.0
episode: 83/5000, score: 14, e: 0.0
episode: 84/5000, score: 14,

[2017-04-03 01:50:45,238] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video000125.mp4


episode: 115/5000, score: 14, e: 0.0
episode: 116/5000, score: 14, e: 0.0
episode: 117/5000, score: 14, e: 0.0
episode: 118/5000, score: 14, e: 0.0
episode: 119/5000, score: 14, e: 0.0
episode: 120/5000, score: 14, e: 0.0
episode: 121/5000, score: 14, e: 0.0
episode: 122/5000, score: 14, e: 0.0
episode: 123/5000, score: 14, e: 0.0
episode: 124/5000, score: 14, e: 0.0
episode: 125/5000, score: 14, e: 0.0
episode: 126/5000, score: 14, e: 0.0
episode: 127/5000, score: 14, e: 0.0
episode: 128/5000, score: 14, e: 0.0
episode: 129/5000, score: 14, e: 0.0
episode: 130/5000, score: 14, e: 0.0
episode: 131/5000, score: 14, e: 0.0
episode: 132/5000, score: 14, e: 0.0
episode: 133/5000, score: 14, e: 0.0
episode: 134/5000, score: 14, e: 0.0
episode: 135/5000, score: 14, e: 0.0
episode: 136/5000, score: 14, e: 0.0
episode: 137/5000, score: 14, e: 0.0
episode: 138/5000, score: 14, e: 0.0
episode: 139/5000, score: 14, e: 0.0
episode: 140/5000, score: 14, e: 0.0
episode: 141/5000, score: 14, e: 0.0
e

[2017-04-03 01:50:47,604] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video000216.mp4


episode: 208/5000, score: 14, e: 0.0
episode: 209/5000, score: 14, e: 0.0
episode: 210/5000, score: 14, e: 0.0
episode: 211/5000, score: 14, e: 0.0
episode: 212/5000, score: 14, e: 0.0
episode: 213/5000, score: 14, e: 0.0
episode: 214/5000, score: 14, e: 0.0
episode: 215/5000, score: 14, e: 0.0
episode: 216/5000, score: 14, e: 0.0
episode: 217/5000, score: 14, e: 0.0
episode: 218/5000, score: 14, e: 0.0
episode: 219/5000, score: 14, e: 0.0
episode: 220/5000, score: 14, e: 0.0
episode: 221/5000, score: 14, e: 0.0
episode: 222/5000, score: 14, e: 0.0
episode: 223/5000, score: 14, e: 0.0
episode: 224/5000, score: 14, e: 0.0
episode: 225/5000, score: 14, e: 0.0
episode: 226/5000, score: 14, e: 0.0
episode: 227/5000, score: 14, e: 0.0
episode: 228/5000, score: 14, e: 0.0
episode: 229/5000, score: 14, e: 0.0
episode: 230/5000, score: 14, e: 0.0
episode: 231/5000, score: 14, e: 0.0
episode: 232/5000, score: 14, e: 0.0
episode: 233/5000, score: 14, e: 0.0
episode: 234/5000, score: 14, e: 0.0
e

[2017-04-03 01:50:50,596] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video000343.mp4


episode: 335/5000, score: 14, e: 0.0
episode: 336/5000, score: 14, e: 0.0
episode: 337/5000, score: 14, e: 0.0
episode: 338/5000, score: 14, e: 0.0
episode: 339/5000, score: 14, e: 0.0
episode: 340/5000, score: 14, e: 0.0
episode: 341/5000, score: 14, e: 0.0
episode: 342/5000, score: 14, e: 0.0
episode: 343/5000, score: 14, e: 0.0
episode: 344/5000, score: 14, e: 0.0
episode: 345/5000, score: 14, e: 0.0
episode: 346/5000, score: 14, e: 0.0
episode: 347/5000, score: 14, e: 0.0
episode: 348/5000, score: 14, e: 0.0
episode: 349/5000, score: 14, e: 0.0
episode: 350/5000, score: 14, e: 0.0
episode: 351/5000, score: 14, e: 0.0
episode: 352/5000, score: 14, e: 0.0
episode: 353/5000, score: 14, e: 0.0
episode: 354/5000, score: 14, e: 0.0
episode: 355/5000, score: 14, e: 0.0
episode: 356/5000, score: 14, e: 0.0
episode: 357/5000, score: 14, e: 0.0
episode: 358/5000, score: 14, e: 0.0
episode: 359/5000, score: 14, e: 0.0
episode: 360/5000, score: 14, e: 0.0
episode: 361/5000, score: 14, e: 0.0
e

[2017-04-03 01:50:55,002] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video000512.mp4


episode: 509/5000, score: 14, e: 0.0
episode: 510/5000, score: 14, e: 0.0
episode: 511/5000, score: 14, e: 0.0
episode: 512/5000, score: 14, e: 0.0
episode: 513/5000, score: 14, e: 0.0
episode: 514/5000, score: 14, e: 0.0
episode: 515/5000, score: 14, e: 0.0
episode: 516/5000, score: 14, e: 0.0
episode: 517/5000, score: 14, e: 0.0
episode: 518/5000, score: 14, e: 0.0
episode: 519/5000, score: 14, e: 0.0
episode: 520/5000, score: 14, e: 0.0
episode: 521/5000, score: 14, e: 0.0
episode: 522/5000, score: 14, e: 0.0
episode: 523/5000, score: 14, e: 0.0
episode: 524/5000, score: 14, e: 0.0
episode: 525/5000, score: 14, e: 0.0
episode: 526/5000, score: 14, e: 0.0
episode: 527/5000, score: 14, e: 0.0
episode: 528/5000, score: 14, e: 0.0
episode: 529/5000, score: 14, e: 0.0
episode: 530/5000, score: 14, e: 0.0
episode: 531/5000, score: 14, e: 0.0
episode: 532/5000, score: 14, e: 0.0
episode: 533/5000, score: 14, e: 0.0
episode: 534/5000, score: 14, e: 0.0
episode: 535/5000, score: 14, e: 0.0
e

[2017-04-03 01:50:59,865] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video000729.mp4


episode: 725/5000, score: 14, e: 0.0
episode: 726/5000, score: 14, e: 0.0
episode: 727/5000, score: 14, e: 0.0
episode: 728/5000, score: 14, e: 0.0
episode: 729/5000, score: 14, e: 0.0
episode: 730/5000, score: 14, e: 0.0
episode: 731/5000, score: 14, e: 0.0
episode: 732/5000, score: 14, e: 0.0
episode: 733/5000, score: 14, e: 0.0
episode: 734/5000, score: 14, e: 0.0
episode: 735/5000, score: 14, e: 0.0
episode: 736/5000, score: 14, e: 0.0
episode: 737/5000, score: 14, e: 0.0
episode: 738/5000, score: 14, e: 0.0
episode: 739/5000, score: 14, e: 0.0
episode: 740/5000, score: 14, e: 0.0
episode: 741/5000, score: 14, e: 0.0
episode: 742/5000, score: 14, e: 0.0
episode: 743/5000, score: 14, e: 0.0
episode: 744/5000, score: 14, e: 0.0
episode: 745/5000, score: 14, e: 0.0
episode: 746/5000, score: 14, e: 0.0
episode: 747/5000, score: 14, e: 0.0
episode: 748/5000, score: 14, e: 0.0
episode: 749/5000, score: 14, e: 0.0
episode: 750/5000, score: 14, e: 0.0
episode: 751/5000, score: 14, e: 0.0
e

[2017-04-03 01:51:05,948] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video001000.mp4


episode: 997/5000, score: 14, e: 0.0
episode: 998/5000, score: 14, e: 0.0
episode: 999/5000, score: 14, e: 0.0
episode: 1000/5000, score: 14, e: 0.0
episode: 1001/5000, score: 14, e: 0.0
episode: 1002/5000, score: 14, e: 0.0
episode: 1003/5000, score: 14, e: 0.0
episode: 1004/5000, score: 14, e: 0.0
episode: 1005/5000, score: 14, e: 0.0
episode: 1006/5000, score: 14, e: 0.0
episode: 1007/5000, score: 14, e: 0.0
episode: 1008/5000, score: 14, e: 0.0
episode: 1009/5000, score: 14, e: 0.0
episode: 1010/5000, score: 14, e: 0.0
episode: 1011/5000, score: 14, e: 0.0
episode: 1012/5000, score: 14, e: 0.0
episode: 1013/5000, score: 14, e: 0.0
episode: 1014/5000, score: 14, e: 0.0
episode: 1015/5000, score: 14, e: 0.0
episode: 1016/5000, score: 14, e: 0.0
episode: 1017/5000, score: 14, e: 0.0
episode: 1018/5000, score: 14, e: 0.0
episode: 1019/5000, score: 14, e: 0.0
episode: 1020/5000, score: 14, e: 0.0
episode: 1021/5000, score: 14, e: 0.0
episode: 1022/5000, score: 14, e: 0.0
episode: 1023/5

[2017-04-03 01:51:27,296] Starting new video recorder writing to /tmp/tmp9iqe44lg/openaigym.video.2.286.video002000.mp4


episode: 1996/5000, score: 14, e: 0.0
episode: 1997/5000, score: 14, e: 0.0
episode: 1998/5000, score: 14, e: 0.0
episode: 1999/5000, score: 14, e: 0.0
episode: 2000/5000, score: 14, e: 0.0
episode: 2001/5000, score: 14, e: 0.0
episode: 2002/5000, score: 14, e: 0.0
episode: 2003/5000, score: 14, e: 0.0
episode: 2004/5000, score: 14, e: 0.0
episode: 2005/5000, score: 14, e: 0.0
episode: 2006/5000, score: 14, e: 0.0
episode: 2007/5000, score: 14, e: 0.0
episode: 2008/5000, score: 14, e: 0.0
episode: 2009/5000, score: 14, e: 0.0
episode: 2010/5000, score: 14, e: 0.0
episode: 2011/5000, score: 14, e: 0.0
episode: 2012/5000, score: 14, e: 0.0
episode: 2013/5000, score: 14, e: 0.0
episode: 2014/5000, score: 14, e: 0.0
episode: 2015/5000, score: 14, e: 0.0
episode: 2016/5000, score: 14, e: 0.0
episode: 2017/5000, score: 14, e: 0.0
episode: 2018/5000, score: 14, e: 0.0
episode: 2019/5000, score: 14, e: 0.0
episode: 2020/5000, score: 14, e: 0.0
episode: 2021/5000, score: 14, e: 0.0
episode: 202

In [19]:
env.close()

[2017-04-03 01:51:38,806] Finished writing results. You can upload them to the scoreboard via gym.upload('/tmp/tmp9iqe44lg')


In [20]:
gym.upload(mdir, api_key='<YOUR API KEY>')

[2017-04-03 01:51:38,818] [CartPole-v0] Uploading 2500 episodes of training data
[2017-04-03 01:51:39,942] [CartPole-v0] Uploading videos of 12 training episodes (48049 bytes)
[2017-04-03 01:51:40,239] [CartPole-v0] Creating evaluation object from /tmp/tmp9iqe44lg with learning curve and training video
[2017-04-03 01:51:40,601] 
****************************************************
You successfully uploaded your evaluation on CartPole-v0 to
OpenAI Gym! You can find it at:

    https://gym.openai.com/evaluations/eval_gtkCynx9S6CADbVVbWsXA

****************************************************
