# Playing a Game with Neural Networks

## Simulating game environments

In [1]:
!pip install h5py
!pip install gym
!conda install -c menpo ffmpeg

Collecting package metadata: ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [2]:
import numpy as np 
import gym

In [3]:
# Create the CartPole environment and fix the random seeds of both NumPy and
# the environment so that repeated runs start from the same state.
env = gym.make('CartPole-v0')
np.random.seed(42)
env.seed(42)

# Sizes read from the environment: the number of discrete actions, and the
# network input shape (a leading 1 followed by the observation length).
nb_actions = env.action_space.n
n_observations = env.observation_space.shape[0]
input_shape = (1, n_observations)

In [4]:
# Play a single episode with randomly sampled actions (at most 200 steps),
# rendering every frame, and report how long the episode lasted.
observation = env.reset()
try:
    for t in range(200):
        env.render()
        act = env.action_space.sample()
        obs, rwrd, done, info = env.step(act)
        if done:
            print("Episode concluded after %i timesteps" % (t+1))
            break
finally:
    # Always release the rendering resources, even when render()/step() raises
    # or the cell is interrupted half-way through the episode.
    env.close()

Episode concluded after 12 timesteps


## Presenting Q-learning

In [5]:
!pip install keras-rl



In [6]:
from keras.models import Sequential 
from keras.layers import Dense, Activation 
from keras.layers import Flatten, Dropout 
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

Using TensorFlow backend.


In [7]:
# Q-network: flatten the (1, n_observations) input, pass it through one hidden
# layer of 12 ReLU units, and emit one linear output per available action.
model = Sequential()
model.add(Flatten(input_shape=input_shape))
model.add(Dense(12))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# summary() prints the layer table itself and returns None; wrapping it in
# print() therefore appended a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 12)                60        
_________________________________________________________________
activation_1 (Activation)    (None, 12)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 26        
_________________________________________________________________
activation_2 (Activation)    (None, 2)                 0         
Total params: 86
Trainable params: 86
Non-trainable params: 0
_________________________________________________________________
None


In [8]:
# Action-selection policy (epsilon-greedy with eps=0.3) and the replay memory
# that stores up to 50000 entries with a window of a single observation.
policy = EpsGreedyQPolicy(eps=0.3)
memory = SequentialMemory(limit=50000, window_length=1)

# Assemble the DQN agent around the network defined earlier.
# NOTE(review): a target_model_update below 1 is presumably interpreted by
# keras-rl as a soft-update rate rather than a step interval -- confirm.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=50,
    target_model_update=0.01,
)
dqn.compile(Adam(lr=0.001))

# Train for 30000 environment steps without rendering; verbose=1 produces the
# per-interval progress log. The return value of fit() is kept in `training`.
training = dqn.fit(env, nb_steps=30000, visualize=False, verbose=1)

Training for 30000 steps ...
Interval 1 (0 steps performed)
219 episodes - episode_reward: 45.128 [8.000, 200.000] - loss: 3.995 - mean_q: 21.590

Interval 2 (10000 steps performed)
51 episodes - episode_reward: 195.588 [93.000, 200.000] - loss: 10.790 - mean_q: 52.841

Interval 3 (20000 steps performed)
done, took 59.886 seconds


In [9]:
# Run the trained agent for one rendered episode inside a Monitor wrapper that
# writes its results to ./gym-results (force=True clears earlier results).
env = gym.make('CartPole-v0')
try:
    mon = gym.wrappers.Monitor(env,
                               "./gym-results",
                               force=True)
    try:
        # NOTE(review): this explicit reset appears to be why the recording
        # loaded in the next cell is numbered 000001 -- confirm before removing.
        mon.reset()
        dqn.test(mon, nb_episodes=1, visualize=True)
    finally:
        # Close the monitor even if the test run fails or is interrupted, so
        # that whatever was recorded is finalized on disk.
        mon.close()
finally:
    env.close()

Testing for 1 episodes ...
Episode 1: reward: 200.000, steps: 200


In [10]:
import io
import base64
from IPython.display import HTML

# Embed the recorded episode in the notebook as a base64 data URI inside an
# HTML5 <video> element.
template = './gym-results/openaigym.video.%s.video000001.mp4'
# Open read-only ('rb' rather than 'r+b': the file is never written) and use a
# context manager so the file handle is closed as soon as the bytes are read.
with io.open(template % mon.file_infix, 'rb') as video_file:
    video = video_file.read()
encoded = base64.b64encode(video)
# The HTML object must remain the cell's last expression so that it is rendered.
HTML(data='''
<video width="520" height="auto" alt="test" controls>
<source src="data:video/mp4;base64,{0}"
 type="video/mp4" />
</video>'''.format(encoded.decode('ascii')))