# Keras RL and OpenAI Gym Tutorial/Template
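Main Steps:
1. Import and set up the Gym environment
2. Define the deep learning model
3. Initialize the DQN model
4. Train the model
5. Test the model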


Keras-rl v0.4.2 works best with keras 2.2.4 and tensorflow 1.13.1

In [0]:
!pip install keras-rl
!pip install keras==2.2.4
!pip install tensorflow-gpu==1.13.1 

Collecting keras-rl
[?25l  Downloading https://files.pythonhosted.org/packages/ab/87/4b57eff8e4bd834cea0a75cd6c58198c9e42be29b600db9c14fafa72ec07/keras-rl-0.4.2.tar.gz (40kB)
[K     |████████                        | 10kB 18.2MB/s eta 0:00:01[K     |████████████████▏               | 20kB 2.3MB/s eta 0:00:01[K     |████████████████████████▎       | 30kB 3.4MB/s eta 0:00:01[K     |████████████████████████████████| 40kB 2.4MB/s 
Building wheels for collected packages: keras-rl
  Building wheel for keras-rl (setup.py) ... [?25l[?25hdone
  Created wheel for keras-rl: filename=keras_rl-0.4.2-cp36-none-any.whl size=48379 sha256=0d1fbe455c5b1781ab44e1b6333b4fdb69e41f404f97a500fa8df818290dacf2
  Stored in directory: /root/.cache/pip/wheels/7d/4d/84/9254c9f2e8f51865cb0dac8e79da85330c735551d31f73c894
Successfully built keras-rl
Installing collected packages: keras-rl
Successfully installed keras-rl-0.4.2
Collecting keras==2.2.4
[?25l  Downloading https://files.pythonhosted.org/package

## 1. Import and set up the Gym environment

In [0]:
import gym
import atari_py
import numpy as np

# Show every Atari title that atari_py reports as available.
available_games = atari_py.list_games()
print("List of all the games available: {}".format(available_games))

# Gym environment id of the game you want to use.
game = 'Freeway-v0'
env = gym.make(game)

# Read the observation shape and the size of the action space from the environment.
observation_shape = env.observation_space.shape
nb_actions = env.action_space.n

env_summary = ", Observation space: {}, Action Size: {}".format(observation_shape, nb_actions)
print("Game is: " + game + env_summary)

List of all the games available: ['kaboom', 'time_pilot', 'chopper_command', 'asterix', 'ms_pacman', 'adventure', 'elevator_action', 'kung_fu_master', 'robotank', 'boxing', 'jamesbond', 'centipede', 'journey_escape', 'riverraid', 'defender', 'pooyan', 'crazy_climber', 'zaxxon', 'pong', 'amidar', 'wizard_of_wor', 'fishing_derby', 'gravitar', 'kangaroo', 'road_runner', 'bank_heist', 'gopher', 'battle_zone', 'hero', 'star_gunner', 'qbert', 'breakout', 'space_invaders', 'phoenix', 'solaris', 'tennis', 'asteroids', 'frostbite', 'assault', 'double_dunk', 'alien', 'seaquest', 'carnival', 'demon_attack', 'atlantis', 'up_n_down', 'ice_hockey', 'air_raid', 'freeway', 'krull', 'private_eye', 'venture', 'beam_rider', 'pitfall', 'name_this_game', 'video_pinball', 'bowling', 'yars_revenge', 'berzerk', 'tutankham', 'montezuma_revenge', 'skiing', 'enduro']
Game is: Freeway-v0, Observation space: (210, 160, 3), Action Size: 3


## 2. Defining the Deep Learning Model
The model is a CNN that processes the frames of the game, followed by dense layers. The output layer has one unit per action in the action space.
To reduce training time, the game frames are down-sized and converted to grayscale; therefore the input shape of the model differs from the observation space of the game.

In [0]:
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten, Permute

FRAME_SHAPE = (84, 84)  # Down-sized shape of each input frame
WINDOW_LENGTH = 4  # Number of consecutive frames the model considers to make its decision
input_shape = (WINDOW_LENGTH,) + FRAME_SHAPE  # Input shape of the model: (frames,) + frame shape

model = Sequential()
# Move the frame axis to the end: (frames, 84, 84) -> (84, 84, frames).
model.add(Permute((2, 3, 1), input_shape=input_shape))
# Convolutional feature extractor.
model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu'))
model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
# One linear output per action.
model.add(Dense(nb_actions, activation='linear'))

# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute_3 (Permute)          (None, 84, 84, 4)         0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 20, 20, 32)        8224      
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 9, 9, 64)          32832     
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 7, 7, 64)          36928     
_________________________________________________________________
flatten_3 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 512)               1606144   
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 1539      
Total para

## 3. Initializing the DQN model
The following code defines two classes:
1. `AtariProcessor` is used to pre-process every frame.
2. `save_at_episode` is used to save the model weights after a fixed number of episodes.

The DQN model is then initialized with the parameters shown below.

In [0]:
from PIL import Image
from keras.optimizers import Adam
from keras.callbacks import Callback
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor

# Number of episodes between the weight checkpoints written by the save_at_episode callback.
SAVE_AFTER_EPISODE = 50

class AtariProcessor(Processor):
    """Pre-process Atari frames for the DQN agent.

    Every frame is resized to FRAME_SHAPE and converted to grayscale. Frames
    are returned (and therefore stored in the replay memory) as uint8 and are
    only scaled to float32 values in [0, 1] when a batch is handed to the
    network, so each stored frame takes a quarter of the space of a float32
    copy.
    """

    def process_observation(self, observation):
        """Return the frame as a grayscale uint8 array of shape FRAME_SHAPE.

        observation: array with three dimensions (height, width, channel).
        """
        assert observation.ndim == 3  # (height, width, channel)
        img = Image.fromarray(observation)
        # PIL's resize() takes (width, height), whereas FRAME_SHAPE is checked
        # against the numpy (height, width) shape below, so reverse it. For a
        # square FRAME_SHAPE this makes no difference.
        img = img.resize(FRAME_SHAPE[::-1]).convert('L')  # resize and convert to grayscale
        processed_observation = np.array(img)
        assert processed_observation.shape == FRAME_SHAPE
        return processed_observation.astype('uint8')  # uint8 saves storage in the experience memory

    def process_state_batch(self, batch):
        """Scale a batch of stored uint8 frames to float32 values in [0, 1]."""
        return batch.astype('float32') / 255.

class save_at_episode(Callback):
    """Callback that checkpoints the weights every SAVE_AFTER_EPISODE episodes."""

    def on_episode_end(self, episode, logs=None):
        """Save the weights once a multiple of SAVE_AFTER_EPISODE episodes has finished.

        episode: index of the episode that just ended; keras-rl counts
            episodes from zero, so the number of finished episodes is
            ``episode + 1``.
        logs: episode statistics passed by the caller; not used here.
        """
        # Count finished episodes so that checkpoints land after 50, 100, ...
        # episodes instead of after the very first one (index 0).
        completed_episodes = episode + 1
        if completed_episodes % SAVE_AFTER_EPISODE == 0:
            self.model.save_weights('DQN_'+game+'_episode_{}_weights'.format(completed_episodes), overwrite=True)
            print(" saved episode:{}".format(completed_episodes))

# Training hyper-parameters.
nb_steps = 100000  # Number of training steps
lr = 0.00025  # Learning rate handed to the Adam optimizer

# Replay memory the agent uses to remember past experiences.
memory = SequentialMemory(limit=500000, window_length=WINDOW_LENGTH)

# Pre-processor that resizes the frames and converts them to grayscale.
processor = AtariProcessor()

# Epsilon-greedy policy whose 'eps' attribute is annealed over nb_steps to
# balance exploration against exploitation.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.,
    value_min=.1,
    value_test=.05,
    nb_steps=nb_steps,
)

agent_settings = dict(
    model=model,
    nb_actions=nb_actions,
    policy=policy,
    memory=memory,
    processor=processor,
    nb_steps_warmup=10000,  # Number of random steps at the beginning of training
    target_model_update=10000,  # The model weights are updated after this many steps
    train_interval=WINDOW_LENGTH,  # Q-values and model training happen once per this many steps
)
dqn = DQNAgent(**agent_settings)

optimizer = Adam(lr=lr)
dqn.compile(optimizer, metrics=['mae'])

## 4. Model Training


In [0]:
# Train the agent; the save_at_episode callback writes checkpoints along the way.
checkpoint_callback = save_at_episode()
dqn.fit(env, nb_steps=nb_steps, verbose=1, callbacks=[checkpoint_callback])

# Save the final weights under a name that records the number of training steps.
final_weights_file = 'DQN_'+game+'_{}steps_weights'.format(nb_steps)
dqn.save_weights(final_weights_file, overwrite=True)

Training for 100000 steps ...
Interval 1 (0 steps performed)
3 episodes - episode_reward: 0.000 [0.000, 0.000] - ale.lives: 0.000

Interval 2 (10000 steps performed)
4 episodes - episode_reward: 0.000 [0.000, 0.000] - loss: 0.000 - mean_absolute_error: 0.025 - mean_q: 0.029 - mean_eps: 0.865 - ale.lives: 0.000

Interval 3 (20000 steps performed)
3 episodes - episode_reward: 0.333 [0.000, 1.000] - loss: 0.000 - mean_absolute_error: 0.031 - mean_q: 0.047 - mean_eps: 0.775 - ale.lives: 0.000

Interval 4 (30000 steps performed)
4 episodes - episode_reward: 0.750 [0.000, 2.000] - loss: 0.000 - mean_absolute_error: 0.031 - mean_q: 0.047 - mean_eps: 0.685 - ale.lives: 0.000

Interval 5 (40000 steps performed)
4 episodes - episode_reward: 2.250 [0.000, 6.000] - loss: 0.000 - mean_absolute_error: 0.031 - mean_q: 0.047 - mean_eps: 0.595 - ale.lives: 0.000

Interval 6 (50000 steps performed)
3 episodes - episode_reward: 3.000 [1.000, 6.000] - loss: 0.000 - mean_absolute_error: 0.031 - mean_q: 0.0

## 5. Model Testing
The model is evaluated on its average reward over 100 episodes.

To watch the game, change `visualize` to `True` (this will not work in Google Colab).


In [0]:
from statistics import mean
# Load the weights of the model that needs to be evaluated.
weights_file = 'DQN_'+game+'_{}steps_weights'.format(nb_steps)
dqn.load_weights(weights_file)

# Play 100 test episodes without rendering the game.
history = dqn.test(env, nb_episodes=100, visualize=False)

# Compute the mean score over the test episodes.
episode_rewards = history.history['episode_reward']
mean_score = mean(episode_rewards)
print("100 Episode Average Score: {}".format(mean_score))

Testing for 100 episodes ...
Episode 1: reward: 21.000, steps: 2746
Episode 2: reward: 21.000, steps: 2724
Episode 3: reward: 21.000, steps: 2752
Episode 4: reward: 21.000, steps: 2724
Episode 5: reward: 21.000, steps: 2737
Episode 6: reward: 21.000, steps: 2728
Episode 7: reward: 21.000, steps: 2730
Episode 8: reward: 21.000, steps: 2704
Episode 9: reward: 21.000, steps: 2748
Episode 10: reward: 21.000, steps: 2702
Episode 11: reward: 21.000, steps: 2732
Episode 12: reward: 24.000, steps: 2702
Episode 13: reward: 24.000, steps: 2716
Episode 14: reward: 21.000, steps: 2728
Episode 15: reward: 21.000, steps: 2741
Episode 16: reward: 21.000, steps: 2721
Episode 17: reward: 24.000, steps: 2702
Episode 18: reward: 24.000, steps: 2731
Episode 19: reward: 21.000, steps: 2733
Episode 20: reward: 24.000, steps: 2726
Episode 21: reward: 21.000, steps: 2741
Episode 22: reward: 21.000, steps: 2724
Episode 23: reward: 21.000, steps: 2702
Episode 24: reward: 21.000, steps: 2732
Episode 25: reward: 