In [None]:
# Installing OpenAI Gym and Arcade Learning Environment (ALE)
# https://github.com/openai/gym
# https://github.com/mgbellemare/Arcade-Learning-Environment

!pip install gym gym[atari,accept-rom-license]==0.22.0
!pip install pygame
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!pip install -U colabgymrender
!pip install keras-rl2

In [2]:
import gym
import ale_py
import pygame
from colabgymrender.recorder import Recorder

import matplotlib.pyplot as plt
%matplotlib inline 

%tensorflow_version 2.x
import tensorflow as tf
from tensorflow import keras

import os
import shutil

import numpy as np

import warnings
import random

from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

from keras.models import Sequential
from keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam




Imageio: 'ffmpeg-linux64-v3.3.1' was not found on your computer; downloading it now.
Try 1. Download from https://github.com/imageio/imageio-binaries/raw/master/ffmpeg/ffmpeg-linux64-v3.3.1 (43.8 MB)
Downloading: 8192/45929032 bytes (0.0%)1228800/45929032 bytes (2.7%)3276800/45929032 bytes (7.1%)4079616/45929032 bytes (8.9%)5914624/45929032 bytes (12.9%)8101888/45929032 bytes (17.6%)10264576/45929032 bytes (22.3%)12312576/45929032 bytes (26.8%)14229504/45929032 bytes (31.0%)15679488/45929032 bytes (34.1%)17637376/45929032 bytes (38.4%)19521536/45929032 bytes (42.5%)21774336/45929032 bytes (47.4%)2400256

In [3]:
# Ignore warnings
warnings.filterwarnings('ignore')

In [4]:
# Set matplotlib sizes: apply the same size (20) to each text-related rc option
for rc_group, rc_option in (
    ('font', 'size'),
    ('axes', 'titlesize'),
    ('axes', 'labelsize'),
    ('xtick', 'labelsize'),
    ('ytick', 'labelsize'),
    ('legend', 'fontsize'),
    ('figure', 'titlesize'),
):
    plt.rc(rc_group, **{rc_option: 20})

In [5]:
# The random seed
random_seed = 42
# Seed every RNG the notebook draws from. The stdlib `random` module must be
# seeded too: the random-action baseline picks its actions with it.
random.seed(random_seed)
tf.random.set_seed(random_seed)
np.random.seed(random_seed)

In [6]:
# Print the number of GPUs available
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Test to see if GPU is found and connected
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print('GPU device not found')
else:
  print('\nFound GPU at: {}'.format(device_name))
  print('\nCurrently using:')
  !nvidia-smi -L

Num GPUs Available:  1

Found GPU at: /device:GPU:0

Currently using:
GPU 0: Tesla K80 (UUID: GPU-42ca706c-9d6f-d6d1-dcb1-af7a95062f79)


In [7]:
# Select SDL's 'dummy' video driver before the display is created
# (presumably so pygame can run on a machine without a screen, e.g. Colab).
os.environ['SDL_VIDEODRIVER']='dummy'
# Create a 640x480 pygame display surface; its repr is shown as the cell output.
pygame.display.set_mode((640,480))

<Surface(640x480x32 SW)>

In [8]:
# Directory handed to the Recorder wrapper
directory = './video'

# Create the Ms. Pac-Man environment and read the sizes the network needs
env = gym.make('ALE/MsPacman-v5')
actions = env.action_space.n
height, width, channels = env.observation_space.shape

# Wrap the environment in the Recorder; from here on `env` is the wrapped one
env = Recorder(env, directory)

## Random Model

In [9]:
# Baseline: play `episodes` games choosing a uniformly random action each step
episodes = 1
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        # Draw from the environment's own action count rather than a
        # hard-coded 0-8 list, so the loop stays valid for other games.
        action = random.randrange(actions)
        n_state, reward, done, info = env.step(action)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
    # Replay the episode that was just recorded
    env.play()
env.close()

Episode:1 Score:680.0


## DQN Model

In [10]:
def build_model(height, width, channels, actions, window_length=3):
    """Build the convolutional Q-network.

    Args:
        height: Height of a single observation frame.
        width: Width of a single observation frame.
        channels: Number of colour channels in a frame.
        actions: Number of discrete actions; size of the linear output layer.
        window_length: Number of stacked frames in one network input. It must
            equal the `window_length` of the agent's memory. Defaults to 3,
            the value that used to be hard-coded here.

    Returns:
        An uncompiled Keras `Sequential` model: three convolution layers
        (32, 64, 64 filters), then dense layers of 512 and 256 units and a
        linear layer with one output per action.
    """
    model = Sequential()
    model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu',
                            input_shape=(window_length, height, width, channels)))
    model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Convolution2D(64, (3,3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model
  
def build_agent(model, actions, window_length=3):
    """Build a dueling DQN agent around `model`.

    Args:
        model: Keras model that produces one Q-value per action.
        actions: Number of discrete actions.
        window_length: Number of consecutive observations the memory stacks
            into one state. It must match the first dimension of the model's
            input shape. Defaults to 3, the value that used to be hard-coded
            here.

    Returns:
        An uncompiled `DQNAgent`.
    """
    # Epsilon-greedy exploration: eps is annealed linearly from 1.0 to 0.1
    # over 10000 steps; a fixed eps of 0.2 is used when testing.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    # Replay memory holding at most 1000 entries
    memory = SequentialMemory(limit=1000, window_length=window_length)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                  enable_dueling_network=True, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup=1000
                  )
    return dqn

In [11]:
# Build the Q-network from the environment's frame size and action count
model = build_model(height=height, width=width, channels=channels, actions=actions)

In [12]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 3, 51, 39, 32)     6176      
                                                                 
 conv2d_1 (Conv2D)           (None, 3, 24, 18, 64)     32832     
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 22, 16, 64)     36928     
                                                                 
 flatten (Flatten)           (None, 67584)             0         
                                                                 
 dense (Dense)               (None, 512)               34603520  
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 9)                 2

In [13]:
# Build the DQN agent around the network and compile it with Adam
dqn = build_agent(model, actions)
# `lr` is a deprecated alias in tf.keras optimizers; `learning_rate` is the supported name.
dqn.compile(Adam(learning_rate=1e-4))

In [14]:
# Train the agent on `env` for 2000 steps without rendering; verbose=2 is passed
# for logging, and the value returned by fit() is displayed as the cell output.
dqn.fit(env, nb_steps=2000, visualize=False, verbose=2)

Training for 2000 steps ...
  520/2000: episode: 1, duration: 17.273s, episode steps: 520, steps per second:  30, episode reward: 370.000, mean reward:  0.712 [ 0.000, 10.000], mean action: 3.837 [0.000, 8.000],  loss: --, mean_q: --, mean_eps: --
 1064/2000: episode: 2, duration: 73.574s, episode steps: 544, steps per second:   7, episode reward: 240.000, mean reward:  0.441 [ 0.000, 10.000], mean action: 4.062 [0.000, 8.000],  loss: 1038.351632, mean_q: 73.948208, mean_eps: 0.907120
 1570/2000: episode: 3, duration: 489.541s, episode steps: 506, steps per second:   1, episode reward: 230.000, mean reward:  0.455 [ 0.000, 10.000], mean action: 4.042 [0.000, 8.000],  loss: 5.771256, mean_q: 55.059624, mean_eps: 0.881515
done, took 1005.487 seconds


<keras.callbacks.History at 0x7f27d1267b50>

In [15]:
# Evaluate the trained agent one episode at a time so each recording can be
# replayed right after it is produced, then report the mean episode reward.
n_test_episodes = 3
avg_calc = []
for i in range(n_test_episodes):
  scores = dqn.test(env, nb_episodes=1, visualize=False)
  avg_calc.append(scores.history['episode_reward'][0])
  env.play()
print('Average Score:', sum(avg_calc)/len(avg_calc))

Testing for 1 episodes ...
Episode 1: reward: 210.000, steps: 522


Testing for 1 episodes ...
Episode 1: reward: 1640.000, steps: 636


Testing for 1 episodes ...
Episode 1: reward: 340.000, steps: 784


Average Score: 730.0


## Messing around with building a Model Class

In [16]:
class Model:
  """Bundles an Atari environment, a convolutional Q-network and a DQN agent.

  Call order: ``build_model()`` -> ``build_agent()`` -> ``train()`` -> ``play()``.

  Attributes set by the methods:
    env: the Recorder-wrapped gym environment (set in ``__init__``).
    height, width, channels: observation shape of the environment.
    actions: number of discrete actions.
    model: the Keras network (set by ``build_model``).
    agent: the DQN agent (set by ``build_agent``).
  """

  def __init__(self, game = 'ALE/MsPacman-v5'):
    """Create the gym environment for `game` and wrap it in a Recorder.

    Args:
      game: Gym environment id passed to ``gym.make``.
    """
    env = gym.make(game)
    self.height, self.width, self.channels = env.observation_space.shape
    self.actions = env.action_space.n
    directory = './video'
    self.env = Recorder(env, directory)

  def build_model(self):
    """Build the convolutional Q-network and store it in ``self.model``."""
    model = Sequential()
    # Input is a window of 3 frames; must match the memory's window_length.
    model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', input_shape=(3,self.height, self.width, self.channels)))
    model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Convolution2D(64, (3,3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(self.actions, activation='linear'))
    self.model = model

  def build_agent(self):
    """Build the dueling DQN agent around ``self.model`` and store it in ``self.agent``.

    ``build_model()`` must have been called first.
    """
    # Epsilon annealed linearly from 1.0 to 0.1 over 10000 steps; 0.2 when testing.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=self.model, memory=memory, policy=policy,
                  enable_dueling_network=True, dueling_type='avg',
                   nb_actions=self.actions, nb_steps_warmup=1000
                  )
    self.agent = dqn

  def train(self, nb_steps = 2000):
    """Compile the agent with Adam and train it on this instance's environment.

    ``build_agent()`` must have been called first.

    Args:
      nb_steps: Number of training steps. Defaults to 2000.
    """
    # Use the agent and environment owned by this instance, not notebook globals.
    self.agent.compile(Adam(learning_rate=1e-4))
    self.agent.fit(self.env, nb_steps=nb_steps, visualize=False, verbose=2)

  def play(self, episodes = 3):
    """Run test episodes, replay each recording and print the average score.

    Args:
      episodes: Number of test episodes to run; must be at least 1.

    Raises:
      ValueError: If `episodes` is less than 1 (the average would be undefined).
    """
    if episodes < 1:
      raise ValueError('episodes must be at least 1')
    avg_calc = []
    for _ in range(episodes):
      # One episode per call so each recording can be replayed straight away.
      scores = self.agent.test(self.env, nb_episodes=1, visualize=False)
      avg_calc.append(scores.history['episode_reward'][0])
      self.env.play()
    print('Average Score:', sum(avg_calc)/len(avg_calc))