## **Table of Contents**



1.   Setup
2.   OpenAI Gym Environment

In [None]:
# Installing OpenAI Gym and Arcade Learning Environment (ALE)
# https://github.com/openai/gym
# https://github.com/mgbellemare/Arcade-Learning-Environment

!pip install gym gym[atari,accept-rom-license]==0.22.0

Collecting gym[accept-rom-license,atari]==0.22.0
  Downloading gym-0.22.0.tar.gz (631 kB)
[K     |████████████████████████████████| 631 kB 3.2 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting gym-notices>=0.0.4
  Downloading gym_notices-0.0.5-py3-none-any.whl (2.7 kB)
Collecting autorom[accept-rom-license]~=0.4.2
  Downloading AutoROM-0.4.2-py3-none-any.whl (16 kB)
Collecting ale-py~=0.7.4
  Downloading ale_py-0.7.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 48.7 MB/s 
Collecting AutoROM.accept-rom-license
  Downloading AutoROM.accept-rom-license-0.4.2.tar.gz (9.8 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Building wheels for collected packages: gym, AutoROM

In [None]:
# Colab render visualization setup
# source: https://yashk2000.github.io/blog/rendering-openai-gym-envs-in-colab/

!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!pip install -U colabgymrender
!pip install pygame

Collecting colabgymrender
  Downloading colabgymrender-1.0.9-py3-none-any.whl (3.1 kB)
Installing collected packages: colabgymrender
Successfully installed colabgymrender-1.0.9
Collecting pygame
  Downloading pygame-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.8 MB)
[K     |████████████████████████████████| 21.8 MB 82.1 MB/s 
[?25hInstalling collected packages: pygame
Successfully installed pygame-2.1.2


In [None]:
# in order to complete the DQN import, need to pip install this
# for some reason the pip install is unique on colab, sourced from here: https://github.com/seungjaeryanlee/osim-rl-helper/issues/7

!pip install keras-rl2

Collecting keras-rl2
  Downloading keras_rl2-1.0.5-py3-none-any.whl (52 kB)
[?25l[K     |██████▎                         | 10 kB 15.3 MB/s eta 0:00:01[K     |████████████▋                   | 20 kB 12.3 MB/s eta 0:00:01[K     |██████████████████▉             | 30 kB 7.1 MB/s eta 0:00:01[K     |█████████████████████████▏      | 40 kB 5.5 MB/s eta 0:00:01[K     |███████████████████████████████▍| 51 kB 4.0 MB/s eta 0:00:01[K     |████████████████████████████████| 52 kB 623 kB/s 
Collecting tf-estimator-nightly==2.8.0.dev2021122109
  Downloading tf_estimator_nightly-2.8.0.dev2021122109-py2.py3-none-any.whl (462 kB)
[K     |████████████████████████████████| 462 kB 7.8 MB/s 
Installing collected packages: tf-estimator-nightly, keras-rl2
Successfully installed keras-rl2-1.0.5 tf-estimator-nightly-2.8.0.dev2021122109


In [None]:
# !pip freeze

In [None]:
# You can't import these after the model creation for some reason; the SequentialMemory import messes up the model. Not sure why.

from rl.agents import DQNAgent
from rl.memory import SequentialMemory, sample_batch_indexes, zeroed_observation, EpisodeParameterMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy, BoltzmannQPolicy, SoftmaxPolicy, GreedyQPolicy, MaxBoltzmannQPolicy

In [None]:
import gym
import ale_py

import matplotlib.pyplot as plt
%matplotlib inline 

%tensorflow_version 2.x
import tensorflow as tf

import os
import shutil

import numpy as np

import warnings
import random




In [None]:
# Ignore warnings
warnings.filterwarnings('ignore')

In [None]:
# Use one font size (20) for every text element matplotlib draws
plt.rcParams.update({
    'font.size': 20,
    'axes.titlesize': 20,
    'axes.labelsize': 20,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    'legend.fontsize': 20,
    'figure.titlesize': 20,
})

In [None]:
# Random Seed

# One seed shared by every random number generator seeded in this cell
random_seed = 42

# Set random seed in tensorflow
tf.random.set_seed(random_seed)

# Set random seed in numpy
np.random.seed(random_seed)

# Set random seed in Python's built-in `random` module (imported with the
# other libraries above)
# NOTE(review): keras-rl's replay-memory sampling presumably draws from this
# generator as well — confirm against the installed rl.memory source
random.seed(random_seed)

In [None]:
# Show the TensorFlow release this runtime is using
print(tf.__version__)

2.8.0


In [None]:
# Print the number of GPUs available
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Test to see if GPU is found and connected
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print('GPU device not found')
else:
  print('\nFound GPU at: {}'.format(device_name))
  print('\nCurrently using:')
  !nvidia-smi -L

Num GPUs Available:  0
GPU device not found


Google Colab does not have a built-in way to display rendered Gym frames, so we have to create our own. There are a couple of different methods to do this; this notebook uses method 3 found here: https://yashk2000.github.io/blog/rendering-openai-gym-envs-in-colab/

# Visualization Setup

In [None]:
# create a dummy display for the game video: SDL is told to use its 'dummy'
# video driver, then pygame opens a 1280x960 surface on it
# if we were to actually store the video we would have to remove the old video every time
# (`os` is already imported in the imports cell above)
os.environ['SDL_VIDEODRIVER']='dummy'
# NOTE(review): pygame is imported only after the driver variable is set —
# presumably so SDL picks the setting up; keep this order unless verified otherwise
import pygame
pygame.display.set_mode((1280,960))
# we can make the display bigger here when we want to expand it

<Surface(1280x960x32 SW)>

In [None]:
# # A quick test using the example from the source
# from colabgymrender.recorder import Recorder

# env = gym.make('CartPole-v0')
# directory = './video'
# env = Recorder(env, directory)

# curr_state = env.reset()
# done = False
# while not done:
#   action = env.action_space.sample()
#   curr_state, _, done, info = env.step(action)

# env.play()

# Ms. Pacman Random Model

Now let's try it with Ms. Pacman and the random model

In [None]:
# Just messing around with gym - Josh
# Create the Ms. Pacman environment and read off the sizes the model needs

env = gym.make('ALE/MsPacman-v5')
# the observation shape is unpacked as (height, width, channels); these are passed to build_model later
height, width, channels = env.observation_space.shape
# size of the discrete action set; used as the width of the network's output layer
actions = env.action_space.n

In [None]:
# print(height, width, channels)

In [None]:
# # Our agent's possible actions
# env.unwrapped.get_action_meanings()

In [None]:
# # testing the random model 
# # code from here: https://www.youtube.com/watch?v=hCeJeq8U0lo
# # we remove env.render() and instead include env.play()
# # episodes is the number of games
# episodes = 1
# for episode in range(1, episodes+1):
#     state = env.reset()
#     done = False
#     score = 0 
    
#     while not done:
#         action = random.choice([0,1,2,3,4,5,6,7,8])
#         n_state, reward, done, info = env.step(action)
#         score+=reward
#     print('Episode:{} Score:{}'.format(episode, score))
#     env.play()
# env.close()
# # still probably better than I would do

# 2. Create a Deep Learning Model with Keras

In [None]:
# Again taken from: https://www.youtube.com/watch?v=hCeJeq8U0lo
# Here we build the CNN. We can alter all of the model creation later in order to make the model as successful as possible
from keras.models import Sequential
from keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

In [None]:
def build_model(height, width, channels, actions, window_length=3):
    """Build the (uncompiled) convolutional Q-network.

    Args:
        height: Frame height of one observation.
        width: Frame width of one observation.
        channels: Number of colour channels of one observation.
        actions: Number of discrete actions; the output layer has one linear
            unit per action.
        window_length: Number of stacked frames per network input. This must
            match the `window_length` of the agent's replay memory. Defaults
            to 3, the value this notebook uses.

    Returns:
        A keras `Sequential` model whose input shape (excluding the batch
        axis) is `(window_length, height, width, channels)`.
    """
    model = Sequential()
    # The frame window is a leading axis of the input; the conv layers carry it
    # through and Flatten merges it with the spatial feature maps.
    model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', input_shape=(window_length, height, width, channels)))
    model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Convolution2D(64, (3,3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    # Linear output: one unbounded value per action
    model.add(Dense(actions, activation='linear'))
    return model

In [None]:
# if you make a mistake, remember to delete the model
# (guarded so this cell is a no-op on a fresh kernel, where `model` does not exist yet)
try:
    del model
except NameError:
    pass

In [None]:
# Instantiate the network from the frame dimensions and action count read from the environment
model = build_model(height=height, width=width, channels=channels, actions=actions)

In [None]:
# Print each layer's output shape and parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 3, 51, 39, 32)     6176      
                                                                 
 conv2d_1 (Conv2D)           (None, 3, 24, 18, 64)     32832     
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 22, 16, 64)     36928     
                                                                 
 flatten (Flatten)           (None, 67584)             0         
                                                                 
 dense (Dense)               (None, 512)               34603520  
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 9)                 2

# 3. Build Agent with Keras-RL

In [None]:
# here I needed to change enable_dueling_network to False, otherwise I was getting a recursion error
def build_agent(model, actions, window_length=3):
    """Assemble the (uncompiled) DQN agent around `model`.

    Args:
        model: The Q-network, e.g. the one returned by `build_model`.
        actions: Number of discrete actions the agent can choose from.
        window_length: Number of consecutive observations the replay memory
            stacks into one state. This must match the leading dimension of
            the model's input shape. Defaults to 3, the value this notebook uses.

    Returns:
        A `DQNAgent` that still has to be compiled before `fit`/`test`.
    """
    # Epsilon-greedy exploration with `eps` annealed between value_max and
    # value_min over nb_steps; value_test is the setting used when testing
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.2, value_test=0.4, nb_steps=10000)
    memory = SequentialMemory(limit=100000, window_length=window_length)
    # The dueling network stays disabled: enabling it raised a recursion error
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   enable_dueling_network=False, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup=100)
    return dqn

In [None]:
# Build the agent around the model, then compile it with the Adam optimizer
dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias
dqn.compile(Adam(learning_rate=1e-3))

In [None]:
#, callbacks = callbacks

In [None]:
# this will take a very long time to run unless you lower the steps
# right now we are running at 3000 steps, with a 100 step warmup in the agent, so it is barely fitting at all
# in the video he mentioned that the creators of the algorithm recommended to run this for 10 million steps, so that may be what we need the GPU EC2 instance for
# if you don't want to wait, just press the stop button on the cell and it will stop the model training where it is with no error
dqn.fit(env, nb_steps=3000, visualize=False, verbose=2)
#### I will see if I can figure out how to train the fitting process, as this is the funny part where our model looks stupid

Training for 3000 steps ...


In [None]:
from colabgymrender.recorder import Recorder
# Wrap the environment in colabgymrender's Recorder, pointing it at ./video
# (presumably where the recorded episodes are written — confirm against the Recorder docs)
# NOTE: `env` is reassigned to the wrapper, so re-running this cell wraps the already-wrapped env again
directory = './video'
env = Recorder(env, directory)

In [None]:
# this is not gonna be much better than the random model cause we are hardly training, just wanted to get it all working first
# I also created a loop so it will play the video of each test after each try, since we cannot use Visualize = True
# the model is already fit here, so each result does not build off the last one, these tests are their own independent trials (to my understanding)
# Number of independent single-episode evaluation runs
NB_TEST_EPISODES = 10

# One episode per dqn.test call, so env.play() can run right after each episode
avg_calc = []
for _ in range(NB_TEST_EPISODES):
  scores = dqn.test(env, nb_episodes=1, visualize=False)
  # with nb_episodes=1, index 0 is the reward of the only episode in the returned history
  avg_calc.append(scores.history['episode_reward'][0])
  env.play()
print('Average Score:', sum(avg_calc)/len(avg_calc))

In [None]:
# to save our model:
# dqn.save_weights('insertfilepathhere/dqn_weights.h5f')
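# to load a model (the agent must be rebuilt and compiled before weights can be loaded into it):
# del model, dqn
# model = build_model(height, width, channels, actions)
# dqn = build_agent(model, actions)
# dqn.compile(Adam(learning_rate=1e-3))
# dqn.load_weights('insertfilepathhere/dqn_weights.h5f')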