# iLykei Lecture Series

# Advanced Machine Learning and Artificial Intelligence (MScA 32017)

# Pac-Man Competition for Human-Machine Teams 

### Y.Balasanov, M. Tselishchev, &copy; iLykei 2018

## Preparation

In [6]:
!pip3 install gym

Collecting gym
  Downloading gym-0.17.2.tar.gz (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 3.3 MB/s eta 0:00:01
[?25hCollecting cloudpickle<1.4.0,>=1.2.0
  Downloading cloudpickle-1.3.0-py2.py3-none-any.whl (26 kB)
Collecting pyglet<=1.5.0,>=1.4.0
  Downloading pyglet-1.5.0-py2.py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 22.1 MB/s eta 0:00:01     |██████▍                         | 204 kB 22.1 MB/s eta 0:00:01
Building wheels for collected packages: gym
  Building wheel for gym (setup.py) ... [?25ldone
[?25h  Created wheel for gym: filename=gym-0.17.2-py3-none-any.whl size=1650892 sha256=ec5e1495a8b68ae3e2032f1463b0dd641abde28bab8c9012f2e391822c67c451
  Stored in directory: /home/targoon/.cache/pip/wheels/48/bf/7c/44b1b8e4ad998fc48e31caedbb9e028351861b8d20632642bc
Successfully built gym
Installing collected packages: cloudpickle, pyglet, gym
Successfully installed cloudpickle-1.3.0 gym-0.17.2 pyglet-1.5.0


In [7]:
import numpy as np
import random
import time
import os
import gc

from keras.models import Sequential, clone_model
from keras.layers import Dense, Flatten, Conv2D, InputLayer
from keras.callbacks import CSVLogger, TensorBoard
from keras.optimizers import Adam
import keras.backend as K

import gym

Load trained model (which was previously saved by `model.save()`-method) for online network:

In [5]:
env = gym.make("MsPacman-ram-v0")
env.action_space  # actions are integers from 0 to 8

  result = entry_point.load(False)


Discrete(9)

In [6]:
obs = env.reset()

In [10]:
def create_dqn_model(input_shape, nb_actions, dense_layers, dense_units):
    model = Sequential()
    model.add(InputLayer(input_shape=input_shape))
    for i in range(dense_layers):
        model.add(Dense(units=dense_units, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model

Create a network using specific input shape and action space size. We call this network *online*.

In [11]:
input_shape = obs.shape
nb_actions = env.action_space.n  # 9
dense_layers = 6
dense_units = 256

online_network = create_dqn_model(input_shape, nb_actions, dense_layers, dense_units)
online_network.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 256)               33024     
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_3 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_4 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_5 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_6 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_7 (Dense)              (None, 9)                 2313      
Total para

In [13]:
online_network.load_weights('weights_1350000.h5f')

Define $\varepsilon$-greedy strategy (using small $\varepsilon$):

In [14]:
def epsilon_greedy(q_values, epsilon, n_outputs):
    if random.random() < epsilon:
        return random.randrange(n_outputs)  # random action
    else:
        return np.argmax(q_values)          # q-optimal action

In [28]:
from keras.models import load_model
model.load_model('pacman1.h5', compile=False)

AttributeError: 'Sequential' object has no attribute 'load_model'

## Testing model

Define a function to evalutate the trained network. 
Note that we still using $\varepsilon$-greedy strategy here to prevent an agent from getting stuck. 
`test_dqn` returns a list with scores for specific number of games.

In [15]:
def test_dqn(n_games, model, nb_actions=9, skip_start=90, eps=0.05, render=False, sleep_time=0.01):
    env = gym.make("MsPacman-ram-v0")
    scores = []
    for i in range(n_games):
        obs = env.reset()
        score = 0
        done = False
        for skip in range(skip_start):  # skip the start of each game (it's just freezing time before game starts)
            obs, reward, done, info = env.step(0)
            score += reward
        while not done:
            state = obs
            q_values = model.predict(np.array([state]))[0]
            action = epsilon_greedy(q_values, eps, nb_actions)
            obs, reward, done, info = env.step(action)
            score += reward
            if render:
                env.render()
                time.sleep(sleep_time)
                if done:
                    time.sleep(1)
        scores.append(score)
        # print('{}/{}: {}'.format(i+1, n_games, score))
        env.close()
    return scores

### Collecting scores

Run 100 games without rendering and collect necessary statistics for final score.

In [21]:
ngames = 100
eps = 0.05
render = False

scores = test_dqn(ngames, online_network, eps=eps, render=render)

print('\nMean score: ', np.mean(scores))
print('\nMax score: ', np.max(scores))
print('\nFifth percentile: ',np.percentile(scores, 95))
print('\nPercentiles:')
print([ np.percentile(scores, p) for p in [0, 25, 50, 75, 100] ])


Mean score:  763.6

Max score:  2340.0

Fifth percentile:  1641.9999999999998

Percentiles:
[190.0, 320.0, 675.0, 1100.0, 2340.0]


### Rendering

Play 3 more times with rendering

In [23]:
import time
ngames = 5
eps = 0.05
render = True

scores = test_dqn(ngames, online_network, eps=eps, render=render)

print('\nMean score: ', np.mean(scores))
print('\nMax score: ', np.max(scores))
print('\nPercentiles:')
print([ np.percentile(scores, p) for p in [0, 25, 50, 75, 100] ])


Mean score:  560.0

Max score:  680.0

Percentiles:
[250.0, 600.0, 620.0, 650.0, 680.0]
