In [None]:
%matplotlib inline
import matplotlib.pyplot as plt


import numpy as np
import random
import time
import os
import gc
import math

from keras.models import Sequential, clone_model
from keras.layers import Dense, Flatten, Conv2D, InputLayer, BatchNormalization
from keras.callbacks import CSVLogger, TensorBoard
from keras.optimizers import Adam
import keras.backend as K
import rl
#from rl.layers import NoisyNetDense
from rl.agents.dqn import DQNAgent
from rl.policy import GreedyQPolicy
from keras import initializers
import keras.engine
from keras.engine import InputSpec
from keras.callbacks import LearningRateScheduler
import collections, itertools

import gym

plt.rcParams['figure.figsize'] = (9, 9)

### Creating network

In [None]:
class NoisyDense(Dense):
    """Dense layer whose weights are perturbed by learnable Gaussian noise.

    Besides the usual ``kernel`` and ``bias`` the layer owns a second, equally
    shaped set of weights (``kernel_sigma`` / ``bias_sigma``) that scale
    normally distributed noise.  The parameters used in ``call`` are::

        w = kernel + kernel_sigma * kernel_epsilon
        b = bias   + bias_sigma   * bias_epsilon

    # Arguments
        units: Dimensionality of the output space.
        **kwargs: Forwarded unchanged to ``keras.layers.Dense``.

    NOTE(review): every ``add_weight`` call in ``build`` passes
    ``regularizer=None`` and ``constraint=None``, so weight regularizers or
    constraints given through ``kwargs`` are not attached to these weights.
    Confirm that this is intended.
    """

    def __init__(self, units, **kwargs):
        # Kept as an attribute in case external code reads it; not used by this class itself.
        self.output_dim = units
        super(NoisyDense, self).__init__(units, **kwargs)

    def build(self, input_shape):
        """Create the mean weights and the noise-scale (sigma) weights."""
        assert len(input_shape) >= 2
        self.input_dim = input_shape[-1]

        self.kernel = self.add_weight(shape=(self.input_dim, self.units),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=None,
                                      constraint=None)

        # Second kernel (trainable weights) that controls the amount of noise.
        self.kernel_sigma = self.add_weight(shape=(self.input_dim, self.units),
                                            initializer=initializers.Constant(0.017),
                                            name='sigma_kernel',
                                            regularizer=None,
                                            constraint=None)

        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=None,
                                        constraint=None)

            # Trainable; controls the amount of noise added to the bias.
            self.bias_sigma = self.add_weight(shape=(self.units,),
                                              initializer=initializers.Constant(0.017),
                                              name='bias_sigma',
                                              regularizer=None,
                                              constraint=None)
        else:
            self.bias = None
            # Defined here too so the attribute exists regardless of use_bias.
            self.bias_sigma = None

        self.input_spec = InputSpec(min_ndim=2, axes={-1: self.input_dim})
        self.built = True

    def call(self, inputs):
        """Apply the layer using noise-perturbed kernel and bias."""
        # Matrix of random numbers (regenerated on every call). The vector
        # version (see the Noisy Nets paper) would be more efficient.
        self.kernel_epsilon = K.random_normal(shape=(self.input_dim, self.units))

        # Element-wise product via the tensor operator instead of
        # `K.tf.multiply`: avoids relying on the backend module exposing `tf`.
        w = self.kernel + self.kernel_sigma * self.kernel_epsilon
        output = K.dot(inputs, w)

        if self.use_bias:
            # Random vector for the bias noise.
            self.bias_epsilon = K.random_normal(shape=(self.units,))

            b = self.bias + self.bias_sigma * self.bias_epsilon
            output = output + b
        if self.activation is not None:
            output = self.activation(output)
        return output

In [None]:
def create_dqn_model(input_shape, nb_actions, dense_layers, dense_units):
    """Build the Q-network as a Keras ``Sequential`` model.

    Layout, in order:
      1. ``InputLayer`` with ``input_shape``
      2. two identical stages, each made of one ``NoisyDense`` layer followed
         by ``dense_layers`` plain ``Dense`` layers (all ``dense_units`` wide,
         ReLU activation)
      3. a linear ``Dense`` output layer with ``nb_actions`` units

    Returns the (uncompiled) model.
    """
    network = Sequential()
    network.add(InputLayer(input_shape=input_shape))

    # Two stages: a noisy layer first, then a stack of ordinary dense layers.
    for _stage in range(2):
        network.add(NoisyDense(units=dense_units, activation='relu'))
        for _ in range(dense_layers):
            network.add(Dense(units=dense_units, activation='relu'))

    # One linear output per action.
    network.add(Dense(nb_actions, activation='linear'))
    return network

In [None]:
# Instantiate the online Q-network and print its layer summary.
# NOTE(review): `obs` and `env` are not defined in any cell visible in this
# notebook; they must already exist in the kernel before this cell runs
# (presumably an observation array and a gym environment - confirm and
# define them explicitly so Restart & Run All works).
input_shape = obs.shape
nb_actions = env.action_space.n  # author's note: 9 (TODO confirm for the environment in use)
dense_layers = 5   # plain Dense layers per stage in create_dqn_model
dense_units = 256  # width of every hidden layer

online_network = create_dqn_model(input_shape, nb_actions, dense_layers, dense_units)
online_network.summary()

### Loading weights

In [None]:
# Load previously saved weights into the online network.
# The folder is a hard-coded path relative to the notebook's working directory.
weights_folder = './Competition/MsPacman_DQN_9/weights'
online_network.load_weights(os.path.join(weights_folder, 'weights_last.h5f'))

### Collecting scores

In [None]:
# Settings for the score-collection run.
ngames = 100    # number of games handed to test_dqn
eps = 0.05      # forwarded as test_dqn's `eps` (presumably the exploration rate - confirm)
render = False  # forwarded as test_dqn's `render`

scores = test_dqn(ngames, online_network, eps=eps, render=render)

print('\nMean score: ', np.mean(scores))
print('\nMax score: ', np.max(scores))
# np.percentile(scores, 95) is the 95th percentile, so it is labelled as such.
print('\n95th percentile: ', np.percentile(scores, 95))
print('\nPercentiles:')
print([ np.percentile(scores, p) for p in [0, 25, 50, 75, 100] ])

### Rendering

In [None]:
# Play a handful of games with rendering switched on and report the scores.
# `time` is already imported in the first cell and is not used directly in
# this cell; the import is kept as-is.
import time
ngames = 5     # number of games handed to test_dqn
eps = 0.05     # forwarded as test_dqn's `eps`
render = True  # forwarded as test_dqn's `render`

scores = test_dqn(ngames, online_network, eps=eps, render=render)

print('\nMean score: ', np.mean(scores))
print('\nMax score: ', np.max(scores))
print('\nPercentiles:')
# Minimum, quartiles and maximum of the collected scores.
print([ np.percentile(scores, p) for p in [0, 25, 50, 75, 100] ])