In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local doom_src package) are picked up without restarting
# the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import tensorflow as tf
import numpy as np

from tensorflow.keras import layers
from doom_src import utilities
from vizdoom import *

First, let's load the config file and create a new game instance.

In [3]:
# Load the run configuration; later cells read 'frame_size', 'stack_size',
# 'learning_rate' and the 'annealing_*' entries from it.
config = utilities.get_config('configs/config.json')

In [4]:
# Create the game instance from the loaded config.
# NOTE(review): `actions` is presumably the collection of actions the agent
# can take in this environment - confirm against utilities.create_environment.
game, actions = utilities.create_environment(config)

Next, we need a DQN model.

In [5]:
class DQN_D():
    """
    Define the Deep-Q Network to play Doom.

    The network is a Keras ``Sequential`` model that takes an input of shape
    ``(frame_size, frame_size, stack_size)`` and produces one output per
    action. It is compiled with an MSE loss and the Adam optimizer.

    Architecture: three [convolution -> batch norm -> ReLU] stages, a
    flatten, a 512-unit ReLU dense layer and a linear output layer.

    Attributes:
        model: the compiled ``tf.keras.Sequential`` network.
        op: the Adam optimizer instance the model was compiled with.
    """

    # (filters, kernel_size, strides) of each convolutional stage, in order.
    _CONV_STAGES = (
        (32, (8, 8), (4, 4)),
        (64, (4, 4), (2, 2)),
        (64, (4, 4), (1, 1)),
    )

    def __init__(self, config, n_actions=2):
        """
        Build and compile the network.

        Args:
            config: mapping providing 'frame_size', 'stack_size' and
                'learning_rate'.
            n_actions: number of units in the output layer (one Q-value per
                action). Defaults to 2, the value that used to be hard-coded,
                so existing ``DQN_D(config)`` callers are unaffected.

        Raises:
            ValueError: if ``n_actions`` is smaller than 1.
        """
        if n_actions < 1:
            raise ValueError(
                'n_actions must be at least 1, got {!r}'.format(n_actions)
            )

        frame_size = config['frame_size']
        stack_size = config['stack_size']
        learning_rate = config['learning_rate']

        model = tf.keras.Sequential()

        for index, (filters, kernel_size, strides) in enumerate(self._CONV_STAGES):
            conv_kwargs = dict(
                filters=filters,
                kernel_size=kernel_size,
                strides=strides,
                padding='valid',
            )
            if index == 0:
                # Only the first layer declares the input shape.
                conv_kwargs['input_shape'] = (frame_size, frame_size, stack_size)

            model.add(layers.Convolution2D(**conv_kwargs))
            model.add(layers.BatchNormalization(epsilon=1e-5))
            # Without a non-linearity here the conv/batch-norm stack is just
            # a composition of linear maps at inference time.
            model.add(layers.Activation('relu'))

        # Flatten before passing to dense layers
        model.add(layers.Flatten())

        # Dense layer 1
        model.add(layers.Dense(units=512, activation='relu'))

        # Dense layer 2 (output): linear, because Q-values may be negative
        # and a ReLU here would clamp every prediction to >= 0.
        model.add(layers.Dense(units=n_actions, activation='linear'))

        # `learning_rate` is the supported keyword; `lr` is a deprecated alias
        # that newer Keras releases no longer accept.
        self.op = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        model.compile(loss='mse', optimizer=self.op)
        self.model = model

In [6]:
def train_net(config, n_episodes):
    """
    Set up the training components and print the epsilon schedule.

    Work in progress: the network and the memory are created but not used
    yet; the loop only prints the scheduled epsilon for each episode index.

    Args:
        config: configuration mapping; passed to ``DQN_D`` and read for the
            'annealing_steps', 'annealing_stop' and 'annealing_start' keys.
        n_episodes: number of episode indices to iterate over.
    """
    # Built in the same order as before; neither object is used further yet.
    q_network = DQN_D(config)
    # NOTE(review): 10000 is presumably the memory capacity - confirm
    # against utilities.Memory.
    replay_memory = utilities.Memory(10000)

    schedule_keys = ('annealing_steps', 'annealing_stop', 'annealing_start')
    schedule_args = [config[key] for key in schedule_keys]
    exploration = utilities.LinearSchedule(*schedule_args)

    for episode_index in range(n_episodes):
        current_epsilon = exploration.value(episode_index)
        print(current_epsilon)

    
    

In [7]:
# Run the (still stubbed) training entry point for 10 episodes; for now this
# only prints the epsilon value scheduled for each episode index.
train_net(config, 10)

1.0
0.99901
0.99802
0.99703
0.99604
0.99505
0.99406
0.99307
0.99208
0.99109
