In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules (e.g. doom_src.utilities) are picked up without restarting
# the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import tensorflow as tf
import numpy as np

from tensorflow.keras import layers
from doom_src import utilities
from collections import deque
from vizdoom import *

First, let's load the config file and create a new game instance

In [3]:
# Load the run configuration from configs/config.json via the project helper.
# The resulting `config` is passed to the environment factory and to train_net
# below, which reads keys such as 'frame_size' and 'stack_size' from it.
config = utilities.get_config('configs/config.json')

In [4]:
# Build the game instance from the config. The helper returns a pair: the game
# object and an `actions` object (presumably the set of available actions --
# confirm in doom_src.utilities); both are handed to train_net further down.
game, actions = utilities.create_environment(config)

Next, we define the DQN model.

In [5]:
class DQN_D():
    """
    Define the Deep-Q Network to play Doom.

    The network is a Keras ``Sequential`` model made of three convolutional
    layers (each followed by batch normalization), a flatten step, a 512-unit
    ReLU dense layer and a linear output layer with one unit per action.
    The model is compiled with mean-squared-error loss and an Adam optimizer.

    After construction the instance exposes:
        op    -- the Adam optimizer the model was compiled with
        model -- the compiled ``tf.keras.Sequential`` network
    """

    # (filters, kernel_size, strides) for each convolutional layer, in order.
    _CONV_SPECS = (
        (32, (8, 8), (4, 4)),
        (64, (4, 4), (2, 2)),
        (64, (4, 4), (1, 1)),
    )

    def __init__(self, config, n_actions=3):
        """
        Build and compile the network.

        Args:
            config: mapping that provides 'frame_size', 'stack_size' and
                'learning_rate'.
            n_actions: number of units in the output layer, i.e. how many
                Q-values the network predicts. Defaults to 3, the value that
                was previously hard-coded.
        """
        frame_size = config['frame_size']
        stack_size = config['stack_size']
        learning_rate = config['learning_rate']

        model = tf.keras.Sequential()

        # Convolutional layers, each followed by batch normalization.
        # NOTE(review): there is no nonlinearity between these layers, so the
        # convolutional stack is a composition of linear/affine maps. If that
        # is unintended, add an activation after each BatchNormalization.
        for index, (filters, kernel_size, strides) in enumerate(self._CONV_SPECS):
            conv_kwargs = {
                'filters': filters,
                'kernel_size': kernel_size,
                'strides': strides,
                'padding': 'valid',
            }
            if index == 0:
                # Only the first layer declares the input shape:
                # a square frame with the stacked frames as channels.
                conv_kwargs['input_shape'] = (frame_size, frame_size, stack_size)

            model.add(layers.Convolution2D(**conv_kwargs))
            model.add(layers.BatchNormalization(epsilon=1e-5))

        # Flatten before passing to dense layers
        model.add(layers.Flatten())

        # Dense layer 1
        model.add(
            layers.Dense(
                units=512,
                activation='relu',
            )
        )

        # Dense layer 2 (output): one Q-value per action. The activation must
        # be linear -- a ReLU here would clamp every predicted Q-value to be
        # non-negative, so negative returns could never be represented.
        model.add(
            layers.Dense(
                units=n_actions,
                activation='linear',
            )
        )

        # `learning_rate` is the supported keyword; `lr` is a deprecated alias
        # that newer Keras versions reject.
        self.op = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        model.compile(loss='mse', optimizer=self.op)
        self.model = model

In [12]:
def train_net(config, n_episodes, game, actions):
    """
    Train the Q Network.

    At present this sets up everything needed for training -- the network,
    the memory buffer, the epsilon schedule and the initial frame stack --
    pre-fills the memory buffer, steps the epsilon schedule once per episode
    and finally prints the size of one sampled batch. The per-episode
    training step itself is not implemented yet.

    Args:
        config: mapping that provides 'stack_size', 'frame_size',
            'pretrain_steps', 'batch_size', 'memory_size', 'annealing_steps',
            'annealing_stop' and 'annealing_start' (plus whatever DQN_D reads).
        n_episodes: number of episodes to iterate over.
        game: game instance, forwarded to ``utilities.pretrain``.
        actions: actions object, forwarded to ``utilities.pretrain``.

    Returns:
        None.
    """

    stack_size      = config['stack_size']
    frame_size      = config['frame_size']
    pretrain_steps  = config['pretrain_steps']
    batch_size      = config['batch_size']
    memory_size     = config['memory_size']
    annealing_steps = config['annealing_steps']
    annealing_stop  = config['annealing_stop']
    annealing_start = config['annealing_start']

    # Initialize the DQN
    policy_net = DQN_D(config)

    # Initialize the memory buffer
    memory = utilities.Memory(memory_size)

    # Initialize the linear annealing scheduler
    epsilon = utilities.LinearSchedule(
        annealing_steps,
        annealing_stop,
        annealing_start
    )

    # Initialize the stack of frames with blank frames. The frame dimensions
    # and stack depth come from the config so they stay consistent with the
    # values handed to utilities.pretrain below (they used to be hard-coded
    # as 84x84 and 4). `int` replaces `np.int`, the alias for the builtin
    # that was removed from NumPy.
    stacked_frames = deque(
        [np.zeros((frame_size, frame_size), dtype=int) for _ in range(stack_size)],
        maxlen=stack_size,
    )

    # Fill up the memory buffer
    utilities.pretrain(
        pretrain_steps,
        memory,
        stack_size,
        frame_size,
        stacked_frames,
        game,
        actions
    )

    for episode in range(n_episodes):
        # TODO: `eps` (and `policy_net`) are not used yet; the training step
        # for each episode still has to be written.
        eps = epsilon.value(episode)

    # Sanity check: show how many items one sampled batch contains.
    print(len(memory.sample(batch_size)))
        
        

    
    

In [13]:
# Run the training routine for 10 episodes. The value printed below is the
# length of one batch sampled from the memory buffer inside train_net.
train_net(config, 10, game, actions)

32
