# Dependencies

In [32]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
import numpy as np

from collections import deque
import itertools as it
import cv2
from vizdoom import *
import random
import time

# Doom-Ai

## Initial Configuration

In [53]:
# Location of the scenario used when the caller does not supply one.
DEFAULT_SCENARIO_PATH = "/home/msi-gtfo/repos/ViZDoom/scenarios/basic.wad"


def configure_game(scenario_path=DEFAULT_SCENARIO_PATH):
    """Create, configure and initialise a ViZDoom game instance.

    The game renders 640x480 RGB24 frames with the window hidden, keeps the
    weapon visible and turns off the HUD, crosshair, decals and particles.
    Three buttons are exposed to the agent (move left, move right, attack)
    and the living reward is set to -1.

    Args:
        scenario_path: Path to the ``.wad`` scenario file to load. Defaults
            to the path that was previously hard-coded, so existing calls
            without arguments behave as before.

    Returns:
        The initialised ``DoomGame`` object.
    """
    game = DoomGame()
    game.set_doom_scenario_path(scenario_path)

    # Rendering options
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_screen_format(ScreenFormat.RGB24)
    game.set_render_hud(False)
    game.set_render_crosshair(False)
    game.set_render_weapon(True)
    game.set_render_decals(False)
    game.set_render_particles(False)
    game.set_window_visible(False)

    # Available Buttons -- the order here defines the order of the entries
    # in every action vector passed to make_action.
    for button in (Button.MOVE_LEFT, Button.MOVE_RIGHT, Button.ATTACK):
        game.add_available_button(button)

    game.set_living_reward(-1)
    game.init()

    return game


## Define Q-Learning Functions

In [62]:
# Replay-buffer sizing: DQNAgent.train returns early until the buffer holds at
# least MIN_REPLAY_MEMORY transitions; the buffer itself is a deque capped at
# MAX_REPLAY_MEMORY entries.
MIN_REPLAY_MEMORY = 1000
MAX_REPLAY_MEMORY = 5000
# Number of transitions sampled from the buffer (and the fit batch size) per training step.
MINI_BATCH_SIZE   = 64
# Threshold on the count of terminal training steps that triggers copying the
# main network's weights into the target network.
UPDATE_TARGET_EVERY = 5

# Discount factor applied to the best next-state Q-value when building targets.
DISCOUNT = 0.99

# Network input size as (rows, cols); preprocess resizes frames to this and
# create_model uses it for the first layer's input shape.
resolution = (30,45)
# Passed as the second argument to game.make_action in the training loop.
frame_repeat = 12

def preprocess(img):
    """Resize a frame to the network input size and return it as float32.

    ``resolution`` is stored as (rows, cols) while ``cv2.resize`` expects the
    target size as (width, height), hence the swapped order below.
    """
    target_size = (resolution[1], resolution[0])
    resized = cv2.resize(img, target_size)
    return resized.astype(np.float32)

def create_model(n_actions):
    """Build and compile the convolutional Q-network.

    The network takes frames of shape ``(resolution[0], resolution[1], 3)``,
    applies two Conv2D(128, 3x3, relu, same-padding) + 2x2 max-pool stages,
    then a 256-unit dense layer with 50% dropout, and ends in a linear dense
    layer with one output per action.

    Args:
        n_actions: Number of output units, i.e. one Q-value per action.

    Returns:
        The compiled Keras ``Sequential`` model (Adam, learning rate 0.001,
        mean-squared-error loss).
    """
    input_shape = (resolution[0], resolution[1], 3)

    model = Sequential()
    model.add(Conv2D(128, 3, input_shape=input_shape, activation='relu', padding='same'))
    model.add(MaxPooling2D(2, 2))
    model.add(Conv2D(128, 3, activation='relu', padding='same'))
    model.add(MaxPooling2D(2, 2))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    # No activation: the outputs are unbounded Q-value estimates.
    model.add(Dense(n_actions))

    # `lr` is the deprecated spelling of `learning_rate`; newer Keras
    # releases reject it.
    opt = Adam(learning_rate=0.001)
    # Keyword arguments: the positional order of `metrics` and `loss_weights`
    # differs between Keras versions, so passing the metric list positionally
    # is fragile.
    # NOTE(review): 'accuracy' is kept only to preserve the original
    # configuration; it is probably not informative for a regression loss.
    model.compile(optimizer=opt, loss='mse', metrics=['accuracy'])

    return model

class DQNAgent:
    """Q-learning agent with a main network, a target network and a replay buffer.

    The main network is trained on every step; the target network supplies
    the next-state Q-values and is synchronised with the main network after
    a number of terminal training steps.
    """

    def __init__(self, n_actions):
        """Create both networks with identical weights and an empty replay buffer.

        Args:
            n_actions: Number of discrete actions (network outputs).
        """
        self.model = create_model(n_actions)
        self.target_model = create_model(n_actions)
        self.target_model.set_weights(self.model.get_weights())

        # Oldest transitions are dropped automatically once the cap is hit.
        self.replay_memory = deque(maxlen=MAX_REPLAY_MEMORY)
        self.target_update_counter = 0

    def update_replay_memory(self, transition):
        """Append one transition to the replay buffer.

        Args:
            transition: Sequence of
                (observation, action, reward, new observation, done).
        """
        self.replay_memory.append(transition)

    def get_qs(self, state):
        """Return the main network's Q-values for a single observation.

        Args:
            state: One frame with pixel values on the 0-255 scale.

        Returns:
            1-D array with one Q-value per action.
        """
        scaled = np.array(state) / 255
        batch = np.expand_dims(scaled, axis=0)
        # verbose=0 keeps the per-step predict call from printing a progress bar.
        return self.model.predict(batch, verbose=0)[0]

    def train(self, terminal_state):
        """Run one training step of the main network on a sampled minibatch.

        Does nothing until the replay buffer holds at least
        ``MIN_REPLAY_MEMORY`` transitions.

        Args:
            terminal_state: True when the step that triggered this call ended
                the episode; used to count episodes for target-network syncs.
        """
        # Start training only if certain number of samples is already saved
        if len(self.replay_memory) < MIN_REPLAY_MEMORY:
            return

        minibatch = random.sample(self.replay_memory, MINI_BATCH_SIZE)

        current_states = np.array([transition[0] for transition in minibatch]) / 255
        current_qs_list = self.model.predict(current_states, verbose=0)

        # Terminal transitions have no real successor frame (callers may store
        # an arbitrary placeholder), and their future Q-value is never used.
        # Substitute a zero frame so every row has the same shape and the
        # batch can be stacked into one array.
        next_states = np.array([
            np.zeros_like(state) if done else new_state
            for state, _action, _reward, new_state, done in minibatch
        ]) / 255
        future_qs_list = self.target_model.predict(next_states, verbose=0)

        # Overwrite only the Q-value of the action actually taken; the other
        # outputs keep the network's own predictions so they add no error.
        for index, (_state, action, reward, _new_state, done) in enumerate(minibatch):
            if done:
                new_q = reward
            else:
                new_q = reward + DISCOUNT * np.max(future_qs_list[index])
            current_qs_list[index][action] = new_q

        # The whole minibatch is a single batch, so shuffling is irrelevant.
        self.model.fit(current_states, current_qs_list,
                       batch_size=MINI_BATCH_SIZE, verbose=0, shuffle=False)

        if terminal_state:
            self.target_update_counter += 1

        # `>=` so the sync happens every UPDATE_TARGET_EVERY terminal steps,
        # not every UPDATE_TARGET_EVERY + 1.
        if self.target_update_counter >= UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0

In [64]:
# Every on/off combination of the three available buttons (8 actions).
actions = [list(a) for a in it.product([0, 1], repeat=3)]
epochs = 15
# Probability of taking a random action instead of the greedy one.
epsilon = 0.2

agent = DQNAgent(len(actions))
game = configure_game()

scores = []

print("Starting training")
for epoch in range(epochs):
    print("Episode ",epoch)
    game.new_episode()

    while not game.is_episode_finished():
        s1 = preprocess(game.get_state().screen_buffer)

        # Epsilon-greedy action selection.
        if random.random() <= epsilon:
            a = random.randint(0, len(actions) - 1)
        else:
            a = int(np.argmax(agent.get_qs(s1)))

        reward = game.make_action(actions[a], frame_repeat)
        isterminal = game.is_episode_finished()

        # Once the episode has ended get_state() yields None, so there is no
        # next frame to read. Store a zero frame with the same shape as s1 so
        # that all stored states can be stacked into one array when a
        # minibatch is sampled.
        if isterminal:
            s2 = np.zeros_like(s1)
        else:
            s2 = preprocess(game.get_state().screen_buffer)

        agent.update_replay_memory([s1, a, reward, s2, isterminal])
        agent.train(isterminal)

    # The while loop only exits once the episode is finished.
    final_reward = game.get_total_reward()
    print("Final Reward: ", final_reward)
    scores.append(final_reward)

train_scores = np.array(scores)

print("Results: mean: %.1f±%.1f," % (train_scores.mean(), train_scores.std()),
         "min: %.1f," % train_scores.min(), "max: %.1f," % train_scores.max())

Starting training
Episode  0
Not enough replay data:  1
Not enough replay data:  2
Not enough replay data:  3
Not enough replay data:  4
Not enough replay data:  5
Not enough replay data:  6
Not enough replay data:  7
Not enough replay data:  8
Not enough replay data:  9
Not enough replay data:  10
Not enough replay data:  11
Not enough replay data:  12
Not enough replay data:  13
Not enough replay data:  14
Not enough replay data:  15
Not enough replay data:  16
Not enough replay data:  17
Not enough replay data:  18
Not enough replay data:  19
Not enough replay data:  20
Not enough replay data:  21
Not enough replay data:  22
Not enough replay data:  23
Not enough replay data:  24


AttributeError: 'NoneType' object has no attribute 'screen_buffer'