In [1]:
import os
import random
from collections import deque, namedtuple
from time import sleep

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output
from skimage import transform

In [2]:
from vizdoom import DoomGame, Button, GameVariable, ScreenFormat, ScreenResolution, Mode

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

In [4]:
import warnings
warnings.filterwarnings('ignore')

## Set Configs

In [5]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
is_cuda = torch.cuda.is_available()
device = torch.device('cuda' if is_cuda else 'cpu')

## Set Environment

In [6]:
# Single shared DoomGame instance; init_scenario() configures and starts it.
game = DoomGame()

In [7]:
def init_scenario(game, scenario_name):
    """Configure ``game`` for one scenario and start it.

    Loads ``./scenarios/<scenario_name>.cfg``, points the game at the matching
    ``.wad`` file, applies the settings below on top of the loaded config and
    finally calls ``game.init()``.

    Args:
        game: The ``DoomGame`` instance to configure and initialize.
        scenario_name: Base name (no extension) of the scenario's ``.cfg`` and
            ``.wad`` files inside ``./scenarios``.
    """

    # Scenario files.
    game.load_config(f'./scenarios/{scenario_name}.cfg')
    game.set_doom_scenario_path(f'./scenarios/{scenario_name}.wad')

    # Screen buffer and rendering options.
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_screen_format(ScreenFormat.RGB24)
    game.set_render_hud(False)
    game.set_render_crosshair(False)
    game.set_render_weapon(True)
    game.set_render_decals(False)
    game.set_render_particles(False)

    # Buttons and game variables exposed to the agent.
    game.add_available_button(Button.MOVE_LEFT)
    game.add_available_button(Button.MOVE_RIGHT)
    game.add_available_button(Button.ATTACK)
    game.add_available_button(Button.LOOK_UP_DOWN_DELTA)
    game.add_available_game_variable(GameVariable.AMMO2)

    # Episode, window, sound, reward and mode settings.
    game.set_episode_timeout(200)
    game.set_episode_start_time(10)
    game.set_window_visible(True)
    game.set_sound_enabled(True)
    game.set_living_reward(-1)
    game.set_mode(Mode.PLAYER)

    # Report success only after init() has actually run, so a failing init
    # does not leave a misleading "initialized" message behind.
    game.init()
    print(f'{scenario_name} initialized.')

In [8]:
# Play a couple of random-action episodes in every scenario as a smoke test,
# then leave the game initialized on the scenario used for training.
scenarios = ['basic', 'deathmatch', 'defend_the_center', 'health_gathering', 'my_way_home']

# Button vectors handed to game.make_action(); they do not depend on the
# scenario, so they are built once instead of on every loop iteration.
shoot = [0, 0, 1]
left = [1, 0, 0]
right = [0, 1, 0]
actions = [shoot, left, right]

num_episodes = 2

for scenario_name in scenarios:

    init_scenario(game, scenario_name)

    for i in range(1, num_episodes+1):
        game.new_episode()

        total_reward = 0
        while not game.is_episode_finished():
            state = game.get_state()
            img = state.screen_buffer
            misc = state.game_variables
            reward = game.make_action(random.choice(actions))

            total_reward += reward
            # total_reward is reset at the start of each episode, so it is the
            # running reward of the current episode (not an average).
            print(f'Scenario: {scenario_name}, Episode: {i}/{num_episodes}, Running Reward: {total_reward:.2f}')
            clear_output(wait=True)
            sleep(0.01)

        print(f'Scenario: {scenario_name}, Episode: {i}/{num_episodes}, Total Reward: {game.get_total_reward()}', end='')
        clear_output(wait=True)
        sleep(2)

    # Close the game so the next init_scenario() call starts from a closed instance.
    game.close()

init_scenario(game, 'deadly_corridor') # change playing scenario

deadly_corridor initialized.


## Preprocess Frame

In [9]:
def preprocess_frame(frame):
    """Crop, normalize and downscale a raw screen frame.

    Args:
        frame: Image array indexed as ``[row, column, ...]`` with pixel values
            in the 0-255 range (presumably the game's screen buffer — confirm
            against the caller).

    Returns:
        The cropped frame, divided by 255 and resized to 100 rows by 120
        columns.
    """

    # crop the screen (remove part that contains no information)
    cropped_frame = frame[15:-5, 20:-20]

    # normalize pixel values
    normalized_frame = cropped_frame / 255.0

    # resize the normalized crop (the original resized the un-normalized crop
    # and then returned this function object instead of the frame)
    preprocessed_frame = transform.resize(normalized_frame, [100, 120])

    return preprocessed_frame

In [10]:
# Number of consecutive frames that make up one stacked state.
stack_size = 4

# Initial history: ``stack_size`` all-zero frames with the same 100x120 shape
# that preprocess_frame() resizes to. ``int`` replaces ``np.int``, which was
# removed in NumPy 1.24.
stacked_frames = deque(
    [np.zeros((100, 120), dtype=int) for _ in range(stack_size)],
    maxlen=stack_size,
)

In [11]:
def stack_frames(stacked_frames, state, is_new_episode):
    """Preprocess ``state`` and fold it into the rolling frame history.

    Args:
        stacked_frames: ``deque`` holding the most recent preprocessed frames.
            It is mutated in place when ``is_new_episode`` is false and
            replaced by a new deque otherwise.
        state: Raw frame, passed to ``preprocess_frame``.
        is_new_episode: When true, the history is discarded and refilled with
            copies of the current frame.

    Returns:
        ``(stacked_state, stacked_frames)`` where ``stacked_state`` is the
        frames joined with ``np.stack(..., axis=2)`` and ``stacked_frames`` is
        the deque to pass to the next call.
    """

    # preprocess frame
    frame = preprocess_frame(state)

    if is_new_episode:
        # New episode: drop the old history and repeat the first frame so the
        # stack is full from the very first step.
        stacked_frames = deque([frame] * stack_size, maxlen=stack_size)
    else:
        # Appending to a full deque automatically evicts the oldest frame.
        stacked_frames.append(frame)

    # Frames are stacked along axis 2 (the last axis for 2-D frames).
    stacked_state = np.stack(stacked_frames, axis=2)

    return stacked_state, stacked_frames

## Build DQN Architecture

In [12]:
class DQN(nn.Module):
    """Dueling DQN with a small residual convolutional trunk.

    Three kernel-6 / stride-2 convolutions downsample the input; the first two
    are each followed by a two-layer residual block. The flattened features go
    through one fully connected layer and then split into a state-value head
    and an advantage head, combined as ``Q = V + A - mean(A)``.
    """

    def __init__(self, in_channels=3, action_size=3, input_shape=(64, 64)):
        """Initialize parameters and build model.

        The defaults reproduce the original fixed architecture (3 input
        channels, 3 actions, an 8*8*64 flattened feature vector).

        Args:
            in_channels: Number of channels of the input tensor.
            action_size: Number of actions, i.e. width of the advantage head.
            input_shape: ``(height, width)`` of the input; used to size the
                fully connected layer.

        Raises:
            ValueError: If ``input_shape`` is too small to survive the three
                downsampling convolutions.
        """

        super(DQN, self).__init__()

        self.relu = nn.ReLU()
        self.conv1_layer = nn.Conv2d(in_channels=in_channels, out_channels=64, kernel_size=6, stride=2, padding=2)

        self.conv2_1_layer = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2_2_layer = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)

        self.conv3_layer = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=6, stride=2, padding=2)

        # NOTE: "_later" is a typo for "_layer"; the names are kept because
        # they are part of the module's state_dict keys.
        self.conv4_1_later = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv4_2_later = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)

        self.conv5_layer = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=6, stride=2, padding=2)

        # Only conv1/conv3/conv5 change the spatial size; the 3x3 convolutions
        # use stride 1 with padding 1 and preserve it.
        height, width = input_shape
        for _ in range(3):
            height = self._downsampled(height)
            width = self._downsampled(width)
        if height <= 0 or width <= 0:
            raise ValueError(f'input_shape {tuple(input_shape)} is too small for this network')

        self.fc_layer = nn.Linear(height * width * 64, 1024)
        self.state_layer = nn.Linear(1024, 1)
        self.advantage_layer = nn.Linear(1024, action_size)

    @staticmethod
    def _downsampled(size):
        """Return the spatial size after one kernel-6, stride-2, padding-2 convolution."""
        return (size + 2 * 2 - 6) // 2 + 1

    def forward(self, frame):
        """Compute Q-values for a batch of inputs.

        Args:
            frame: Tensor of shape ``(batch, in_channels, height, width)``
                matching the constructor arguments.

        Returns:
            Tensor of shape ``(batch, action_size)`` with one Q-value per action.
        """

        # First downsampling stage followed by a residual block.
        x = self.relu(self.conv1_layer(frame))
        y = self.relu(self.conv2_1_layer(x))
        y = self.conv2_2_layer(y)

        x = self.relu(x + y)

        # Second downsampling stage followed by a residual block.
        x = self.relu(self.conv3_layer(x))
        y = self.relu(self.conv4_1_later(x))
        y = self.conv4_2_later(y)

        x = self.relu(x + y)

        # Final downsampling, then flatten per sample for the dense layer.
        x = self.relu(self.conv5_layer(x))
        x = self.relu(self.fc_layer(x.view(x.size(0), -1)))

        state_value = self.state_layer(x)
        advantage_value = self.advantage_layer(x)

        # Dueling aggregation: subtract the mean advantage over actions.
        Qsa = state_value + advantage_value - torch.mean(advantage_value, dim=1, keepdim=True)

        return Qsa

## Set Replay Buffer

In [13]:
class ReplayBuffer(object):
    """Fixed-size buffer to store experience tuples."""

    def __init__(self, action_size, buffer_size, batch_size, seed):
        """Initialize a ReplayMemory object.

        Args:
            action_size: Stored as ``self.action_size``; not used by the buffer itself.
            buffer_size: Maximum number of experiences kept; once full, the
                oldest experience is dropped on every ``add``.
            batch_size: Number of experiences returned by ``sample``.
            seed: Seed for Python's global ``random`` module, which drives sampling.
        """

        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])

        # random.seed() returns None, so keep the seed value itself on the instance.
        random.seed(seed)
        self.seed = seed

        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size

    def add(self, state, action, reward, next_state, done):
        """Add a new experience to buffer."""

        self.memory.append(self.experience(state, action, reward, next_state, done))

    @staticmethod
    def _column(experiences, field):
        """Stack one field of every non-None experience with ``np.vstack``."""
        return np.vstack([getattr(exp, field) for exp in experiences if exp is not None])

    def sample(self):
        """Randomly sample a batch of experiences from memory.

        Returns:
            ``(states, actions, rewards, next_states, dones)`` as tensors moved
            to the module-level ``device``. ``actions`` is ``long``; the others
            are ``float`` (``dones`` is cast via ``uint8`` first).

        Raises:
            ValueError: From ``random.sample`` when the buffer holds fewer than
                ``batch_size`` experiences.
        """
        experiences = random.sample(self.memory, k=self.batch_size)

        # NOTE(review): np.vstack concatenates along the first axis, so
        # multi-dimensional states only yield one row per experience if each
        # stored state has a leading axis of size 1 — confirm against the caller.
        states = torch.from_numpy(self._column(experiences, "state")).float().to(device)
        actions = torch.from_numpy(self._column(experiences, "action")).long().to(device)
        rewards = torch.from_numpy(self._column(experiences, "reward")).float().to(device)
        next_states = torch.from_numpy(self._column(experiences, "next_state")).float().to(device)
        dones = torch.from_numpy(self._column(experiences, "done").astype(np.uint8)).float().to(device)

        return (states, actions, rewards, next_states, dones)

    def __len__(self):
        """Return the current size of internal memory."""
        return len(self.memory)

## Define CNN DQN Agent

In [14]:
class CNN_DQNAgent:
    """Agent intended to interact with, and learn from, the environment."""

    def __init__(self, state_size, action_size, seed):
        """Create the agent.

        Placeholder: ``state_size``, ``action_size`` and ``seed`` are accepted
        but nothing is stored or built yet.
        """

---