In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# E. Culurciello, L. Mueller, Z. Boztoprak
# December 2020

from __future__ import print_function
import vizdoom as vzd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import itertools as it
import skimage.transform

from vizdoom import Mode
from time import sleep, time
from collections import deque
from tqdm import trange

In [None]:
# Timing note (GPU): 64 epochs x 12000 steps, batch size 64, 100 test episodes per epoch -> 919.33 minutes (~15.3 hours; originally noted as "~24 hours")

In [2]:
# --- Q-learning hyperparameters ---
learning_rate = 2.5e-4
discount_factor = 0.99
train_epochs = 64
learning_steps_per_epoch = 12_000
replay_memory_size = 1_000_000

# --- Network update settings: transitions sampled per training step ---
batch_size = 64

# --- Training regime: evaluation episodes played after every epoch ---
test_episodes_per_epoch = 100

# --- Environment / preprocessing parameters ---
frame_repeat = 4        # second argument given to game.make_action()
resolution = (30, 45)   # output shape requested from skimage.transform.resize
episodes_to_watch = 10  # episodes replayed once training is over

# --- Checkpointing and run-mode switches ---
model_savefile = "/home/spillingvoid/programs/Wilco/model/wilco-doom-gladiator.pth"
save_model = True       # write the network to model_savefile after every epoch
load_model = True       # start from the network stored in model_savefile
skip_learning = False   # True jumps straight to the watch phase

In [3]:
# Configuration file path
# Scenario .cfg handed to game.load_config() in create_simple_game().
# NOTE(review): these are absolute, machine-specific paths; adjust them before
# running elsewhere. The commented-out lines are alternative scenarios.
#config_file_path = "/home/spillingvoid/programs/ViZDoom/scenarios/simpler_basic.cfg"
config_file_path = "/home/spillingvoid/programs/ViZDoom/scenarios/Doom10.cfg"
# config_file_path = "/home/spillingvoid/programs/ViZDoom/scenarios/basic.cfg"

In [4]:
# Run on CUDA whenever PyTorch reports a usable GPU; fall back to the CPU otherwise.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if DEVICE.type == 'cuda':
    # cuDNN benchmark mode is switched on only for the GPU path.
    torch.backends.cudnn.benchmark = True

In [5]:
# Report which torch device was selected for the networks.
print("Device used", DEVICE)

Device used cuda


In [6]:
def preprocess(img):
    """
    Resize a screen buffer to `resolution` and return it as a float32 array
    with a new leading axis of length 1.
    """
    resized = skimage.transform.resize(img, resolution)
    return np.expand_dims(resized.astype(np.float32), axis=0)


def create_simple_game():
    """
    Build and initialise a DoomGame from `config_file_path`.

    The scenario file is loaded first; the window, mode, screen format and
    screen resolution are then set explicitly before `init()` is called.

    Returns:
        The initialised `vzd.DoomGame` instance.
    """
    print("Initializing doom...")

    doom = vzd.DoomGame()
    doom.load_config(config_file_path)

    # Explicit settings applied after the scenario file has been loaded.
    doom.set_window_visible(True)
    doom.set_mode(Mode.PLAYER)
    doom.set_screen_format(vzd.ScreenFormat.GRAY8)
    doom.set_screen_resolution(vzd.ScreenResolution.RES_640X480)

    doom.init()
    print("Doom initialized.")

    return doom


def test(game, agent):
    """
    Play `test_episodes_per_epoch` episodes with `agent` and print the mean,
    standard deviation, minimum and maximum of the total episode rewards.

    Actions come from `agent.get_action`, and are looked up in the
    module-level `actions` list and repeated `frame_repeat` times.
    """
    print("\nTesting...")

    def play_episode():
        # Run a single episode to completion and return its total reward.
        game.new_episode()
        while not game.is_episode_finished():
            frame = preprocess(game.get_state().screen_buffer)
            chosen = agent.get_action(frame)
            game.make_action(actions[chosen], frame_repeat)
        return game.get_total_reward()

    scores = np.array(
        [play_episode() for _ in trange(test_episodes_per_epoch, leave=False)]
    )

    summary = "Results: mean: %.1f +/- %.1f," % (scores.mean(), scores.std())
    lowest = "min: %.1f" % scores.min()
    highest = "max: %.1f" % scores.max()
    print(summary, lowest, highest)

In [7]:
def run(game, agent, actions, num_epochs, frame_repeat, steps_per_epoch=2000):
    """
    Run num epochs of training episodes.
    Skip frame_repeat number of frames after each action.

    Each epoch performs `steps_per_epoch` environment steps, storing every
    transition in the agent's replay memory and training the agent once more
    than `agent.batch_size` steps have elapsed in the epoch. At the end of an
    epoch the target network is synchronised, `test()` is run and, when the
    module-level `save_model` flag is set, the Q-network is saved to
    `model_savefile`.

    Args:
        game: initialised game object (new_episode / get_state / make_action /
            is_episode_finished / get_total_reward / close are used).
        agent: object providing get_action, append_memory, train,
            update_target_net, batch_size and q_net.
        actions: list of button vectors indexed by the agent's action index.
        num_epochs: number of training epochs.
        frame_repeat: second argument passed to `game.make_action`.
        steps_per_epoch: environment steps per epoch.

    Returns:
        The tuple `(agent, game)`; the game has been closed.
    """

    start_time = time()

    for epoch in range(num_epochs):
        game.new_episode()
        train_scores = []
        global_step = 0
        print("\nEpoch #" + str(epoch + 1))

        for _ in trange(steps_per_epoch, leave=False):
            state = preprocess(game.get_state().screen_buffer)
            action = agent.get_action(state)
            reward = game.make_action(actions[action], frame_repeat)
            done = game.is_episode_finished()

            if not done:
                next_state = preprocess(game.get_state().screen_buffer)
            else:
                # Terminal transitions get an all-zero frame. Mirroring `state`
                # keeps shape and dtype in step with `preprocess`, so the
                # placeholder stays stackable if `resolution` is changed.
                next_state = np.zeros_like(state)

            agent.append_memory(state, action, reward, next_state, done)

            if global_step > agent.batch_size:
                agent.train()

            if done:
                train_scores.append(game.get_total_reward())
                game.new_episode()

            global_step += 1

        agent.update_target_net()

        if train_scores:
            train_scores = np.array(train_scores)
            print("Results: mean: %.1f +/- %.1f," % (train_scores.mean(), train_scores.std()),
                  "min: %.1f," % train_scores.min(), "max: %.1f," % train_scores.max())
        else:
            # min()/max() of an empty array raise ValueError; this happens when
            # no episode terminates within the epoch's step budget.
            print("Results: no training episode finished during this epoch")

        test(game, agent)
        if save_model:
            print("Saving the network weights to:", model_savefile)
            # The whole module object is saved (not just its state_dict), which
            # is the format DQNAgent reads back with torch.load.
            torch.save(agent.q_net, model_savefile)
        print("Total elapsed time: %.2f minutes" % ((time() - start_time) / 60.0))

    game.close()
    return agent, game

In [8]:
class DuelQNet(nn.Module):
    """
    Dueling DQN network, see https://arxiv.org/abs/1511.06581.

    Four convolution stages produce features that are reshaped to 192 values
    per row; the first 96 feed the state-value head and the last 96 feed the
    advantage head. The output is value + (advantage - mean advantage).
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, stride):
        """Return one 3x3 convolution -> batch norm -> ReLU stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, bias=True),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    @staticmethod
    def _head(out_features):
        """Return a 96 -> 64 -> out_features fully connected head."""
        return nn.Sequential(
            nn.Linear(96, 64),
            nn.ReLU(),
            nn.Linear(64, out_features),
        )

    def __init__(self, available_actions_count):
        super().__init__()
        # Attribute names and creation order are part of the contract: they
        # determine the state_dict keys and the order of random initialisation.
        self.conv1 = self._conv_stage(1, 8, stride=2)
        self.conv2 = self._conv_stage(8, 8, stride=2)
        self.conv3 = self._conv_stage(8, 8, stride=1)
        self.conv4 = self._conv_stage(8, 16, stride=1)

        self.state_fc = self._head(1)
        self.advantage_fc = self._head(available_actions_count)

    def forward(self, x):
        features = x
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            features = stage(features)
        features = features.view(-1, 192)

        value_in = features[:, :96]       # input for the state-value head
        advantage_in = features[:, 96:]   # input for the advantage head

        state_value = self.state_fc(value_in).reshape(-1, 1)
        advantage_values = self.advantage_fc(advantage_in)
        centred_advantage = advantage_values - advantage_values.mean(dim=1, keepdim=True)

        return state_value + centred_advantage

In [9]:
class DQNAgent:
    """
    Epsilon-greedy agent trained with double Q-learning on a replay memory.

    Two networks are held: `q_net`, which is optimised with SGD, and
    `target_net`, which is only refreshed through `update_target_net()`.
    """

    def __init__(self, action_size, memory_size, batch_size, discount_factor, 
                 lr, load_model, epsilon=1, epsilon_decay=0.9996, epsilon_min=0.1):
        """
        Args:
            action_size: number of discrete actions.
            memory_size: maximum number of transitions kept in replay memory.
            batch_size: transitions sampled per training step.
            discount_factor: discount applied to next-state values.
            lr: SGD learning rate.
            load_model: when true, both networks are loaded from the
                module-level `model_savefile` and epsilon starts at
                `epsilon_min`; otherwise fresh DuelQNet instances are created.
            epsilon: initial exploration probability.
            epsilon_decay: multiplicative decay applied after each training step.
            epsilon_min: lower bound for epsilon.
        """
        self.action_size = action_size
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.batch_size = batch_size
        self.discount = discount_factor
        self.lr = lr
        self.memory = deque(maxlen=memory_size)
        self.criterion = nn.MSELoss()
        # Captured once so every method moves tensors to the same device.
        self.device = DEVICE

        if load_model:
            print("Loading model from: ", model_savefile)
            # map_location lets a checkpoint saved on CUDA be loaded on a
            # CPU-only machine (and vice versa).
            # NOTE(review): torch.load unpickles a whole module here, which can
            # execute arbitrary code - only load trusted files. Recent PyTorch
            # releases default to weights_only=True and may reject this format;
            # confirm against the installed version.
            self.q_net = torch.load(model_savefile, map_location=self.device)
            self.target_net = torch.load(model_savefile, map_location=self.device)
            self.epsilon = self.epsilon_min

        else:
            print("Initializing new model")
            self.q_net = DuelQNet(action_size).to(self.device)
            self.target_net = DuelQNet(action_size).to(self.device)

        self.opt = optim.SGD(self.q_net.parameters(), lr=self.lr)

    def get_action(self, state):
        """
        Return an action index for `state`.

        With probability `epsilon` a uniformly random action is returned;
        otherwise the argmax of `q_net` on the state (with a batch axis added).
        """
        if np.random.uniform() < self.epsilon:
            return random.randrange(self.action_size)

        state = np.expand_dims(state, axis=0)
        state = torch.from_numpy(state).float().to(self.device)
        # Only the argmax is needed, so no autograd graph is recorded.
        with torch.no_grad():
            return torch.argmax(self.q_net(state)).item()

    def update_target_net(self):
        """Copy the current `q_net` parameters and buffers into `target_net`."""
        print("Updating Target Net")
        self.target_net.load_state_dict(self.q_net.state_dict())

    def append_memory(self, state, action, reward, next_state, done):
        """Store one transition; the oldest is dropped when memory is full."""
        self.memory.append((state, action, reward, next_state, done))

    def train(self):
        """
        Perform one SGD step on a random minibatch and decay epsilon.

        Does nothing while the replay memory holds fewer than `batch_size`
        transitions (sampling would otherwise raise ValueError).
        """
        if len(self.memory) < self.batch_size:
            return

        batch = random.sample(self.memory, self.batch_size)
        # Transpose the list of transitions into per-field tuples; this avoids
        # building a ragged object array out of mixed arrays and scalars.
        states, actions, rewards, next_states, dones = zip(*batch)

        states = torch.from_numpy(np.stack(states)).float().to(self.device)
        next_states = torch.from_numpy(np.stack(next_states)).float().to(self.device)
        actions = torch.as_tensor(actions, dtype=torch.long, device=self.device)
        q_targets = np.asarray(rewards, dtype=float)
        not_dones = ~np.asarray(dones, dtype=bool)

        # value of the next states with double q learning
        # see https://arxiv.org/abs/1509.06461 for more information on double q learning
        with torch.no_grad():
            # q_net picks the next action, target_net evaluates it.
            greedy_next = self.q_net(next_states).argmax(dim=1, keepdim=True)
            next_state_values = self.target_net(next_states).gather(1, greedy_next)
            next_state_values = next_state_values.squeeze(1).cpu().numpy()

        # y = r + discount * Q_target(s', argmax_a Q(s', a)); terminal
        # transitions keep y = r.
        q_targets[not_dones] += self.discount * next_state_values[not_dones]
        q_targets = torch.from_numpy(q_targets).float().to(self.device)

        # this selects only the q values of the actions taken
        action_values = self.q_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)

        self.opt.zero_grad()
        td_error = self.criterion(action_values, q_targets)
        td_error.backward()
        self.opt.step()

        # Decay exploration but never let it fall below the floor.
        self.epsilon = max(self.epsilon * self.epsilon_decay, self.epsilon_min)

In [10]:
# Entry point: build the game and agent, optionally train, then replay
# `episodes_to_watch` episodes. The names bound here (game, actions, agent)
# are module-level; test() reads `actions` from this scope.
if __name__ == '__main__':
    # Initialize game and actions
    game = create_simple_game()
    n = game.get_available_buttons_size()
    # Every on/off combination of the n available buttons (2**n actions).
    actions = [list(a) for a in it.product([0, 1], repeat=n)]

    # Initialize our agent with the set parameters
    agent = DQNAgent(len(actions), lr=learning_rate, batch_size=batch_size,
                     memory_size=replay_memory_size, discount_factor=discount_factor,
                     load_model=load_model)

    # Run the training for the set number of epochs
    if not skip_learning:
        agent, game = run(game, agent, actions, num_epochs=train_epochs, 
                          frame_repeat=frame_repeat,
                          steps_per_epoch=learning_steps_per_epoch)

        print("======================================")
        print("Training finished. It's time to watch!")

    # Reinitialize the game with window visible
    # run() has already closed the game when training ran; the close() here
    # also covers the skip_learning path. The mode is switched to ASYNC_PLAYER
    # before init() is called again.
    game.close()
    game.set_window_visible(True)
    game.set_mode(Mode.ASYNC_PLAYER)
    game.init()

    for _ in range(episodes_to_watch):
        game.new_episode()
        while not game.is_episode_finished():
            state = preprocess(game.get_state().screen_buffer)
            # get_action is epsilon-greedy, so with probability agent.epsilon
            # the watched agent still takes a random action.
            best_action_index = agent.get_action(state)

            # Instead of make_action(a, frame_repeat) in order to make the animation smooth
            game.set_action(actions[best_action_index])
            # The inner loop reuses `_`; the outer loop does not read it.
            for _ in range(frame_repeat):
                game.advance_action()

        # Sleep between episodes
        sleep(1.0)
        score = game.get_total_reward()
        print("Total score: ", score)

Initializing doom...
Doom initialized.
Loading model from:  /home/spillingvoid/programs/Wilco/model/wilco-doom.pth


  0%|          | 0/12000 [00:00<?, ?it/s]


Epoch #1


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -25.9 +/- 107.1, min: -360.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:58, 18.21it/s]

Results: mean: -3.7 +/- 86.0, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 14.66 minutes

Epoch #2


  1%|          | 1/100 [00:00<00:11,  8.37it/s]      

Updating Target Net
Results: mean: 5.1 +/- 84.4, min: -365.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:47, 18.53it/s]

Results: mean: 20.7 +/- 63.8, min: -300.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 30.07 minutes

Epoch #3


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: 3.5 +/- 80.6, min: -360.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:29, 17.40it/s]

Results: mean: -0.3 +/- 81.7, min: -310.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 45.62 minutes

Epoch #4


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: 7.0 +/- 77.1, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<12:37, 15.84it/s]

Results: mean: -4.2 +/- 93.8, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 61.16 minutes

Epoch #5


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: 12.2 +/- 73.5, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<12:09, 16.44it/s]

Results: mean: 10.4 +/- 82.6, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 76.68 minutes

Epoch #6


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: 2.3 +/- 85.7, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:26, 17.48it/s]

Results: mean: -1.3 +/- 93.5, min: -320.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 90.87 minutes

Epoch #7


  1%|          | 1/100 [00:00<00:14,  6.64it/s]      

Updating Target Net
Results: mean: -7.1 +/- 95.2, min: -360.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:01, 19.95it/s]

Results: mean: -17.1 +/- 115.2, min: -325.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 104.72 minutes

Epoch #8


  1%|          | 1/100 [00:00<00:15,  6.40it/s]      

Updating Target Net
Results: mean: -9.7 +/- 101.9, min: -345.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:17, 17.71it/s]

Results: mean: 2.9 +/- 90.9, min: -325.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 118.56 minutes

Epoch #9


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -11.1 +/- 100.2, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:19, 19.38it/s]

Results: mean: -30.3 +/- 118.1, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 132.63 minutes

Epoch #10


  1%|          | 1/100 [00:00<00:14,  6.85it/s]      

Updating Target Net
Results: mean: -40.1 +/- 120.9, min: -350.0, max: 66.0,

Testing...


  0%|          | 3/12000 [00:00<10:09, 19.68it/s]

Results: mean: -51.4 +/- 134.6, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 146.80 minutes

Epoch #11


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -45.1 +/- 120.8, min: -365.0, max: 66.0,

Testing...


  0%|          | 0/12000 [00:00<?, ?it/s]        

Results: mean: -62.1 +/- 134.0, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 160.99 minutes

Epoch #12


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -71.7 +/- 129.2, min: -365.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:36, 17.22it/s]

Results: mean: -58.6 +/- 128.0, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 175.15 minutes

Epoch #13


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -99.1 +/- 140.3, min: -360.0, max: 66.0,

Testing...


  0%|          | 3/12000 [00:00<10:37, 18.81it/s]

Results: mean: -99.6 +/- 137.4, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 189.23 minutes

Epoch #14


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -104.0 +/- 142.8, min: -360.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:32, 17.31it/s]

Results: mean: -104.3 +/- 140.4, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 203.51 minutes

Epoch #15


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -113.8 +/- 141.9, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:52, 18.40it/s]

Results: mean: -131.3 +/- 135.8, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 217.89 minutes

Epoch #16


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -114.8 +/- 148.1, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:09, 17.92it/s]

Results: mean: -103.7 +/- 156.2, min: -355.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 232.04 minutes

Epoch #17


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -111.2 +/- 150.2, min: -370.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<12:35, 15.88it/s]

Results: mean: -153.9 +/- 150.1, min: -350.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 246.41 minutes

Epoch #18


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -116.9 +/- 152.0, min: -365.0, max: 66.0,

Testing...


  0%|          | 0/12000 [00:00<?, ?it/s]        

Results: mean: -125.5 +/- 160.2, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 260.66 minutes

Epoch #19


  1%|          | 1/100 [00:00<00:17,  5.62it/s]      

Updating Target Net
Results: mean: -104.7 +/- 152.2, min: -365.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:47, 18.54it/s]

Results: mean: -83.8 +/- 137.0, min: -360.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 274.75 minutes

Epoch #20


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -92.9 +/- 152.6, min: -365.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:48, 16.93it/s]

Results: mean: -101.0 +/- 154.9, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 288.93 minutes

Epoch #21


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -104.8 +/- 154.1, min: -365.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:49, 16.90it/s]

Results: mean: -108.2 +/- 148.7, min: -350.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 303.14 minutes

Epoch #22


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -102.8 +/- 151.7, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<12:48, 15.62it/s]

Results: mean: -119.4 +/- 155.2, min: -350.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 317.45 minutes

Epoch #23


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -108.2 +/- 153.7, min: -355.0, max: 66.0,

Testing...


  0%|          | 0/12000 [00:00<?, ?it/s]        

Results: mean: -138.4 +/- 159.7, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 331.83 minutes

Epoch #24


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -126.5 +/- 155.2, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<13:55, 14.35it/s]

Results: mean: -104.9 +/- 147.6, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 346.08 minutes

Epoch #25


  1%|          | 1/100 [00:00<00:15,  6.46it/s]      

Updating Target Net
Results: mean: -90.2 +/- 146.6, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<12:15, 16.31it/s]

Results: mean: -118.0 +/- 152.9, min: -350.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 360.29 minutes

Epoch #26


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -87.3 +/- 149.4, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:01, 18.14it/s]

Results: mean: -72.8 +/- 144.4, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 374.38 minutes

Epoch #27


  1%|          | 1/100 [00:00<00:14,  6.66it/s]      

Updating Target Net
Results: mean: -113.6 +/- 160.1, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:43, 17.05it/s]

Results: mean: -98.2 +/- 155.9, min: -355.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 388.45 minutes

Epoch #28


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -84.6 +/- 153.0, min: -355.0, max: 66.0,

Testing...


  0%|          | 3/12000 [00:00<09:23, 21.30it/s]

Results: mean: -100.9 +/- 155.6, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 402.58 minutes

Epoch #29


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -101.8 +/- 155.5, min: -365.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:21, 17.61it/s]

Results: mean: -109.4 +/- 155.0, min: -330.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 416.74 minutes

Epoch #30


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -76.2 +/- 145.8, min: -365.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<12:44, 15.69it/s]

Results: mean: -92.7 +/- 159.1, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 431.06 minutes

Epoch #31


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -60.6 +/- 136.0, min: -365.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<12:31, 15.96it/s]

Results: mean: -60.1 +/- 137.6, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 445.18 minutes

Epoch #32


  1%|          | 1/100 [00:00<00:14,  7.07it/s]      

Updating Target Net
Results: mean: -49.3 +/- 128.0, min: -365.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:04, 18.06it/s]

Results: mean: -63.8 +/- 129.0, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 459.37 minutes

Epoch #33


  1%|          | 1/100 [00:00<00:13,  7.21it/s]      

Updating Target Net
Results: mean: -44.1 +/- 120.8, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:14, 19.52it/s]

Results: mean: -44.8 +/- 121.5, min: -330.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 473.48 minutes

Epoch #34


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -50.7 +/- 129.4, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:37, 18.81it/s]

Results: mean: -68.7 +/- 134.8, min: -355.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 487.87 minutes

Epoch #35


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -57.7 +/- 135.2, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:42, 17.07it/s]

Results: mean: -93.8 +/- 150.2, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 502.06 minutes

Epoch #36


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -66.5 +/- 139.0, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:55, 18.29it/s]

Results: mean: -62.3 +/- 140.5, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 516.20 minutes

Epoch #37


  1%|          | 1/100 [00:00<00:18,  5.28it/s]      

Updating Target Net
Results: mean: -61.6 +/- 137.6, min: -355.0, max: 66.0,

Testing...


  0%|          | 0/12000 [00:00<?, ?it/s]        

Results: mean: -61.0 +/- 140.2, min: -350.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 530.35 minutes

Epoch #38


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -68.0 +/- 135.4, min: -355.0, max: 66.0,

Testing...


  0%|          | 3/12000 [00:00<10:19, 19.35it/s]

Results: mean: -61.8 +/- 140.2, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 544.60 minutes

Epoch #39


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -48.6 +/- 128.4, min: -360.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:16, 17.74it/s]

Results: mean: -37.0 +/- 129.9, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 558.81 minutes

Epoch #40


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -53.8 +/- 133.4, min: -355.0, max: 66.0,

Testing...


  0%|          | 3/12000 [00:00<09:23, 21.29it/s]

Results: mean: -63.3 +/- 131.7, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 573.15 minutes

Epoch #41


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -54.5 +/- 134.8, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<13:29, 14.81it/s]

Results: mean: -47.0 +/- 131.2, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 587.34 minutes

Epoch #42


  1%|          | 1/100 [00:00<00:15,  6.44it/s]      

Updating Target Net
Results: mean: -81.7 +/- 149.2, min: -360.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:54, 16.78it/s]

Results: mean: -75.7 +/- 151.3, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 601.73 minutes

Epoch #43


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -101.2 +/- 151.1, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:16, 17.74it/s]

Results: mean: -107.0 +/- 137.3, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 616.16 minutes

Epoch #44


  1%|          | 1/100 [00:00<00:15,  6.24it/s]      

Updating Target Net
Results: mean: -97.4 +/- 149.7, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:27, 19.11it/s]

Results: mean: -95.5 +/- 154.3, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 630.55 minutes

Epoch #45


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -87.8 +/- 149.5, min: -355.0, max: 66.0,

Testing...


  0%|          | 3/12000 [00:00<10:26, 19.15it/s]

Results: mean: -106.1 +/- 158.6, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 644.96 minutes

Epoch #46


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -82.9 +/- 146.6, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:51, 16.87it/s]

Results: mean: -50.6 +/- 134.6, min: -360.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 659.21 minutes

Epoch #47


  1%|          | 1/100 [00:00<00:15,  6.31it/s]      

Updating Target Net
Results: mean: -73.7 +/- 140.6, min: -345.0, max: 66.0,

Testing...


  0%|          | 0/12000 [00:00<?, ?it/s]        

Results: mean: -48.6 +/- 132.2, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 673.58 minutes

Epoch #48


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -58.6 +/- 134.0, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:00, 18.17it/s]

Results: mean: -68.6 +/- 146.5, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 687.93 minutes

Epoch #49


  1%|          | 1/100 [00:00<00:14,  6.98it/s]      

Updating Target Net
Results: mean: -51.2 +/- 129.5, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:26, 19.16it/s]

Results: mean: -61.1 +/- 135.0, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 702.39 minutes

Epoch #50


  1%|          | 1/100 [00:00<00:16,  6.14it/s]      

Updating Target Net
Results: mean: -64.9 +/- 137.0, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:05, 18.04it/s]

Results: mean: -70.6 +/- 141.4, min: -350.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 716.80 minutes

Epoch #51


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -63.1 +/- 134.8, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<12:03, 16.59it/s]

Results: mean: -76.2 +/- 133.4, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 731.29 minutes

Epoch #52


  1%|          | 1/100 [00:00<00:18,  5.45it/s]      

Updating Target Net
Results: mean: -71.7 +/- 140.3, min: -345.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:49, 16.91it/s]

Results: mean: -53.9 +/- 118.3, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 745.70 minutes

Epoch #53


  1%|          | 1/100 [00:00<00:17,  5.53it/s]      

Updating Target Net
Results: mean: -38.5 +/- 123.2, min: -355.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:24, 19.23it/s]

Results: mean: -23.5 +/- 108.4, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 760.12 minutes

Epoch #54


  1%|          | 1/100 [00:00<00:15,  6.44it/s]      

Updating Target Net
Results: mean: -25.6 +/- 113.8, min: -345.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:58, 16.69it/s]

Results: mean: -12.9 +/- 104.8, min: -330.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 774.46 minutes

Epoch #55


  1%|          | 1/100 [00:00<00:18,  5.35it/s]      

Updating Target Net
Results: mean: -14.7 +/- 103.4, min: -340.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<13:41, 14.61it/s]

Results: mean: -20.2 +/- 104.5, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 788.87 minutes

Epoch #56


  1%|          | 1/100 [00:00<00:13,  7.16it/s]      

Updating Target Net
Results: mean: -13.5 +/- 108.4, min: -345.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:00, 19.96it/s]

Results: mean: -27.2 +/- 113.8, min: -325.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 803.30 minutes

Epoch #57


  1%|          | 1/100 [00:00<00:15,  6.29it/s]      

Updating Target Net
Results: mean: -17.4 +/- 106.8, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:01, 18.12it/s]

Results: mean: -12.8 +/- 97.0, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 817.85 minutes

Epoch #58


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -13.4 +/- 105.3, min: -345.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:36, 18.85it/s]

Results: mean: -0.3 +/- 91.1, min: -340.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 832.31 minutes

Epoch #59


  1%|          | 1/100 [00:00<00:15,  6.29it/s]      

Updating Target Net
Results: mean: -15.9 +/- 104.0, min: -345.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:15, 17.75it/s]

Results: mean: -16.4 +/- 103.0, min: -325.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 846.79 minutes

Epoch #60


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -7.8 +/- 97.6, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:55, 18.30it/s]

Results: mean: 2.8 +/- 89.2, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 861.33 minutes

Epoch #61


  1%|          | 1/100 [00:00<00:16,  6.04it/s]      

Updating Target Net
Results: mean: -19.6 +/- 108.6, min: -340.0, max: 66.0,

Testing...


  0%|          | 3/12000 [00:00<10:26, 19.14it/s]

Results: mean: -32.1 +/- 118.6, min: -330.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 875.79 minutes

Epoch #62


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -22.2 +/- 110.5, min: -350.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<10:26, 19.15it/s]

Results: mean: 0.6 +/- 81.4, min: -325.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 890.27 minutes

Epoch #63


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -24.9 +/- 110.7, min: -345.0, max: 66.0,

Testing...


  0%|          | 2/12000 [00:00<11:07, 17.99it/s]

Results: mean: -27.3 +/- 109.8, min: -345.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 904.86 minutes

Epoch #64


  0%|          | 0/100 [00:00<?, ?it/s]              

Updating Target Net
Results: mean: -3.3 +/- 89.6, min: -345.0, max: 66.0,

Testing...


                                                 

Results: mean: -5.9 +/- 86.0, min: -335.0 max: 66.0
Saving the network weights to: /home/spillingvoid/programs/Wilco/model/wilco-doom.pth
Total elapsed time: 919.33 minutes
Training finished. It's time to watch!
Total score:  42.0
Total score:  30.0
Total score:  37.0
Total score:  42.0
Total score:  -195.0
Total score:  -10.0
Total score:  -164.0
Total score:  -44.0
Total score:  42.0
Total score:  -4.0
