In [None]:
%%bash

# Refresh the package index before installing anything.
apt-get update

# ZDoom dependencies
# -y answers apt's confirmation prompt: a %%bash cell has no interactive
# stdin, so without it apt-get aborts whenever extra packages are required.
apt-get install -y build-essential zlib1g-dev libsdl2-dev libjpeg-dev \
nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \
libopenal-dev timidity libwildmidi-dev unzip

# Boost libraries
apt-get install -y libboost-all-dev

# Lua binding dependencies
apt-get install -y liblua5.1-dev

# Install ViZDoom directly from its GitHub repository.
pip install git+https://github.com/mwydmuch/ViZDoom

Get:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/ InRelease [3,626 B]
Ign:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Ign:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Get:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  Release [564 B]
Hit:6 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Get:7 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  Release.gpg [819 B]
Get:8 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease [21.3 kB]
Hit:9 http://archive.ubuntu.com/ubuntu bionic InRelease
Get:11 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Get:12 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  Packages [141 kB]
Get:13 http://ppa.laun

  Running command git clone -q https://github.com/mwydmuch/ViZDoom /tmp/pip-req-build-l6oxeq3a


In [None]:
import tensorflow as tf
import numpy as np
import cv2
import vizdoom

In [None]:
# Module-level ViZDoom game instance shared by the training code below.
# All setters are called before env.init().
env = vizdoom.DoomGame()
# Scenario file is read from a path under ./drive/My Drive (relative to the
# working directory) — presumably a mounted Google Drive; verify before running.
env.set_doom_scenario_path(f"./drive/My Drive/doom/corridor.wad")
env.set_doom_map("map01")

# Screen buffer: 320x240, 3-channel RGB.
env.set_screen_resolution(vizdoom.ScreenResolution.RES_320X240)
env.set_screen_format(vizdoom.ScreenFormat.RGB24)

# Rendering flags: HUD, minimal HUD, crosshair and messages are off;
# the weapon and screen flashes stay on.
env.set_render_hud(False)
env.set_render_minimal_hud(False)
env.set_render_crosshair(False)
env.set_render_weapon(True)
env.set_render_messages(False)
env.set_render_screen_flashes(True)

# Buttons are registered in the order [MOVE_BACKWARD, MOVE_FORWARD]; the
# vectors in `actions` below follow the same order.
env.add_available_button(vizdoom.Button.MOVE_BACKWARD)
env.add_available_button(vizdoom.Button.MOVE_FORWARD)

# POSITION_X is the only registered game variable, so it is read back as
# game_variables[0].
env.add_available_game_variable(vizdoom.GameVariable.POSITION_X)

# Episode time limit — presumably measured in engine tics; confirm against
# the ViZDoom documentation.
env.set_episode_timeout(200)

# No window is shown; the game runs in PLAYER mode.
env.set_window_visible(False)
env.set_mode(vizdoom.Mode.PLAYER)

# Disabled: uncomment the call below to turn on the engine console.
# #env.set_console_enabled(True)

env.init()

# Button vectors indexed by action id:
#   actions[0] presses MOVE_BACKWARD, actions[1] presses MOVE_FORWARD.
actions = [[1, 0], [0, 1]]
# Target x-coordinate of each of the 7 options, spaced 64 units apart.
positions = [32, 96, 160, 224, 288, 352, 416]

def select_action(x, option):
    """Choose the action index that moves from `x` toward an option's target.

    Args:
        x: current x-coordinate.
        option: index into the module-level `positions` list.

    Returns:
        1 when `x` is strictly below `positions[option]`, otherwise 0.
        The result is used as an index into `actions`.
    """
    target = positions[option]
    if x < target:
        return 1
    return 0

def option_reached(x, option):
    """Report whether `x` is close enough to an option's target position.

    Args:
        x: current x-coordinate.
        option: index into the module-level `positions` list.

    Returns:
        True when `x` lies strictly within 5 units of `positions[option]`,
        False otherwise.
    """
    distance = abs(x - positions[option])
    if distance < 5:
        return True
    return False

In [None]:
class HReinforce:
    """Policy-gradient (REINFORCE-style) learner over a fixed set of options.

    A small convolutional network maps a 100x100x3 frame to a softmax over
    `NUM_OPTIONS` options. Transitions are buffered per episode with
    `store_transition` / `store_episode`, and `update` fits the network on one
    stored episode using return-weighted one-hot targets.
    """

    # Number of options the policy chooses between (size of the softmax).
    NUM_OPTIONS = 7
    # Width of the x-interval associated with each option; `x // CELL_WIDTH`
    # is the option treated as "the one we are already at" in select_option.
    CELL_WIDTH = 64

    def __init__(self):
        # Completed episodes, each a (frames, options, rewards) tuple of lists.
        self.memory = []
        # Buffers for the episode currently being collected.
        self.frames = []
        self.options = []
        self.rewards = []
        self.discount_rate = 0.99
        self.learning_rate = 0.001
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the policy network.

        Returns:
            A compiled `tf.keras.Sequential` mapping (100, 100, 3) inputs to a
            softmax over `NUM_OPTIONS` outputs.
        """
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(32, input_shape=(100, 100, 3), kernel_size=(8, 8), strides=(4, 4), activation='relu'))
        model.add(tf.keras.layers.Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
        model.add(tf.keras.layers.Flatten())
        # NOTE(review): this hidden layer has no activation (it is linear) —
        # confirm that is intended.
        model.add(tf.keras.layers.Dense(256))
        model.add(tf.keras.layers.Dense(self.NUM_OPTIONS, activation='softmax'))
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        model.compile(loss="categorical_crossentropy",
                      optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate, clipnorm=1.0))
        model.summary()
        return model

    def _current_cell(self, x):
        """Map an x-coordinate to an integer option index in [0, NUM_OPTIONS)."""
        # `x` typically arrives as a float, and np.delete needs an integer
        # index. Clamp so positions outside the expected range mask the
        # nearest edge option instead of raising or wrapping around.
        cell = int(x // self.CELL_WIDTH)
        return min(max(cell, 0), self.NUM_OPTIONS - 1)

    def select_option(self, frame, x):
        """Sample an option from the policy, excluding the current cell.

        Args:
            frame: image data reshapeable to (1, 100, 100, 3); it is divided
                by 255 before being passed to the model.
            x: current x-coordinate; the option at index `x // CELL_WIDTH`
                (clamped to the valid range) is never returned.

        Returns:
            The index of the sampled option.
        """
        current = self._current_cell(x)
        applicable = np.delete(np.arange(self.NUM_OPTIONS), current)
        frame = np.reshape(frame, (1, 100, 100, 3)) / 255
        prob = np.delete(self.model.predict(frame)[0], current)
        total = np.sum(prob)
        if total == 0:
            # All probability mass was on the masked option: fall back to a
            # uniform choice among the remaining ones.
            return np.random.choice(applicable)
        prob /= total
        return np.random.choice(applicable, 1, p=prob)[0]

    def store_transition(self, frame, option, reward):
        """Append one (frame, option, reward) step to the current episode."""
        self.frames.append(frame)
        self.options.append(option)
        self.rewards.append(reward)

    def store_episode(self):
        """Move the current episode's buffers into `memory` and reset them."""
        self.memory.append((self.frames, self.options, self.rewards))
        self.frames = []
        self.options = []
        self.rewards = []

    def update(self, episode):
        """Fit the policy on one stored episode.

        The last stored transition of the episode is excluded. Discounted
        returns are computed over the remaining steps and written into the
        target row at the chosen option's column, so the categorical
        cross-entropy is weighted by the return of the chosen option.

        Args:
            episode: index into `self.memory`.
        """
        frames, options, rewards = (seq[:-1] for seq in self.memory[episode])

        T = len(frames)
        if T == 0:
            # An episode with a single stored transition leaves nothing to
            # train on once the last one is dropped; skip rather than crash.
            return

        # Discounted return for each step, accumulated backwards.
        returns = np.zeros(T)
        returns[-1] = rewards[-1]
        for t in reversed(range(1, T)):
            returns[t - 1] = rewards[t - 1] + self.discount_rate * returns[t]

        x = np.reshape(frames, (T, 100, 100, 3)) / 255
        y = np.zeros((T, self.NUM_OPTIONS))

        for i in range(T):
            y[i, options[i]] = returns[i]

        self.model.fit(x, y, epochs=1, verbose=0)

    def batch_update(self, start, end):
        """Run `update` on every stored episode with index in [start, end)."""
        for episode in range(start, end):
            self.update(episode)

In [None]:
def train(start, runs, episodes=10000, random_episodes=1000,
          checkpoint_every=1000, save_dir="./drive/My Drive/doom"):
    """Train a fresh `HReinforce` agent for each run and record episode rewards.

    Uses the module-level `env`, `actions`, `positions`, `select_action` and
    `option_reached`. Within an episode the agent repeatedly picks an option,
    then low-level actions are executed until that option is reached or the
    episode ends; the reward summed over that stretch is stored as the
    option's reward.

    Args:
        start: index of the first run to execute. When 0 a new reward table
            is created; otherwise the table is loaded from
            `<save_dir>/basic_rewards.npy`.
        runs: total number of runs; runs `start .. runs - 1` are executed.
        episodes: number of episodes per run.
        random_episodes: number of initial episodes in each run during which
            options are drawn uniformly at random instead of from the policy.
        checkpoint_every: the reward table is saved after every this many
            episodes.
        save_dir: directory for the reward table and the saved models.

    Side effects:
        Writes `<save_dir>/basic_rewards.npy` periodically and
        `<save_dir>/basic_<run>_<episodes - 1>.h5` after each run.
    """
    rewards_path = f"{save_dir}/basic_rewards"
    if start == 0:
        rewards = np.zeros((runs, episodes))
    else:
        # np.save appends ".npy" on write, so it has to be spelled out here.
        rewards = np.load(rewards_path + ".npy")

    for run in range(start, runs):
        print("\nRun " + str(run))

        meta = HReinforce()

        for episode in range(episodes):
            env.new_episode()
            meta_state = env.get_state()
            episode_reward = 0

            frame = cv2.resize(meta_state.screen_buffer, (100, 100))

            while not env.is_episode_finished():
                x = meta_state.game_variables[0]
                if episode < random_episodes:
                    # Warm-up phase: uniform choice over all options,
                    # including the one at the current position.
                    option = np.random.choice(len(positions))
                else:
                    option = meta.select_option(frame, x)
                reached = option_reached(x, option)

                option_reward = 0

                state = meta_state
                while not env.is_episode_finished() and not reached:
                    action = select_action(state.game_variables[0], option)
                    reward = env.make_action(actions[action])
                    next_state = env.get_state()

                    option_reward += reward
                    episode_reward += reward
                    # The falsy check guards the case where get_state()
                    # returns nothing; `state` then keeps the last value seen.
                    if next_state:
                        reached = option_reached(next_state.game_variables[0], option)
                        state = next_state

                # The stored frame is the one observed when the option was
                # chosen, not the one at its end.
                meta.store_transition(frame, option, option_reward)
                meta_state = state
                frame = cv2.resize(meta_state.screen_buffer, (100, 100))

            meta.store_episode()
            # Episodes are appended to meta.memory in order, so the episode
            # counter doubles as the memory index.
            meta.update(episode)

            rewards[run, episode] = episode_reward

            if episode % checkpoint_every == checkpoint_every - 1:
                np.save(rewards_path, rewards)

        meta.model.save(f"{save_dir}/basic_{run}_{episodes - 1}.h5")

In [None]:
# Execute runs 0-9 from scratch: start=0 creates a new reward table instead of loading a saved one.
train(0, 10)


Run 0
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 24, 24, 32)        6176      
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 11, 11, 64)        32832     
_________________________________________________________________
flatten_2 (Flatten)          (None, 7744)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 256)               1982720   
_________________________________________________________________
dense_5 (Dense)              (None, 7)                 1799      
Total params: 2,023,527
Trainable params: 2,023,527
Non-trainable params: 0
_________________________________________________________________
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where





Run 1
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 24, 24, 32)        6176      
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 11, 11, 64)        32832     
_________________________________________________________________
flatten_3 (Flatten)          (None, 7744)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 256)               1982720   
_________________________________________________________________
dense_7 (Dense)              (None, 7)                 1799      
Total params: 2,023,527
Trainable params: 2,023,527
Non-trainable params: 0
_________________________________________________________________





Run 2
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 24, 24, 32)        6176      
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 11, 11, 64)        32832     
_________________________________________________________________
flatten_4 (Flatten)          (None, 7744)              0         
_________________________________________________________________
dense_8 (Dense)              (None, 256)               1982720   
_________________________________________________________________
dense_9 (Dense)              (None, 7)                 1799      
Total params: 2,023,527
Trainable params: 2,023,527
Non-trainable params: 0
_________________________________________________________________





Run 3
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_10 (Conv2D)           (None, 24, 24, 32)        6176      
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 11, 11, 64)        32832     
_________________________________________________________________
flatten_5 (Flatten)          (None, 7744)              0         
_________________________________________________________________
dense_10 (Dense)             (None, 256)               1982720   
_________________________________________________________________
dense_11 (Dense)             (None, 7)                 1799      
Total params: 2,023,527
Trainable params: 2,023,527
Non-trainable params: 0
_________________________________________________________________





Run 4
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 24, 24, 32)        6176      
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 11, 11, 64)        32832     
_________________________________________________________________
flatten_6 (Flatten)          (None, 7744)              0         
_________________________________________________________________
dense_12 (Dense)             (None, 256)               1982720   
_________________________________________________________________
dense_13 (Dense)             (None, 7)                 1799      
Total params: 2,023,527
Trainable params: 2,023,527
Non-trainable params: 0
_________________________________________________________________





Run 5
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 24, 24, 32)        6176      
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 11, 11, 64)        32832     
_________________________________________________________________
flatten_7 (Flatten)          (None, 7744)              0         
_________________________________________________________________
dense_14 (Dense)             (None, 256)               1982720   
_________________________________________________________________
dense_15 (Dense)             (None, 7)                 1799      
Total params: 2,023,527
Trainable params: 2,023,527
Non-trainable params: 0
_________________________________________________________________





Run 6
Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_16 (Conv2D)           (None, 24, 24, 32)        6176      
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 11, 11, 64)        32832     
_________________________________________________________________
flatten_8 (Flatten)          (None, 7744)              0         
_________________________________________________________________
dense_16 (Dense)             (None, 256)               1982720   
_________________________________________________________________
dense_17 (Dense)             (None, 7)                 1799      
Total params: 2,023,527
Trainable params: 2,023,527
Non-trainable params: 0
_________________________________________________________________





Run 7
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 24, 24, 32)        6176      
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 11, 11, 64)        32832     
_________________________________________________________________
flatten_9 (Flatten)          (None, 7744)              0         
_________________________________________________________________
dense_18 (Dense)             (None, 256)               1982720   
_________________________________________________________________
dense_19 (Dense)             (None, 7)                 1799      
Total params: 2,023,527
Trainable params: 2,023,527
Non-trainable params: 0
_________________________________________________________________





Run 8
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_20 (Conv2D)           (None, 24, 24, 32)        6176      
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 11, 11, 64)        32832     
_________________________________________________________________
flatten_10 (Flatten)         (None, 7744)              0         
_________________________________________________________________
dense_20 (Dense)             (None, 256)               1982720   
_________________________________________________________________
dense_21 (Dense)             (None, 7)                 1799      
Total params: 2,023,527
Trainable params: 2,023,527
Non-trainable params: 0
_________________________________________________________________





Run 9
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_22 (Conv2D)           (None, 24, 24, 32)        6176      
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 11, 11, 64)        32832     
_________________________________________________________________
flatten_11 (Flatten)         (None, 7744)              0         
_________________________________________________________________
dense_22 (Dense)             (None, 256)               1982720   
_________________________________________________________________
dense_23 (Dense)             (None, 7)                 1799      
Total params: 2,023,527
Trainable params: 2,023,527
Non-trainable params: 0
_________________________________________________________________


