# 1. Get VizDoom Up and Running

In [8]:
!pip install vizdoom



You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [9]:
!cd github & git clone https://github.com/Farama-Foundation/ViZDoom.git

fatal: destination path 'ViZDoom' already exists and is not an empty directory.


In [12]:
# Import vizdoom for game environment
from vizdoom import *
# Import random to take random actions
import random
# Import time to slow down game, sleep between frames
import time
# Import numpy for identity matrix
import numpy as np

In [4]:
# Setup Game
game = DoomGame()
game.load_config('github/ViZDoom/scenarios/basic.cfg')
game.init()

In [5]:
# Define the actions we can take in the environment - Move left, move right, attack
actions = np.identity(3, dtype=np.uint8) 

In [6]:
random.choice(actions)

array([0, 1, 0], dtype=uint8)

In [7]:
# Loop through episodes
episodes = 10 # Number of games to play
for episode in range(episodes):
    # Create new episode or game
    game.new_episode()
    # Check that the game isn't done
    while not game.is_episode_finished():
        # Get the game state
        state = game.get_state()
        # Get the game image
        img = state.screen_buffer
        # Get the game variables (in this case, ammo)
        info = state.game_variables
        # Take an action. Pass in frame skip to give AI time to process each action reward
        reward = game.make_action(random.choice(actions), 4)
        # Print the reward for each action
        print('Reward:', reward)
        time.sleep(0.02)
    # Print total reward for full game
    print('Result:', game.get_total_reward())
    time.sleep(2)

Reward: -4.0
Reward: 99.0
Result: 95.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -9.0

ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly.

In [None]:
game.close()

# 2. Converting the Environment to a Gym Envrionment

In [13]:
!pip install gym



You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [14]:
# Import environment base class from OpenAI Gym
from gym import Env
# Import gym spaces
from gym.spaces import Discrete, Box
# Import opencv, Used to greyscale observations to make processing environment faster
import cv2

In [15]:
# Create VIZDoom OpenAI Gym Environment
class VizDoomGym(Env):
    
    # Function that is called when we start the environment
    def __init__(self, render=False):
        # Inherit from Env
        super().__init__()
        
        # Setup the game
        self.game = DoomGame()
        self.game.load_config('github/ViZDoom/scenarios/basic.cfg') # Pass in whatever environment you need from the scenarios folder.
        
        # Define whether or not to render the game window.
        # Rendering the window takes away from computing power, so disabling is ideal for testing
        if render == False:
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        self.game.init()
        
        # Create the action and observation space
        self.observation_space = Box(low=0, high=255, shape=(100, 160, 1), dtype=np.uint8)
        self.action_space = Discrete(3)
    
    # How we take a step in the environment
    def step(self, action):
        # Specify action and take step within game
        actions = np.identity(3, dtype=np.uint8)
        reward = self.game.make_action(actions[action], 4)
        
        # Get all other stuff we need to return
        if self.game.get_state():
            state = self.game.get_state().screen_buffer
            state = self.greyscale(state)
            ammo = self.game.get_state().game_variables[0]
            info = ammo
        else:
            state = np.zeros(self.observation_space.shape)
            info = 0
            
        info = {"info": info}
            
        done = self.game.is_episode_finished()
        
        return state, reward, done, info
    
    # Define how to render the game or environment. ViZDOom already defines this for us, so just pass.
    def render():
        pass
    
    # What happens when we start a new gmae
    def reset(self):
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.greyscale(state)
    
    # Custom function. Greyscale the game frame and resize it
    def greyscale(self, observation):
        # Reshape the observation array for cvtColor and change color channels
        grey = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        # Resize the image and scale down so there are less pixels to process
        resize = cv2.resize(grey, (160, 100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100, 160, 1))
        return state
    
    # Call to close down the game
    def close(self):
        self.game.close()

In [None]:
env = VizDoomGym(render=True)

In [None]:
state = env.reset()

In [None]:
env.close()

In [None]:
# Import Environment checker
from stable_baselines3.common import env_checker

In [None]:
env_checker.check_env(env)

# 3. View State

In [8]:
!pip install matplotlib



You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [9]:
from matplotlib import pyplot as plt

In [10]:
plt.imshow(cv2.cvtColor(state, cv2.COLOR_BGR2RGB))

NameError: name 'state' is not defined

# 4. Log the Results

In [11]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117

Looking in indexes: https://download.pytorch.org/whl/cu117


You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [12]:
!pip install stable-baselines3[extra]



You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [13]:
# Import os for file navigation
import os
# Import callback class from stable baselines 3
from stable_baselines3.common.callbacks import BaseCallback

In [14]:
class TrainAndLoggingCallback(BaseCallback):
    
    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path
        
    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)
    
    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)
            
        return True

In [15]:
CHECKPOINT_DIR = './train/train_basic'
LOG_DIR = './logs/log_basic'

In [16]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

# 5. Train Model

In [16]:
# Import the PPO algorithm for training
from stable_baselines3 import PPO

In [17]:
env = VizDoomGym()

In [21]:
# CnnPolicy because we are passing in an image
# Cnn = Convolution neural network
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=2048)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [22]:
model.learn(total_timesteps=100000, callback=callback)

Logging to ./logs/log_basic\PPO_3
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 31.6     |
|    ep_rew_mean     | -78.9    |
| time/              |          |
|    fps             | 37       |
|    iterations      | 1        |
|    time_elapsed    | 55       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 34.4        |
|    ep_rew_mean          | -95.2       |
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 2           |
|    time_elapsed         | 115         |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.003975576 |
|    clip_fraction        | 0.137       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.00016  

KeyboardInterrupt: 

To view the logs run `tensorboard --logdir=.` in the PPO dir of the model run.  (CHECK 1 42 00 IN THE VIDEO FOR EXPLANATION)
Explaining the data:  
> 1. `ep_len_mean`: Mean episode length (averaged over stats_window_size episodes, 100 by default)
> 2. `ep_rew_mean`: Mean episodic training reward (averaged over stats_window_size episodes, 100 by default).
> 3.
> 4.
> 5.

# 6. Test the Model

In [18]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy

In [19]:
# Reload model from disc
model = PPO.load('./train/train_basic/best_model_110000')

In [20]:
# Create rendered environment
env = VizDoomGym(render=True)

In [31]:
# Evaluate mean reward for 100 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=100)

In [32]:
mean_reward

86.99

In [24]:
for episode in range(5):
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(0.25)
        total_reward += reward
    print('Total reward for episode {} is {}'.format(total_reward, episode))
    time.sleep(2)

Total reward for episode 67.0 is 0
Total reward for episode 95.0 is 1
Total reward for episode 95.0 is 2
Total reward for episode 95.0 is 3
Total reward for episode 95.0 is 4


In [25]:
env.close()