# 1. Getting VizDoom Up and Running

In [1]:
!pip install vizdoom



In [2]:
!cd github & git clone https://github.com/Farama-Foundation/ViZDoom

fatal: destination path 'ViZDoom' already exists and is not an empty directory.


In [4]:
#Import vizdoom for game env
from vizdoom import *
#Import random for action sampling
import random
#Import time for sleeping b/w frames
import time
#Import numpy for identity matrix
import numpy as np

In [5]:
#Setup game
game = DoomGame()
#The repository is cloned as "ViZDoom"; match that casing so the path also resolves on case-sensitive filesystems
game.load_config('github/ViZDoom/scenarios/basic.cfg')
game.init()

In [5]:
#Each row of this 3x3 identity matrix is one action we can take in the environment (a one-hot vector)
actions = np.eye(3, dtype=np.uint8)

In [6]:
#Loop through episodes, playing each one with randomly sampled actions
episodes = 10
for episode in range(episodes):
    #Create a new episode or game
    game.new_episode()
    #Keep stepping until the game reports that the episode is finished
    while not game.is_episode_finished():
        #Get the game state
        state = game.get_state()
        #Get the game image (not used further in this cell)
        img = state.screen_buffer
        #Get the game variables - ammo (not used further in this cell)
        info = state.game_variables
        #Take a random action; 4 is make_action's second argument (presumably the number of tics to repeat the action for - confirm)
        reward = game.make_action(random.choice(actions),4)
        #Print reward
        print('Reward:', reward)
        #Short pause between steps
        time.sleep(0.02)
    #Print the total reward of the episode that just ended
    print('Result:', game.get_total_reward())
    #Pause before the next episode starts
    time.sleep(2)

Reward: -4.0
Reward: -4.0
Reward: 99.0
Result: 91.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -9.0
Reward: -4.0
Reward: -4.0
Reward: -4.0
Reward: -4.0

ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly.

In [None]:
#Shut down the ViZDoom instance held by game
game.close()


# 2. Converting it to a Gym Environment

In [None]:
!pip install gym

In [6]:
#Import Gymnasium (aliased as gym), which provides the Env base class
import gymnasium as gym

#Import gym spaces
from gymnasium.spaces import Discrete, Box
#Import opencv
import cv2
#Import vizdoom for game env
from vizdoom import *
#Import random for action sampling
import random
#Import time for sleeping b/w frames
import time
#Import numpy for identity matrix
import numpy as np

In [7]:
#Create Vizdoom OpenAI Gym Environment
class VizDoomGym(gym.Env):
    """Gymnasium wrapper around a ViZDoom scenario (``basic`` by default).

    Observations are single-channel 100x160 uint8 frames; the action space
    has three discrete actions, each mapped to a one-hot button vector.
    """

    #One-hot button vectors, built once instead of on every step
    _ACTIONS = np.identity(3, dtype=np.uint8)

    #Function that is called when we start the env
    def __init__(self, render=False, config='github/ViZDoom/scenarios/basic.cfg'):
        """Load the scenario and start the game.

        Args:
            render: show the game window when truthy, keep it hidden otherwise.
            config: path to the scenario .cfg file. The default uses the
                "ViZDoom" casing of the cloned repository so that it also
                resolves on case-sensitive filesystems.
        """
        #Inherit from Env
        super().__init__()
        #Setup the game
        self.game = DoomGame()
        self.game.load_config(config)

        #Render frame logic
        self.game.set_window_visible(bool(render))

        #Start the game
        self.game.init()

        #Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8)
        self.action_space = Discrete(3)

    #This is how we take a step in the env
    def step(self, action):
        """Apply ``action`` and return the gymnasium 5-tuple.

        Args:
            action: index into the three one-hot button vectors.

        Returns:
            (observation, reward, done, truncated, info). ``truncated`` is
            always False; ``info`` carries the current ammo (0 once the
            episode is over).
        """
        #Specify action and take step (4 is passed as make_action's tic count)
        reward = self.game.make_action(self._ACTIONS[action], 4)

        #Query the state once; it is None when the episode has finished
        game_state = self.game.get_state()
        if game_state is not None:
            state = self.grayscale(game_state.screen_buffer)
            ammo = game_state.game_variables[0]
        else:
            #Blank frame that matches the observation space in shape AND dtype
            state = np.zeros(self.observation_space.shape, dtype=np.uint8)
            ammo = 0

        #"info" is kept for existing callers; "ammo" mirrors the key used by reset()
        info = {"info": ammo, "ammo": ammo}
        done = self.game.is_episode_finished()
        truncated = False

        return state, reward, done, truncated, info

    #Define how to render the game or environment
    def render(self):
        """No-op: window visibility is configured once in ``__init__``."""
        pass

    #What happens when we start a new game
    def reset(self, seed=None, options=None):
        """Start a new episode.

        Args:
            seed: forwarded to ``gym.Env.reset`` so ``self.np_random`` is seeded.
            options: accepted for gymnasium API compatibility; unused.

        Returns:
            (observation, info) where ``info`` holds the starting ammo.
        """
        super().reset(seed=seed)
        self.game.new_episode()
        game_state = self.game.get_state()
        ammo = game_state.game_variables[0]
        info = {"ammo": ammo}
        return self.grayscale(game_state.screen_buffer), info

    #Grayscale the game frame and resize it
    def grayscale(self, observation):
        """Convert a channel-first frame into a (100, 160, 1) grayscale image."""
        #cv2 works on channel-last images, so move the first axis to the end
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        #cv2.resize takes (width, height), hence (160, 100) for a 100x160 result
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state

    #Call to close down the game
    def close(self):
        """Shut down the underlying ViZDoom instance."""
        self.game.close()

In [3]:
#Create an environment with the game window visible
env = VizDoomGym(render=True)

In [None]:
#reset() returns an (observation, info) tuple, so unpack it to keep only the frame in state
state, info = env.reset()

In [7]:
#Close the ViZDoom instance behind env
env.close()

In [8]:
#Import Environment checker
from stable_baselines3.common import env_checker

In [6]:
#The env created above has already been closed by this point, so validate a fresh
#headless instance and always shut it down afterwards
env_under_check = VizDoomGym()
try:
    env_checker.check_env(env_under_check)
finally:
    env_under_check.close()

# 3. View State

In [None]:
!pip install matplotlib


In [10]:
from matplotlib import pyplot as plt

In [None]:
#env.reset() hands back (observation, info); accept either that tuple or the bare frame
frame = state[0] if isinstance(state, tuple) else state
#The frame is single-channel grayscale, so drop the channel axis and plot it with a gray
#colormap (cv2.COLOR_BGR2RGB needs a 3- or 4-channel image)
plt.imshow(np.squeeze(frame, axis=-1), cmap='gray')

# 4. Setup Callback

In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [None]:
!pip install stable-baselines3[extra]

In [9]:
#Import os for file nav
import os
#Import callback class from sb3
from stable_baselines3.common.callbacks import BaseCallback

In [9]:
class TrainAndLoggingCallback(BaseCallback):
    """Save a snapshot of the model every ``check_freq`` callback calls.

    Args:
        check_freq: number of callback calls between two saves.
        save_path: directory the snapshots are written to; None disables saving.
        verbose: verbosity level forwarded to BaseCallback.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        #Make sure the target directory exists before the first save
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        #Same None guard as _init_callback: without a save_path there is nowhere to write to
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, f'best_model_{self.n_calls}')
            self.model.save(model_path)

        #Always report True so this callback never interrupts training
        return True

In [19]:
#Directory handed to the callback as save_path for model checkpoints
CHECKPOINT_DIR = './train/train_basic'
#Directory handed to PPO as tensorboard_log
LOG_DIR = './logs/log_basic'

In [11]:
#Save a checkpoint into CHECKPOINT_DIR every 10000 callback calls
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

# 5. Train Model

In [14]:
#import PPO for training
from stable_baselines3 import PPO

In [16]:
#Non rendered environment: render defaults to False, which keeps the game window hidden
env = VizDoomGym()

In [17]:
#Close the ViZDoom instance behind env
#NOTE(review): this shuts the game down right after it was created; anything that steps env afterwards needs a live instance - confirm this cell is meant to run here
env.close()


In [20]:
#Create the training env right here: the env from the cells above has already been closed,
#and PPO needs a live game to collect rollouts from
env = VizDoomGym()
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=2048)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [19]:
#Train for 100000 timesteps, invoking the checkpoint callback along the way
model.learn(total_timesteps=100000, callback=callback)

Logging to ./logs/log_basic\PPO_6
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 30.2     |
|    ep_rew_mean     | -66.3    |
| time/              |          |
|    fps             | 43       |
|    iterations      | 1        |
|    time_elapsed    | 47       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 27.6        |
|    ep_rew_mean          | -50.4       |
| time/                   |             |
|    fps                  | 41          |
|    iterations           | 2           |
|    time_elapsed         | 98          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008570114 |
|    clip_fraction        | 0.123       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.000102 

<stable_baselines3.ppo.ppo.PPO at 0x22786cbe800>

# 6. Test the Model

In [10]:
#Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy

In [21]:
#Reload model from disk
#NOTE(review): the training cell above requests 100000 timesteps with a checkpoint every 10000 calls - confirm that best_model_120000 actually exists (e.g. from an earlier, longer run)
model = PPO.load('./train/train_basic/best_model_120000')

In [22]:
#Create rendered environment so the games played below are shown in a window
env = VizDoomGym(render=True)

In [23]:
#Evaluate mean reward over 100 games (n_eval_episodes=100); the second return value is discarded
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=100)



In [27]:
#Display the mean reward computed by evaluate_policy above
mean_reward

88.37

In [46]:
#Debug: print the raw (observation, info) tuple returned by reset()
print(env.reset())


(array([[[55],
        [50],
        [59],
        ...,
        [57],
        [57],
        [66]],

       [[68],
        [65],
        [65],
        ...,
        [56],
        [67],
        [72]],

       [[49],
        [79],
        [66],
        ...,
        [79],
        [51],
        [29]],

       ...,

       [[75],
        [63],
        [62],
        ...,
        [44],
        [71],
        [60]],

       [[15],
        [48],
        [47],
        ...,
        [49],
        [69],
        [47]],

       [[22],
        [14],
        [26],
        ...,
        [57],
        [37],
        [39]]], dtype=uint8), {'ammo': 50.0})


In [47]:
#Debug: inspect the type and shape of what reset() returns
obs = env.reset()
print(type(obs))
#A tuple result is reported item by item; getattr falls back to 'N/A' for items without a shape
if isinstance(obs, tuple):
    for i, item in enumerate(obs):
        print(f"Item {i} type: {type(item)}, shape: {getattr(item, 'shape', 'N/A')}")
else:
    #Anything else is treated as a single array-like observation
    print(f"Observation shape: {obs.shape}")


<class 'tuple'>
Item 0 type: <class 'numpy.ndarray'>, shape: (100, 160, 1)
Item 1 type: <class 'dict'>, shape: N/A


In [51]:
#Watch the loaded model play 5 episodes and report the total reward of each
for episode in range(5):
    obs, info = env.reset()
    done = False
    total_reward = 0
    while not done:
        #predict returns (action, state); only the action is needed here
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = env.step(action)
        #An episode is over as soon as it has either terminated or been truncated
        done = terminated or truncated
        #Slow the loop down between steps
        time.sleep(0.05)
        total_reward += reward
    #Episode index first, then its total reward, matching the order of the placeholders
    print('Total Reward for episode {} is {}'.format(episode, total_reward))
    time.sleep(2)

Total Reward for episode 95.0 is 0
Total Reward for episode 67.0 is 1
Total Reward for episode 71.0 is 2
Total Reward for episode 83.0 is 3
Total Reward for episode 95.0 is 4
