# Getting VizDoom Up and Running

In [1]:
# Import vizdoom for game env
from vizdoom import * 
# Import random for action sampling
import random
# Import time for sleeping
import time 
# Import numpy for identity matrix
import numpy as np

In [3]:
# Setup game
game = DoomGame()
game.load_config('configs/deadly_corridor.cfg')
game.init()
print(game.get_state().game_variables)

[100.   0.   0.  -1.]


In [4]:
# This is the set of actions we can take in the environment
actions = np.identity(7, dtype=np.uint8)
random.choice(actions)

array([0, 0, 0, 0, 0, 1, 0], dtype=uint8)

In [5]:
state = game.get_state().game_variables

In [6]:
print(state)

[100.   0.   0.  -1.]


In [7]:
# Loop through episodes 
episodes = 1
for episode in range(episodes): 
    # Create a new episode or game 
    game.new_episode()
    # Check the game isn't done 
    while not game.is_episode_finished(): 
        # Get the game state 
        state = game.get_state()
        # Get the game image 
        img = state.screen_buffer
        # Get the game variables - anmo
        info = state.game_variables
        print(info)
        # Take an action
        reward = game.make_action(random.choice(actions),4) # When our agent will take action it will skip 4 frames after taking the action.
        time.sleep(0.02)
    print('Result:', game.get_total_reward())
    time.sleep(2)

[100.   0.   0.  -1.]
[100.   0.   0.  52.]
[100.   0.   0.  52.]
[100.   0.   0.  52.]
[100.   0.   0.  52.]
[100.   0.   0.  52.]
[100.   0.   0.  51.]
[100.   0.   0.  51.]
[100.   0.   0.  51.]
[100.   0.   0.  51.]
[100.   0.   0.  51.]
[100.   0.   0.  51.]
[100.   0.   0.  50.]
[100.   0.   0.  50.]
[100.   0.   0.  50.]
[100.   0.   0.  50.]
[72. 28.  0. 50.]
[72. 28.  0. 50.]
[72. 28.  0. 50.]
[72. 28.  0. 49.]
[72. 28.  0. 49.]
[72. 28.  0. 49.]
[72. 28.  0. 49.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  0. 48.]
[72. 28.  1. 47.]
[72. 28.  1. 47.]
[44. 56.  1. 47.]
[44. 56.  1. 47.]
[44. 56.  1. 47.]
[44. 56.  1. 46.]
[44. 56.  1. 46.]
[44. 56.  1. 46.]
[44. 56.  1. 46.]
[44. 56.  1. 46.]
[44. 56.  1. 46.]
[44. 56.  1. 46.]
[44. 56.  1. 46.]


In [8]:
game.close()

# 2. Converting it to a Gym Environment

In [9]:
# Import environment base class from OpenAI Gym
from gym import Env
# Import gym spaces 
from gym.spaces import Discrete, Box
# Import opencv 
import cv2

In [6]:
actions[Discrete(7).sample()]

array([0, 0, 0, 0, 0, 0, 1], dtype=uint8)

In [None]:
Box(low=0, high=10, shape=(320,240), dtype=np.uint8).sample()

In [14]:
# Create Vizdoom OpenAI Gym Environment
class VizDoomGym(Env): 
    # Function that is called when we start the env
    def __init__(self, render=False, doom_skill=1): 
        # Inherit from Env
        super().__init__()
        # Setup the game 
        self.game = DoomGame()
        self.game.load_config('configs/deadly_corridor.cfg')
        
        # Render frame logic
        self.game.set_window_visible(render)
                
        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8) 
        self.action_space = Discrete(7)
        
        # Game variables: HEALTH DAMAGE_TAKEN HITCOUNT SELECTED_WEAPON_AMMO
        self.damage_taken = 0
        self.hitcount = 0
        self.ammo = 52 # CHANGED

        # Skill level can now be set during initialization
        self.game.set_doom_skill(doom_skill)

        # Start the game 
        self.game.init()
        
    # Setting up the doom_skill
    def set_skill_level(self, skill_level):
        self.doom_skill = skill_level
        self.game.close()  # Close the existing game instance
        self.game = DoomGame()  # Create a new game instance
        self.game.load_config('configs/deadly_corridor.cfg')
        self.game.set_doom_skill(skill_level)


    # This is how we take a step in the environment
    def step(self, action):
        # Specify action and take step 
        actions = np.identity(7)
        movement_reward = self.game.make_action(actions[action], 4) 
        
        reward = 0 
        # Get all the other stuff we need to retun 
        if self.game.get_state(): 
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            
            # Reward shaping
            game_variables = self.game.get_state().game_variables
            health, damage_taken, hitcount, ammo = game_variables
            
            # Calculate reward deltas
            damage_taken_delta = -damage_taken + self.damage_taken
            self.damage_taken = damage_taken
            hitcount_delta = hitcount - self.hitcount
            self.hitcount = hitcount
            ammo_delta = ammo - self.ammo
            self.ammo = ammo
            
            reward = movement_reward + damage_taken_delta*10 + hitcount_delta*200  + ammo_delta*5 
            info = ammo
        else: 
            state = np.zeros(self.observation_space.shape)
            info = 0 
        
        info = {"info":info}
        done = self.game.is_episode_finished()
        
        return state, reward, done, info 
    
    # Define how to render the game or environment 
    def render(self): 
        pass
    
    # What happens when we start a new game 
    def reset(self): 
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    # Grayscale the game frame and resize it 
    def grayscale(self, observation):
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
    
    # Call to close down the game
    def close(self): 
        self.game.close()

In [15]:
env = VizDoomGym(render=True)

In [8]:
state = env.reset()

In [17]:
env.close()

# 3. View Game State

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.imshow(cv2.cvtColor(state, cv2.COLOR_BGR2RGB))

Environment Cheker

In [16]:
# Import Environment checker
from stable_baselines3.common import env_checker
env_checker.check_env(env)

# 4. Setup Callback

In [18]:
# Import os for file nav
import os 
# Import callback class from sb3
from stable_baselines3.common.callbacks import BaseCallback

In [19]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [20]:
CHECKPOINT_DIR = './models/models_deadly_corridor'
LOG_DIR = './logs/log_deadly_corridor'

In [21]:
callback = TrainAndLoggingCallback(check_freq=140000, save_path=CHECKPOINT_DIR)

# 5. Train Model

In [22]:
# import ppo for training
from stable_baselines3 import PPO

In [23]:
# Non rendered environment
env = VizDoomGym(doom_skill=1)

In [24]:
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.00001, n_steps=8192, clip_range=.1, gamma=.3, gae_lambda=.9)
model.learn(total_timesteps=400000)
model.save("./models/models_deadly_corridor/best_model_560000")  # Save the model after training on the first skill level

# Load the pre-trained model before training on each new skill level
for skill_level in range(2, 6):
    env = VizDoomGym(doom_skill=skill_level)
    model = PPO.load("./models/models_deadly_corridor/best_model_560000")  # Load the previously saved model
    model.set_env(env)
    model.learn(total_timesteps=40000)
    model.save("./models/models_deadly_corridor/best_model_560000")  # Save the model after training on the current skill level


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Logging to ./logs/log_deadly_corridor\PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 218      |
|    ep_rew_mean     | 94.7     |
| time/              |          |
|    fps             | 45       |
|    iterations      | 1        |
|    time_elapsed    | 182      |
|    total_timesteps | 8192     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 212        |
|    ep_rew_mean          | 143        |
| time/                   |            |
|    fps                  | 42         |
|    iterations           | 2          |
|    time_elapsed         | 386        |
|    total_timesteps      | 16384      |
| train/                  |            |
|    approx_kl            | 0.00390896 |
|    clip_fraction        | 0

# 6. Test the Model

In [25]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy

In [26]:
# Reload model from disc
model = PPO.load('./models/models_deadly_corridor/best_model_560000')

In [27]:
# Create rendered environment
env = VizDoomGym(render=True, doom_skill=5)

In [28]:
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=10)
mean_reward



190.9540786743164

In [23]:
for episode in range(100): 
    obs = env.reset()
    done = False
    total_reward = 0
    while not done: 
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(0.20)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(episode+1, total_reward))
    time.sleep(1)

Total Reward for episode 1 is -472.9422607421875
Total Reward for episode 2 is 360.46434020996094
Total Reward for episode 3 is 434.45562744140625
Total Reward for episode 4 is -248.23692321777344


In [29]:
env.close()