1. Getting VizDoom Up and Running

In [None]:
#!pip install vizdoom

In [None]:
#!git clone https://github.com/Farama-Foundation/ViZDoom

In [1]:
# Import vizdoom for game env
from vizdoom import *
# Import random for action sampling
import random
# Import time for sleeping
import time
# Import numpy for identity matrix
import numpy as np

In [None]:
# Create the Doom engine wrapper, load the "basic" scenario config, then start it
scenario_config = "github/ViZDoom/scenarios/basic.cfg"
game = DoomGame()
game.load_config(scenario_config)
game.init()

In [None]:
# One-hot action table: row i is the vector passed to make_action for action i
actions = np.eye(3, dtype=np.uint8)
actions

In [None]:
# Pick one row of the action table at random (the same call the random-play loop uses)
random.choice(actions)

In [None]:
# Play a handful of episodes with uniformly random actions to sanity-check the setup
episodes = 10
frame_skip = 4  # second argument to make_action: tics the chosen action is held for
try:
    for episode in range(episodes):
        # Create a new episode or game
        game.new_episode()
        # Keep acting until the engine reports that the episode is over
        while not game.is_episode_finished():
            state = game.get_state()
            img = state.screen_buffer      # current frame (kept for inspection)
            info = state.game_variables    # scenario variables (kept for inspection)
            reward = game.make_action(random.choice(actions), frame_skip)

            print("reward: ", reward )
            time.sleep(0.02)
        print("Result: ", game.get_total_reward())
        time.sleep(2)
finally:
    # Always shut the engine down, even if the loop is interrupted or raises,
    # so the game window/process is not left behind.
    game.close()

2. Converting it to a Gym Environment

In [2]:
# Import environment base class from OpenAI Gym
from gym import Env
# Import gym spaces
from gym.spaces import Discrete, Box
# Import opencv
import cv2


In [None]:
# Draw a random action index from a 3-way discrete space
Discrete(3).sample()

In [None]:
# Use the sampled index to select the matching row of the one-hot action table
actions[Discrete(3).sample()]

In [3]:
class VizDoomGym(Env):
    """Gym environment wrapping the ViZDoom "basic" scenario.

    Observations are single-channel ``uint8`` frames of shape ``(100, 160, 1)``.
    The action space is ``Discrete(3)``; each discrete action is translated to
    a one-hot vector that is handed to ``DoomGame.make_action``.
    """

    # Function that is called when we start the env
    def __init__(self, render = False):
        """Load the scenario config, configure the window and start the game.

        Args:
            render: if truthy the game window is shown, otherwise it is hidden.
        """
        super(VizDoomGym, self).__init__()

        # Setup the game
        self.game = DoomGame()
        self.game.load_config("github/ViZDoom/scenarios/basic.cfg")

        # Window visibility is set before the game is started
        self.game.set_window_visible(bool(render))

        # Start the game
        self.game.init()

        # Create the observation_space and action_space
        self.observation_space = Box(low=0, high=255, shape = (100,160,1), dtype=np.uint8)
        self.action_space = Discrete(3)

        # One-hot lookup table (row i == discrete action i), built once
        # instead of on every step.
        self._actions = np.identity(self.action_space.n, dtype=np.uint8)

    # This is how we take a step in the environment
    def step(self, action):
        """Apply one discrete action and report the outcome.

        Args:
            action: index into the action space (0, 1 or 2).

        Returns:
            Tuple ``(state, reward, done, info)`` where ``state`` is the
            preprocessed frame (all zeros once no game state is available),
            ``reward`` is the value returned by ``make_action``, ``done`` is
            ``is_episode_finished()`` and ``info`` is ``{"ammo": ...}``.
        """
        # Specify action and take step (the action is held for 4 tics)
        reward = self.game.make_action(self._actions[action], 4)

        # Query the engine once and reuse the result
        game_state = self.game.get_state()
        if game_state is not None:
            state = self.grayscale(game_state.screen_buffer)
            # First game variable exposed by the config; reported as "ammo"
            # NOTE(review): presumably the ammo counter for basic.cfg - confirm
            ammo = game_state.game_variables[0]
        else:
            # No state is available (episode over): return a blank frame with
            # the same shape AND dtype as the declared observation space.
            state = np.zeros(self.observation_space.shape,
                             dtype=self.observation_space.dtype)
            ammo = 0

        info = {"ammo":ammo}
        done = self.game.is_episode_finished()

        return state, reward, done, info

    # Define how to render the game or environment
    def render(self, mode="human"):
        """No-op; the game window is controlled by the ``render`` constructor flag."""
        pass

    # What happens when we start a new game
    def reset(self):
        """Start a new episode and return its first preprocessed frame."""
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)

    # Grayscale the game and resize it
    def grayscale(self, observation):
        """Convert a channel-first colour frame to a ``(100, 160, 1)`` gray frame.

        Args:
            observation: colour frame whose first axis is the channel axis.

        Returns:
            Array of shape ``(100, 160, 1)``.
        """
        # Move the channel axis last before handing the frame to OpenCV.
        # NOTE(review): the frame is treated as BGR here; if the scenario's
        # screen format is RGB-ordered the R/B weights are swapped. Left as-is
        # so observations stay consistent with existing checkpoints - confirm.
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        # cv2.resize takes the target size as (width, height)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state

    # Call to close down the game
    def close(self):
        """Shut down the underlying Doom game instance."""
        self.game.close()
        


In [None]:
# Build the environment; render defaults to False, so the game window is hidden
env = VizDoomGym()

In [None]:
# Start an episode and keep its first preprocessed frame for inspection
state = env.reset()

In [None]:
# Import Environment checker
from stable_baselines3.common import env_checker

In [None]:
# Run the Stable-Baselines3 environment checker on the custom environment
env_checker.check_env(env)

In [None]:
# Shut down the underlying Doom game instance
env.close()

3. View State

In [None]:
from matplotlib import pyplot as plt

In [None]:
# `state` is a single-channel (100, 160, 1) frame, so expand gray -> RGB for
# display; COLOR_BGR2RGB expects a 3- or 4-channel input.
plt.imshow(cv2.cvtColor(state, cv2.COLOR_GRAY2RGB))

4. Setup Callback

In [None]:
# Import os for file path management
import os
# Import Base Callback for saving models
from stable_baselines3.common.callbacks import BaseCallback

In [None]:
class TrainAndLoggingCallback(BaseCallback):
    """Save the model being trained every ``check_freq`` callback calls.

    Checkpoints are written to ``save_path`` as ``best_model_<n_calls>``.
    Passing ``save_path=None`` disables checkpointing.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        """
        Args:
            check_freq: number of callback calls between two checkpoints.
            save_path: directory for checkpoints, or None to disable saving.
            verbose: verbosity level forwarded to ``BaseCallback``.
        """
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        # Make sure the checkpoint directory exists
        if self.save_path is not None:
            os.makedirs(self.save_path,exist_ok=True)

    def _on_step(self) :
        # Same None guard as _init_callback: without a save path there is
        # nowhere to write, so skip instead of failing in os.path.join.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path,"best_model_{}".format(self.n_calls))
            self.model.save(model_path)

        return True
        

In [None]:
# Directory passed to the checkpoint callback as its save path
CHECKPOINT_DIR = "./train/train_basic"
# Directory passed to PPO as its TensorBoard log location
LOG_DIR = "./logs/log_basic"

In [None]:
# Setup model saving callback: checkpoint into CHECKPOINT_DIR every 10000 callback calls
callback = TrainAndLoggingCallback(check_freq=10000,save_path=CHECKPOINT_DIR)

5. Train the Model

In [4]:
# Import PPO for training
from stable_baselines3 import PPO

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Create the training environment with the game window visible
env = VizDoomGym(render = True)


In [None]:
# PPO agent with a CNN policy over the image observations; logs go to LOG_DIR
model = PPO(
    "CnnPolicy",
    env,
    tensorboard_log=LOG_DIR,
    verbose=1,
    learning_rate=0.0001,
    n_steps=512,
)

In [None]:
# Run training; the callback writes periodic checkpoints while the model learns
training_steps = 1000000
model.learn(total_timesteps=training_steps, callback=callback)

In [None]:
# If you want to see logs
# tensorboard --logdir=. 

6. Test the Model

In [5]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy

In [6]:
# Reload a saved checkpoint from disk (the file written at callback call 140000)
model = PPO.load("train/train_basic/best_model_140000.zip")

In [7]:
# Create an environment with the game window visible so the agent can be watched
env = VizDoomGym(render=True)

In [8]:
# Evaluate over 10 episodes; keep the first returned value, discard the second
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes = 10)



In [9]:
mean_reward

88.6

In [14]:
# Watch the loaded agent play a few episodes and print each episode's total reward
n_test_episodes = 5
for episode in range(n_test_episodes):
    obs, done, total_reward = env.reset(), False, 0
    while not done:
        # Ask the policy for an action, then advance the environment with it
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        total_reward += reward
        time.sleep(0.05)  # slow playback down so it can be followed on screen
    print("Total Reward for episode {} is {}".format(episode, total_reward))
    time.sleep(2)

Total Reward for episode 0 is 95.0
Total Reward for episode 1 is 95.0
Total Reward for episode 2 is 67.0
Total Reward for episode 3 is 95.0
Total Reward for episode 4 is 95.0
