Setting up Doom Environment

In [1]:
#pipinstall vizdoom and clone repo in new folder
#!cd github & git clone https://github.com/Farama-Foundation/ViZDoom.git

#import vizdoom to setup game environment
from vizdoom import *

import random
import time
import numpy as np
from matplotlib import pyplot as plt

#pip install and import gym environment
from gym import Env
from gym.spaces import Discrete, Box
import cv2

In [2]:
class VizDoomGym(Env):
    #called when env is started > game
    def __init__(self,render=False):
        
        #inherit from Env import
        super().__init__()
        
        self.game = DoomGame()
        self.game.load_config("github/VizDoom/scenarios/basic.cfg")
        
        #Disable or enable window visiblity when game is running
        if render == False:
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        self.game.init()
    
        
        
        #create obs and action space
        #low/high indicates pixel vals
        self.observation_space = Box(low=0,high=255,shape=(100,160,1),dtype=np.uint8)
        self.action_space = Discrete(3)
    #tale actons
    def step(self,action):
        actions = np.identity(3,dtype=np.uint8)
        
        #take action, make_action() returns reward value for taking ste
        #2nd para is frame skip to give time between taking action and receiving result
        reward = self.game.make_action(actions[action],4)
        
        #if something is returned from game_state()
        if self.game.get_state():
            #get game state to grab screen image
            state = self.game.get_state().screen_buffer
            #apply grayscale
            state = self.grayscale(state)
            #use game state to grab game vars, i.e. ammo
            ammo = self.game.get_state().game_variables[0]
            info = ammo
        #game_state returns nothing/errors out
        else:
            state = np.zeros(self.observation_space.shape)
            info = 0
        
        info = {"info":info}
        
        done = self.game.is_episode_finished()
        
        
        return state,reward,done,info
    def render():
        pass
    
    #resets game
    def reset(self):
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    #grayscale frame and scales down image, to make training faster
    def grayscale(self,observation):
        gray = cv2.cvtColor(np.moveaxis(observation,0,-1),cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100),interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize,(100,160,1))
        return state
    #close the game
    def close(self):
        self.game.close()

In [3]:
import os
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import env_checker
from stable_baselines3.common.evaluation import evaluate_policy

Setting up callback for training

In [4]:
#saves tensorboard log file after training, go into PPO_n and run tensorboard --logdir=. then open local host link
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True
    
CHECKPOINT_DIR = './train/train_basic'
LOG_DIR = './logs/log_basic'

#after every 10k steps of training model, save version of pytorch weights for RL agent
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

Proximal Policy Optimization model

In [5]:
# Non rendered environment
env = VizDoomGym()
#pass convolutional neural network, cnn for image
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=2048)
model.learn(total_timesteps=100000, callback=callback)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Logging to ./logs/log_basic\PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 30.5     |
|    ep_rew_mean     | -73.1    |
| time/              |          |
|    fps             | 63       |
|    iterations      | 1        |
|    time_elapsed    | 32       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 33.1        |
|    ep_rew_mean          | -88.3       |
| time/                   |             |
|    fps                  | 45          |
|    iterations           | 2           |
|    time_elapsed         | 89          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.005784576 |
|    clip_fraction        | 0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_mean          | 47.1        |
| time/                   |             |
|    fps                  | 28          |
|    iterations           | 11          |
|    time_elapsed         | 792         |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.025873838 |
|    clip_fraction        | 0.24        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.918      |
|    explained_variance   | 0.626       |
|    learning_rate        | 0.0001      |
|    loss                 | 1.94e+03    |
|    n_updates            | 100         |
|    policy_gradient_loss | 0.00101     |
|    value_loss           | 3.09e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.8  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.19        |
|    ep_rew_mean          | 65.1        |
| time/                   |             |
|    fps                  | 30          |
|    iterations           | 21          |
|    time_elapsed         | 1399        |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.025863726 |
|    clip_fraction        | 0.251       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.506      |
|    explained_variance   | 0.786       |
|    learning_rate        | 0.0001      |
|    loss                 | 539         |
|    n_updates            | 200         |
|    policy_gradient_loss | 0.00222     |
|    value_loss           | 1.42e+03    |
-----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 6.34      

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.65       |
|    ep_rew_mean          | 83.3       |
| time/                   |            |
|    fps                  | 30         |
|    iterations           | 31         |
|    time_elapsed         | 2085       |
|    total_timesteps      | 63488      |
| train/                  |            |
|    approx_kl            | 0.01825897 |
|    clip_fraction        | 0.104      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.215     |
|    explained_variance   | 0.687      |
|    learning_rate        | 0.0001     |
|    loss                 | 23.1       |
|    n_updates            | 300        |
|    policy_gradient_loss | 0.0511     |
|    value_loss           | 38.5       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3.97       |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.42       |
|    ep_rew_mean          | 85.1       |
| time/                   |            |
|    fps                  | 30         |
|    iterations           | 41         |
|    time_elapsed         | 2773       |
|    total_timesteps      | 83968      |
| train/                  |            |
|    approx_kl            | 0.09756045 |
|    clip_fraction        | 0.0808     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.147     |
|    explained_variance   | 0.737      |
|    learning_rate        | 0.0001     |
|    loss                 | 10.6       |
|    n_updates            | 400        |
|    policy_gradient_loss | 0.0183     |
|    value_loss           | 36         |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.76        |
|    ep_rew_m

<stable_baselines3.ppo.ppo.PPO at 0x1e47d9e7400>

In [32]:
%tensorboard --logdir {log_basic/PPO_1}  --host localhost

In [40]:
%cd logs

C:\Users\ahmed\OneDrive\Desktop\Training-an-AI-to-play-Doom\logs


In [42]:
%tensorboard --logdir PPO_1

In [50]:
!cd log_basic

In [51]:
!pwd

'pwd' is not recognized as an internal or external command,
operable program or batch file.


In [36]:
%load_ext tensorboard
%tensorboard --logdir logs/log_basic



The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [7]:
import os
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import env_checker
from stable_baselines3.common.evaluation import evaluate_policy

In [8]:
# Reload model at nk steps, > number of episdoes
model10k = PPO.load('./train/train_basic/best_model_30000')
model50k = PPO.load('./train/train_basic/best_model_50000')
model100k = PPO.load('./train/train_basic/best_model_100000')

In [10]:
# Create rendered environment
env = VizDoomGym(render=True)

In [11]:
#applying 10k steps model to 100 games
# Evaluate mean reward for 100 games using loaded model
mean_reward, _ = evaluate_policy(model10k, env, n_eval_episodes=100)
mean_reward



74.14

In [12]:
#applying 50k steps model to 100 games
mean_reward, _ = evaluate_policy(model50k, env, n_eval_episodes=100)
mean_reward

83.32

In [13]:
#applying 100k steps model to 100 games
mean_reward, _ = evaluate_policy(model100k, env, n_eval_episodes=100)
mean_reward

86.84

In [14]:
model.predict(obs)

NameError: name 'obs' is not defined

In [180]:
for episode in range(100): 
    obs = env.reset()
    done = False
    total_reward = 0
    while not done: 
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        #change to slow down frame
        #time.sleep(0.20)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(episode, total_reward))
    time.sleep(2)

Total Reward for episode 0 is 95.0
Total Reward for episode 1 is -300.0
Total Reward for episode 2 is 95.0
Total Reward for episode 3 is 95.0
Total Reward for episode 4 is 95.0
Total Reward for episode 5 is 95.0
Total Reward for episode 6 is 95.0
Total Reward for episode 7 is 95.0
Total Reward for episode 8 is -315.0
Total Reward for episode 9 is -300.0
Total Reward for episode 10 is 95.0
Total Reward for episode 11 is 95.0
Total Reward for episode 12 is 66.0
Total Reward for episode 13 is 95.0
Total Reward for episode 14 is 95.0
Total Reward for episode 15 is -300.0
Total Reward for episode 16 is 68.0
Total Reward for episode 17 is 59.0
Total Reward for episode 18 is 70.0
Total Reward for episode 19 is 95.0
Total Reward for episode 20 is 95.0
Total Reward for episode 21 is -300.0
Total Reward for episode 22 is 95.0
Total Reward for episode 23 is 95.0
Total Reward for episode 24 is -300.0
Total Reward for episode 25 is 95.0
Total Reward for episode 26 is 95.0
Total Reward for episode 2

ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly.