# 1. Getting VizDoom Up and Running

In [None]:
!pip install vizdoom

In [None]:
!cd github & git clone https://github.com/mwydmuch/ViZDoom

In [20]:
# Import vizdoom for game env
from vizdoom import * 
# Import random for action sampling
import random
# Import time for sleeping
import time 
# Import numpy for identity matrix
import numpy as np

In [145]:
# Setup game
game = DoomGame()
game.load_config('github/VizDoom/scenarios/basic.cfg')
game.init()

In [146]:
# This is the set of actions we can take in the environment
actions = np.identity(3, dtype=np.uint8)

In [147]:
state = game.get_state()

In [148]:
state.game_variables

array([50.])

In [149]:
episodes = 10
for episode in range(episodes):
    game.new_episode()
    while not game.is_episode_finished():
        state = game.get_state()
        img = state.screen_buffer
        info = state.game_variables
        reward = game.make_action(random.choice(actions), 4)
        print('reward:', reward)
        time.sleep(0.02)
    print('Result:', game.get_total_reward())
    time.sleep(2)


reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: 99.0
Result: -17.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.

KeyboardInterrupt: 

In [150]:
game.close()

# 2. Converting it to a Gym Environment

In [157]:
!pip install stable-baselines3 --upgrade




In [158]:
# Import environment base class from OpenAI Gym
from gym import Env
# Import gym spaces 
from gym.spaces import Discrete, Box
# Import opencv 
import cv2

In [159]:
game.get_state().screen_buffer.shape

AttributeError: 'NoneType' object has no attribute 'screen_buffer'

In [153]:
# Create Vizdoom OpenAI Gym Environment
class VizDoomGym(Env): 
    # Function that is called when we start the env
    def __init__(self, render=False): 
        # Inherit from Env
        super().__init__()
        # Setup the game 
        self.game = DoomGame()
        self.game.load_config('github/VizDoom/scenarios/basic.cfg')
        
        # Render frame logic
        if render == False: 
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        # Start the game 
        self.game.init()
        
        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8) 
        self.action_space = Discrete(3)
        
    # This is how we take a step in the environment
    def step(self, action):
        
    # Specify action and take step
        actions = np.identity(3)
        reward = self.game.make_action(actions[action], 4)

    # Get all the other stuff we need to return
        if self.game.get_state():
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            ammo = self.game.get_state().game_variables[0]
            info = {"ammo": ammo}
        else:
            state = np.zeros(self.observation_space.shape)
            info = {"info": None}  # Set to None or any default value if no information is available

        done = self.game.is_episode_finished()

        return state, reward, done, info

    
    # Define how to render the game or environment 
    def render(self):
        
        pass
        
       
    # What happens when we start a new game 
    def reset(self): 
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    # Grayscale the game frame and resize it 
    def grayscale(self, observation):
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
        
        
        
        
    
    # Call to close down the game
    def close(self): 
        self.game.close()

In [137]:
env = VizDoomGym(render=True)

In [130]:
env.step(2)

(array([[[55],
         [50],
         [59],
         ...,
         [57],
         [57],
         [66]],
 
        [[68],
         [65],
         [65],
         ...,
         [56],
         [67],
         [72]],
 
        [[49],
         [79],
         [66],
         ...,
         [79],
         [51],
         [29]],
 
        ...,
 
        [[75],
         [63],
         [62],
         ...,
         [44],
         [71],
         [60]],
 
        [[15],
         [48],
         [47],
         ...,
         [49],
         [69],
         [47]],
 
        [[22],
         [14],
         [26],
         ...,
         [57],
         [37],
         [39]]], dtype=uint8),
 -4.0,
 False,
 {'info': {'ammo': 50.0}})

In [131]:
state = env.reset()

# 3. View Game State

In [132]:
env.reset()

array([[[55],
        [50],
        [59],
        ...,
        [57],
        [57],
        [66]],

       [[68],
        [65],
        [65],
        ...,
        [56],
        [67],
        [72]],

       [[49],
        [79],
        [66],
        ...,
        [79],
        [51],
        [29]],

       ...,

       [[75],
        [63],
        [62],
        ...,
        [44],
        [71],
        [60]],

       [[15],
        [48],
        [47],
        ...,
        [49],
        [69],
        [47]],

       [[22],
        [14],
        [26],
        ...,
        [57],
        [37],
        [39]]], dtype=uint8)

In [138]:
env.close()

In [160]:
from stable_baselines3.common import env_checker


In [161]:
env_checker.check_env(env)

AssertionError: Your environment must inherit from the gymnasium.Env class cf. https://gymnasium.farama.org/api/env/

# 3. View State

In [None]:
from matplotlib import pyplot as plt

In [None]:
plt.imshow(cv2.cvtColor(state, cv2.COLOR_BGR2RGB))

# 4. Setup Callback

In [118]:
! pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.1.2%2Bcu118-cp311-cp311-win_amd64.whl (2722.7 MB)
     ---------------------------------------- 0.0/2.7 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.7 GB 1.7 MB/s eta 0:27:29
     ---------------------------------------- 0.0/2.7 GB 2.4 MB/s eta 0:19:13
     ---------------------------------------- 0.0/2.7 GB 3.7 MB/s eta 0:12:19
     ---------------------------------------- 0.0/2.7 GB 4.9 MB/s eta 0:09:18
     ---------------------------------------- 0.0/2.7 GB 5.8 MB/s eta 0:07:54
     ---------------------------------------- 0.0/2.7 GB 8.9 MB/s eta 0:05:05
     ---------------------------------------- 0.0/2.7 GB 12.6 MB/s eta 0:03:36
     ---------------------------------------- 0.0/2.7 GB 17.5 MB/s eta 0:02:36
     ---------------------------------------- 0.0/2.7 GB 19.1 MB/s eta 0:02:23
     --------------------------------

In [119]:
! pip install stable-baselines3[extra]

Collecting stable-baselines3[extra]
  Obtaining dependency information for stable-baselines3[extra] from https://files.pythonhosted.org/packages/1e/43/d4b83e644c7e42d90d76a1987fb98a2ab286a2b5593350210ca8efcc378e/stable_baselines3-2.2.1-py3-none-any.whl.metadata
  Downloading stable_baselines3-2.2.1-py3-none-any.whl.metadata (5.0 kB)
Collecting rich (from stable-baselines3[extra])
  Obtaining dependency information for rich from https://files.pythonhosted.org/packages/be/be/1520178fa01eabe014b16e72a952b9f900631142ccd03dc36cf93e30c1ce/rich-13.7.0-py3-none-any.whl.metadata
  Downloading rich-13.7.0-py3-none-any.whl.metadata (18 kB)
Collecting shimmy[atari]~=1.3.0 (from stable-baselines3[extra])
  Obtaining dependency information for shimmy[atari]~=1.3.0 from https://files.pythonhosted.org/packages/dc/f9/07ef16463db14ac1b30f149c379760f5cacf3fc677b295d29a92f3127914/Shimmy-1.3.0-py3-none-any.whl.metadata
  Downloading Shimmy-1.3.0-py3-none-any.whl.metadata (3.7 kB)
Collecting autorom[accept-

In [120]:
# Import os for file nav
import os 
# Import callback class from sb3
from stable_baselines3.common.callbacks import BaseCallback



In [None]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [None]:
CHECKPOINT_DIR = './train/train_basic'
LOG_DIR = './logs/log_basic'

In [None]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

# 5. Train Model

In [None]:
# import ppo for training
from stable_baselines3 import PPO

In [None]:
# Non rendered environment
env = VizDoomGym()

In [None]:
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=2048)

In [None]:
model.learn(total_timesteps=100000, callback=callback)

# 6. Test the Model

In [None]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
# Reload model from disc
model = PPO.load('./train/train_basic/best_model_60000')

In [None]:
# Create rendered environment
env = VizDoomGym(render=True)

In [None]:
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=100)

In [None]:
mean_reward

In [None]:
model.predict(obs)

In [None]:
for episode in range(100): 
    obs = env.reset()
    done = False
    total_reward = 0
    while not done: 
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        # time.sleep(0.20)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(total_reward, episode))
    time.sleep(2)