# 1. Getting VizDoom Up and Running

In [18]:
!pip install vizdoom



In [19]:
!cd github & git clone https://github.com/mwydmuch/ViZDoom

zsh:cd:1: no such file or directory: github
fatal: destination path 'ViZDoom' already exists and is not an empty directory.


In [20]:
# Import vizdoom for game env
from vizdoom import * 
# Import random for action sampling
import random
# Import time for sleeping
import time 
# Import numpy for identity matrix
import numpy as np

In [21]:
# Setup game
game = DoomGame()
game.load_config('deadly_corridor.cfg')
game.init()

2025-01-26 14:54:28.212 vizdoom[22381:10249707] +[IMKClient subclass]: chose IMKClient_Modern
2025-01-26 14:54:28.212 vizdoom[22381:10249707] +[IMKInputSession subclass]: chose IMKInputSession_Modern


In [22]:
# This is the set of actions we can take in the environment
actions = np.identity(7, dtype=np.uint8)

In [23]:
state = game.get_state()

In [24]:
state.game_variables

array([100.])

In [25]:
# Loop through episodes 
episodes = 10 
for episode in range(episodes): 
    # Create a new episode or game 
    game.new_episode()
    # Check the game isn't done 
    while not game.is_episode_finished(): 
        # Get the game state 
        state = game.get_state()
        # Get the game image 
        img = state.screen_buffer
        # Get the game variables - ammo
        info = state.game_variables
        # Take an action
        reward = game.make_action(random.choice(actions),4)
        # Print rewward 
        # print('reward:', reward) 
        time.sleep(0.02)
    print('Result:', game.get_total_reward())
    time.sleep(2)

Result: 20.8433837890625
Result: -82.4759521484375
Result: -82.05613708496094
Result: -115.99571228027344
Result: -115.99253845214844
Result: -83.51568603515625
Result: -72.17813110351562
Result: -106.56629943847656
Result: -47.379547119140625
Result: -58.019256591796875


In [26]:
#S1 - 336
#S2 - -29
#S3 - -20
#S4 - -95
#S5 - -99

In [27]:
game.close()

In [28]:
game.get_state().game_variables

AttributeError: 'NoneType' object has no attribute 'game_variables'

# 2. Converting it to a Gym Environment

In [11]:
!pip install gym



In [29]:
# Import environment base class from OpenAI Gym
from gym import Env
# Import gym spaces 
from gym.spaces import Discrete, Box
# Import opencv 
import cv2

In [39]:
# Create Vizdoom OpenAI Gym Environment
class VizDoomGym(Env): 
    # Function that is called when we start the env
    def __init__(self, render=False, config='deadly_corridor.cfg'): 
        # Inherit from Env
        super().__init__()
        # Setup the game 
        self.game = DoomGame()
        self.game.load_config(config)
        
        # Render frame logic
        if render == False: 
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        # Start the game 
        self.game.init()
        
        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8) 
        self.action_space = Discrete(7)
        
        # Game variables: HEALTH DAMAGE_TAKEN HITCOUNT SELECTED_WEAPON_AMMO
        self.damage_taken = 0
        self.hitcount = 0
        self.ammo = 52 ## CHANGED
        
        
    # This is how we take a step in the environment
    def step(self, action):
        # Specify action and take step 
        actions = np.identity(7)
        movement_reward = self.game.make_action(actions[action], 4) 
        
        reward = 0 
        # Get all the other stuff we need to retun 
        if self.game.get_state(): 
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            
            # Reward shaping
            game_variables = self.game.get_state().game_variables
            health, damage_taken, hitcount, ammo = game_variables
            
            # Calculate reward deltas
            damage_taken_delta = -damage_taken + self.damage_taken
            self.damage_taken = damage_taken
            hitcount_delta = hitcount - self.hitcount
            self.hitcount = hitcount
            ammo_delta = ammo - self.ammo
            self.ammo = ammo
            
            reward = movement_reward + damage_taken_delta*10 + hitcount_delta*200  + ammo_delta*5 
            info = ammo
        else: 
            state = np.zeros(self.observation_space.shape)
            info = 0 
        
        info = {"info":info}
        done = self.game.is_episode_finished()
        
        return state, reward, done, info 
    
    # Define how to render the game or environment 
    def render(): 
        pass
    
    # What happens when we start a new game 
    def reset(self): 
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    # Grayscale the game frame and resize it 
    def grayscale(self, observation):
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
    
    # Call to close down the game
    def close(self): 
        self.game.close()

In [17]:
env.close()

NameError: name 'env' is not defined

In [40]:
env = VizDoomGym(render=True)

2025-01-26 15:00:05.656 vizdoom[22433:10252436] +[IMKClient subclass]: chose IMKClient_Modern
2025-01-26 15:00:05.656 vizdoom[22433:10252436] +[IMKInputSession subclass]: chose IMKInputSession_Modern


In [None]:
res = env.step(5)
res[1]

2995.7413482666016

In [None]:
res[1]

0.0

In [None]:
state = env.reset()

# 3. View Game State

In [None]:
env.reset()

array([[[32],
        [33],
        [25],
        ...,
        [27],
        [23],
        [24]],

       [[27],
        [33],
        [23],
        ...,
        [24],
        [24],
        [24]],

       [[20],
        [35],
        [23],
        ...,
        [24],
        [24],
        [24]],

       ...,

       [[75],
        [63],
        [62],
        ...,
        [44],
        [71],
        [60]],

       [[15],
        [48],
        [47],
        ...,
        [49],
        [69],
        [47]],

       [[22],
        [14],
        [26],
        ...,
        [57],
        [37],
        [39]]], dtype=uint8)

In [None]:
# Import Environment checker
from stable_baselines3.common import env_checker

In [5]:
env_checker.check_env(env)

NameError: name 'env' is not defined

# 3. View State

In [None]:
!pip install matplotlib



You should consider upgrading via the 'd:\youtube\26-05-2021 - vizdoom\vizdoom\scripts\python.exe -m pip install --upgrade pip' command.


In [15]:
from matplotlib import pyplot as plt

In [16]:
plt.imshow(cv2.cvtColor(state, cv2.COLOR_BGR2RGB))

error: OpenCV(4.10.0) :-1: error: (-5:Bad argument) in function 'cvtColor'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'


In [None]:
env.close()

# 4. Setup Callback

In [17]:
!pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

Looking in links: https://download.pytorch.org/whl/cu113/torch_stable.html


ERROR: Could not find a version that satisfies the requirement torch==1.10.1+cu113 (from versions: 2.0.0, 2.0.1, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1, 2.2.2, 2.3.0, 2.3.1, 2.4.0, 2.4.1, 2.5.0, 2.5.1)
ERROR: No matching distribution found for torch==1.10.1+cu113


In [18]:
!pip install stable-baselines3[extra]

Collecting tensorboard>=2.9.1 (from stable-baselines3[extra])
  Downloading tensorboard-2.18.0-py3-none-any.whl.metadata (1.6 kB)
Collecting tqdm (from stable-baselines3[extra])
  Downloading tqdm-4.67.0-py3-none-any.whl.metadata (57 kB)
Collecting rich (from stable-baselines3[extra])
  Downloading rich-13.9.4-py3-none-any.whl.metadata (18 kB)
Collecting ale-py>=0.9.0 (from stable-baselines3[extra])
  Downloading ale_py-0.10.1-cp311-cp311-win_amd64.whl.metadata (7.8 kB)
Collecting absl-py>=0.4 (from tensorboard>=2.9.1->stable-baselines3[extra])
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting grpcio>=1.48.2 (from tensorboard>=2.9.1->stable-baselines3[extra])
  Downloading grpcio-1.68.0-cp311-cp311-win_amd64.whl.metadata (4.0 kB)
Collecting markdown>=2.6.8 (from tensorboard>=2.9.1->stable-baselines3[extra])
  Downloading Markdown-3.7-py3-none-any.whl.metadata (7.0 kB)
Collecting protobuf!=4.24.0,>=3.19.6 (from tensorboard>=2.9.1->stable-baselines3[extra])
  Dow

In [4]:
# Import os for file nav
import os 
# Import callback class from sb3
from stable_baselines3.common.callbacks import BaseCallback

In [5]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [24]:
CHECKPOINT_DIR = './train/train_corridor'
LOG_DIR = './logs/log_corridor'

In [7]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

# 5. Train Our Model Using Curriculum 

In [36]:
# import ppo for training
from stable_baselines3 import PPO

In [None]:
# Non rendered environment
env = VizDoomGym(config='deadly_corridor.cfg')

NameError: name 'DoomGame' is not defined

# Changes made
- Effects of hyperparameter tuning
- Increased initial timestep length

## Possibly look into
- Changing reward function and game variables due to bug

In [25]:
#model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=4096)
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.00001, n_steps=8192, clip_range=.1, gamma=.95, gae_lambda=.9)

Using cpu device




Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [23]:
#model.learn(total_timesteps=40000, callback=callback)
model.learn(total_timesteps=400000, callback=callback)

Logging to ./logs/log_corridor\PPO_30
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 93.5     |
|    ep_rew_mean     | 954      |
| time/              |          |
|    fps             | 60       |
|    iterations      | 1        |
|    time_elapsed    | 67       |
|    total_timesteps | 4096     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 96.8        |
|    ep_rew_mean          | 971         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 2           |
|    time_elapsed         | 143         |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.054793693 |
|    clip_fraction        | 0.3         |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.647      |
|    explained_variance   | 0.898 

<stable_baselines3.ppo.ppo.PPO at 0x2344cc78f98>

In [27]:
model.load('DeadlyCorridor560k.zip')

Exception: code expected at least 16 arguments, got 15
Exception: code expected at least 16 arguments, got 15
	Missing key(s) in state_dict: "pi_features_extractor.cnn.0.weight", "pi_features_extractor.cnn.0.bias", "pi_features_extractor.cnn.2.weight", "pi_features_extractor.cnn.2.bias", "pi_features_extractor.cnn.4.weight", "pi_features_extractor.cnn.4.bias", "pi_features_extractor.linear.0.weight", "pi_features_extractor.linear.0.bias", "vf_features_extractor.cnn.0.weight", "vf_features_extractor.cnn.0.bias", "vf_features_extractor.cnn.2.weight", "vf_features_extractor.cnn.2.bias", "vf_features_extractor.cnn.4.weight", "vf_features_extractor.cnn.4.bias", "vf_features_extractor.linear.0.weight", "vf_features_extractor.linear.0.bias".  


<stable_baselines3.ppo.ppo.PPO at 0x2209cc4e4d0>

In [29]:
env = VizDoomGym(config='deadly_corridor.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




NameError: name 'callback' is not defined

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s3.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s4.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s5.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)

# 6. Test the Model

In [45]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy

In [41]:
# Reload model from disc
model = PPO.load('DeadlyCorridor560k.zip')

Exception: 'bytes' object cannot be interpreted as an integer
Exception: 'bytes' object cannot be interpreted as an integer


In [43]:
# Create rendered environment
env = VizDoomGym(render=True, config='deadly_corridor.cfg')

2025-01-26 15:00:26.576 vizdoom[22447:10252722] +[IMKClient subclass]: chose IMKClient_Modern
2025-01-26 15:00:26.576 vizdoom[22447:10252722] +[IMKInputSession subclass]: chose IMKInputSession_Modern


In [46]:
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=10)



ValueError: not enough values to unpack (expected 4, got 1)

In [46]:
for episode in range(20): 
    obs = env.reset()
    done = False
    total_reward = 0
    while not done: 
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(0.02)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(total_reward, episode))
    time.sleep(2)

Total Reward for episode 1028.1819915771484 is 0
Total Reward for episode 2453.253128051758 is 1
Total Reward for episode 682.4454345703125 is 2
Total Reward for episode 406.0699768066406 is 3
Total Reward for episode 1458.8362579345703 is 4
Total Reward for episode -33.761932373046875 is 5
Total Reward for episode 2656.292770385742 is 6
Total Reward for episode -706.5568542480469 is 7
Total Reward for episode 369.5380096435547 is 8
Total Reward for episode 273.89292907714844 is 9
Total Reward for episode 2972.891387939453 is 10


KeyboardInterrupt: 

In [11]:
import numpy as np

# Replace 'filename.npz' with the path to your .npz file
data = np.load('log_data_50000.npz')
data2 = np.load('train_data_1898.npz')

print(data)
print(data2)

# List all arrays stored in the .npz file
print("Arrays in the .npz file:", data.files)

# Access individual arrays by their names
# Replace 'array_name' with the actual name of the array you want to access
array = data['observations']
array2 = data2['obs']
array=array.squeeze(axis=1)

# Use the array (for example, print its contents)
print("Array contents observations obs file:", array.shape)
print("Array contents:", array2.shape)

array = data['actions']
array2 = data2['action']


# Use the array (for example, print its contents)
print("Array contents actions yt file:", array.shape)
print("Array contents:", array2.shape)
array = data['rewards']
array2 = data2['reward']


# Use the array (for example, print its contents)
print("Array contents rewards yt file:", array.shape)
print("Array contents rewards:", array2.shape)
array = data['dones']
array2 = data2['done']

array=array.reshape(-1)
array2=array2.reshape(-1)

# Use the array (for example, print its contents)
print("Array contents dones yt file:", array.shape)
print("Array contents done:", array2.shape)
# Close the .npz file after loading
data.close()


NpzFile 'log_data_50000.npz' with keys: observations, actions, rewards, dones
NpzFile 'train_data_1898.npz' with keys: obs, action, reward, done, is_red
Arrays in the .npz file: ['observations', 'actions', 'rewards', 'dones']
Array contents observations obs file: (90, 3, 64, 112)
Array contents: (90, 3, 64, 112)
Array contents actions yt file: (90, 1)
Array contents: (90,)
Array contents rewards yt file: (90, 1)
Array contents rewards: (90,)
Array contents dones yt file: (90,)
Array contents done: (90,)
