In [1]:
# defend the center scenario
import sys
import os
sys.path.append('..') # make sure files dont violate this convention


from envs.doom_env import VizDoomGym, DoomDefendCenterLoggerCallback
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import CheckpointCallback, CallbackList
from stable_baselines3 import PPO, A2C
import torch

# CONFIG STUFF HERE
MODEL_SCENARIO_NAME = 'ppo_defend_center'
A2C_MODEL_SCENARIO_NAME = 'a2c_defend_center'
SCENARIO_PATH = '../scenarios/defend_the_center.cfg'
LOG_DIR = f'../logs/{MODEL_SCENARIO_NAME}/'
MODEL_DIR = f'../models/{MODEL_SCENARIO_NAME}/'

A2C_LOG_DIR = f'../logs/{A2C_MODEL_SCENARIO_NAME}/'
A2C_MODEL_DIR = f'../models/{A2C_MODEL_SCENARIO_NAME}/'
NUM_ENVS = 8
TOTAL_TIMESTEPS = 125_000
CHECKPOINT_TIMESTEPS = TOTAL_TIMESTEPS * 10 // 100

os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(A2C_LOG_DIR, exist_ok=True)
os.makedirs(A2C_MODEL_DIR, exist_ok=True)

In [2]:
# seeding the training
import random
import numpy as np
from stable_baselines3.common.utils import set_random_seed
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
set_random_seed(SEED)

In [3]:
# env = VizDoomGym(render=False, scenario_path="scenarios/defend_the_center.cfg")
# SETTING UP THE ENVIRONMENT
# env = DummyVecEnv([lambda: VizDoomGym(scenario_path=SCENARIO_PATH, render=False, number_of_actions=3)])

env = make_vec_env(
    VizDoomGym, # my doom environment
    n_envs=NUM_ENVS,
    env_kwargs=dict(
        scenario_path=SCENARIO_PATH,
        render=False,
        number_of_actions=3,
    ),
    vec_env_cls=DummyVecEnv,  # Auto-selects SubprocVecEnv (Linux) or DummyVecEnv (Windows)
    seed=SEED
)

In [12]:
# sanity check for the environment
from vizdoom import DoomGame
game = DoomGame()
game.load_config("../scenarios/defend_the_center.cfg")
game.set_window_visible(True)
game.init()

while not game.is_episode_finished():
    reward = game.make_action([0, 0, 1])  # shoot
    # print("Reward:", reward)
game.close()

In [4]:
# setting loggers and checkpoints for the model
checkpoint_callback = CheckpointCallback(save_freq=CHECKPOINT_TIMESTEPS // NUM_ENVS, save_path=MODEL_DIR,name_prefix=MODEL_SCENARIO_NAME)
logger_callback = DoomDefendCenterLoggerCallback(log_dir=LOG_DIR, log_file=f"{MODEL_SCENARIO_NAME}_episodes_v1_{TOTAL_TIMESTEPS}.csv", verbose=0)

combined_callback = CallbackList([checkpoint_callback, logger_callback])

# creating the model
model = PPO(
        "CnnPolicy",
        env,
        verbose=1,
        tensorboard_log=LOG_DIR,
        device="cuda" if torch.cuda.is_available() else "cpu",
        learning_rate=2.5e-4,
        n_steps=4096,
        batch_size=2048,
        gamma=0.99,
        gae_lambda=0.95,
        clip_range=0.1,
        ent_coef=0.01,
        vf_coef=0.5,
        max_grad_norm=0.5,
        seed=SEED
    )
env.reset()

Using cuda device
Wrapping the env in a VecTransposeImage.


array([[[[  2],
         [  7],
         [  8],
         ...,
         [  5],
         [  8],
         [  6]],

        [[  2],
         [  5],
         [  6],
         ...,
         [  3],
         [  0],
         [ 10]],

        [[  8],
         [  1],
         [  2],
         ...,
         [  7],
         [  4],
         [  3]],

        ...,

        [[111],
         [ 93],
         [ 87],
         ...,
         [ 68],
         [ 68],
         [ 54]],

        [[107],
         [ 93],
         [ 88],
         ...,
         [ 64],
         [ 64],
         [ 50]],

        [[ 98],
         [ 78],
         [ 74],
         ...,
         [ 50],
         [ 49],
         [ 49]]],


       [[[  2],
         [  7],
         [  8],
         ...,
         [  5],
         [  8],
         [  6]],

        [[  2],
         [  5],
         [  6],
         ...,
         [  3],
         [  0],
         [ 10]],

        [[  8],
         [  1],
         [  2],
         ...,
         [  7],
         [

fps = 43


In [5]:
model.learn(
        total_timesteps=TOTAL_TIMESTEPS // NUM_ENVS * NUM_ENVS,
        callback=combined_callback,
        progress_bar=True,
        tb_log_name=MODEL_SCENARIO_NAME
)

model.save(os.path.join(MODEL_DIR, "ppo_defend_center_100k_final"))
env.close()
print(f"\nTraining complete. Model saved to {MODEL_DIR}")

Logging to ../logs/ppo_defend_center/ppo_defend_center_2


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 80.5     |
|    ep_rew_mean     | -7.4     |
| time/              |          |
|    fps             | 50       |
|    iterations      | 1        |
|    time_elapsed    | 653      |
|    total_timesteps | 32768    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 82.8        |
|    ep_rew_mean          | -7.16       |
| time/                   |             |
|    fps                  | 49          |
|    iterations           | 2           |
|    time_elapsed         | 1313        |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.002311129 |
|    clip_fraction        | 0.0576      |
|    clip_range           | 0.1         |
|    entropy_loss         | -1.1        |
|    explained_variance   | -0.0145     |
|    learning_rate        | 0.


Training complete. Model saved to ../models/ppo_defend_center/


In [4]:
# A2C Model training setup

a2c_checkpoint_callback = CheckpointCallback(
    save_freq=CHECKPOINT_TIMESTEPS // NUM_ENVS,
    save_path=A2C_MODEL_DIR,
    name_prefix=A2C_MODEL_SCENARIO_NAME
)
a2c_episode_logger = DoomDefendCenterLoggerCallback(
    log_dir=A2C_LOG_DIR,
    log_file=f"{A2C_MODEL_SCENARIO_NAME}_episodes_v1_{TOTAL_TIMESTEPS}.csv",
    verbose=0
)
a2c_combined_callback = CallbackList([a2c_checkpoint_callback, a2c_episode_logger])

# creating the model
a2c_model = A2C(
    "CnnPolicy",
    env,
    verbose=1,
    tensorboard_log=A2C_LOG_DIR,
    device="cuda" if torch.cuda.is_available() else "cpu",
    learning_rate=2.5e-4,
    n_steps=4096,        # rollout length
    gamma=0.99,
    gae_lambda=0.95,
    ent_coef=0.05,
    vf_coef=0.4,
    max_grad_norm=0.5,
    seed=SEED
)

env.reset()

Using cuda device
Wrapping the env in a VecTransposeImage.


array([[[[  2],
         [  7],
         [  8],
         ...,
         [  5],
         [  8],
         [  6]],

        [[  2],
         [  5],
         [  6],
         ...,
         [  3],
         [  0],
         [ 10]],

        [[  8],
         [  1],
         [  2],
         ...,
         [  7],
         [  4],
         [  3]],

        ...,

        [[111],
         [ 93],
         [ 87],
         ...,
         [ 68],
         [ 68],
         [ 54]],

        [[107],
         [ 93],
         [ 88],
         ...,
         [ 64],
         [ 64],
         [ 50]],

        [[ 98],
         [ 78],
         [ 74],
         ...,
         [ 50],
         [ 49],
         [ 49]]],


       [[[  2],
         [  7],
         [  8],
         ...,
         [  5],
         [  8],
         [  6]],

        [[  2],
         [  5],
         [  6],
         ...,
         [  3],
         [  0],
         [ 10]],

        [[  8],
         [  1],
         [  2],
         ...,
         [  7],
         [

In [5]:
a2c_model.learn(
    total_timesteps=TOTAL_TIMESTEPS // NUM_ENVS * NUM_ENVS,
    callback=a2c_combined_callback,
    progress_bar=True,
    tb_log_name=f"{A2C_MODEL_SCENARIO_NAME}_{TOTAL_TIMESTEPS}"
)

a2c_model.save(os.path.join(A2C_MODEL_DIR, f"a2c_defend_center_{TOTAL_TIMESTEPS}_final"))
env.close()
print(f"\nTraining complete. Model saved to {A2C_MODEL_DIR}")

Logging to ../logs/a2c_defend_center/a2c_defend_center_125000_1


In [8]:
# from stable_baselines3 import PPO
# from envs.vizdoom_env import VizDoomGym
import time
from vizdoom import ScreenResolution
# import cv2

# VIDEO_DIR = '../videos/'
# os.makedirs(VIDEO_DIR, exist_ok=True)
# VIDEO_PATH = os.path.join(VIDEO_DIR, "defend_center_ppo.mp4")

eval_env = VizDoomGym(SCENARIO_PATH, render=True)

# Resizing for evaluation
eval_env.game.set_window_visible(True)
eval_env.game.set_screen_resolution(ScreenResolution.RES_1280X960)
eval_env.game.init()  # Re-initialize after changing resolution

model = PPO.load(f"{MODEL_DIR}ppo_defend_center_125000_final")
model = A2C.load(f"{A2C_MODEL_DIR}a2c_defend_center_125000_final")

for ep in range(5):
    obs, info = eval_env.reset()
    done = False
    total_reward = 0
    while not done:
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = eval_env.step(action)
        done = terminated or truncated
        total_reward += reward
        time.sleep(0.02)
    
    # print(f"Episode {ep+1} reward: {total_reward}")
    # print(f"Video saved to {VIDEO_DIR}")

eval_env.close()