In [1]:
# ─── INSTALACIONES ─────────────────────────────────────────────────────────────
!pip install vizdoom==1.2.4 pyvirtualdisplay imageio imageio-ffmpeg opencv-python-headless stable-baselines3 --quiet


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m28.1/28.1 MB[0m [31m57.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m97.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m78.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m42.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# ─── IMPORTS ───────────────────────────────────────────────────────────────────
import os
import random
import shutil
from collections import deque

import numpy as np
import matplotlib.pyplot as plt
import imageio
import cv2

from pyvirtualdisplay import Display
from IPython.display import Video, display

import torch as th
import torch.nn as nn

import vizdoom as vzd

import gym
from gym.wrappers import FrameStack

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecTransposeImage, VecFrameStack
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

# ─── GPU AND DISPLAY SETUP ─────────────────────────────────────────────────────
# Use CUDA when PyTorch reports an available GPU, otherwise fall back to the CPU.
device = th.device("cuda" if th.cuda.is_available() else "cpu")
# Start a non-visible 640x480 virtual display (pyvirtualdisplay); the returned
# Display object is the cell's displayed result.
Display(visible=0, size=(640, 480)).start()

2025-07-22 23:36:55.639751: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753227415.875455      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753227415.942953      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


<pyvirtualdisplay.display.Display at 0x7b6170b6bb50>

In [3]:
# ─── DOWNLOAD THE CUSTOM SCENARIO ──────────────────────────────────────────────
# Fetch the multi-map bots deathmatch scenario (.cfg and .wad) from the
# lkiel/rl-doom repository (develop branch) into the working directory.
# Note: create_game() only references the .wad; the game settings it loads come
# from reward_shaped.cfg, not from the .cfg downloaded here.
!wget -q https://raw.githubusercontent.com/lkiel/rl-doom/develop/scenarios/bots_deathmatch_multimaps.cfg -O bots_deathmatch_multimaps.cfg
!wget -q https://raw.githubusercontent.com/lkiel/rl-doom/develop/scenarios/bots_deathmatch_multimaps.wad -O bots_deathmatch_multimaps.wad

In [4]:
# ─── CUSTOM CONFIGURATION FILE ─────────────────────────────────────────────────
# Writes the ViZDoom config consumed by create_game():
#   - 4096-tic episode timeout, 160x120 RGB24 frames, HUD/weapon/crosshair hidden,
#     no visible window;
#   - six buttons, in the same order as the per-action button lists used by the
#     environment (ATTACK, MOVE_FORWARD, TURN_LEFT, TURN_RIGHT, MOVE_LEFT, MOVE_RIGHT);
#   - the game variables listed under available_game_variables.
# The file is overwritten on every run of this cell ("w" mode).
with open("reward_shaped.cfg", "w") as f:
    f.write("""
episode_timeout = 4096
screen_resolution = RES_160X120
screen_format = RGB24
render_hud = false
render_weapon = false
render_crosshair = false
window_visible = false

available_buttons =
    {
        ATTACK
        MOVE_FORWARD
        TURN_LEFT
        TURN_RIGHT
        MOVE_LEFT
        MOVE_RIGHT
    }

available_game_variables =
    {
        KILLCOUNT
        DAMAGECOUNT
        SELECTED_WEAPON
        SELECTED_WEAPON_AMMO
        HEALTH
        ARMOR
        POSITION_X
        POSITION_Y
    }

mode = PLAYER
""")

In [5]:
# ─── FUNCIÓN PARA INICIALIZAR EL JUEGO ─────────────────────────────────────────
def create_game(n_bots=5):
    """
    Create and initialise a ViZDoom deathmatch game populated with bots.

    Loads ``reward_shaped.cfg``, points the engine at
    ``bots_deathmatch_multimaps.wad`` (map "M"), starts a hosted deathmatch,
    then removes any existing bots and adds ``n_bots`` new ones.

    Args:
        n_bots: Number of bots to add after initialisation. Defaults to 5,
            the value that was previously hard-coded.

    Returns:
        The initialised ``vzd.DoomGame`` instance. The caller owns it and
        should call ``close()`` on it when finished.

    Raises:
        ValueError: If ``n_bots`` is negative.
    """
    if n_bots < 0:
        raise ValueError(f"n_bots must be >= 0, got {n_bots}")

    game = vzd.DoomGame()
    game.load_config("reward_shaped.cfg")
    game.set_doom_scenario_path("bots_deathmatch_multimaps.wad")
    game.set_doom_map("M")
    game.add_game_args("-host 1 -deathmatch +sv_spawnfarthest 1 +viz_nocheat 0 +sv_forcerespawn 1")
    game.set_mode(vzd.Mode.PLAYER)
    game.init()

    # Start from a clean slate so the bot count is exactly n_bots.
    game.send_game_command("removebots")
    for _ in range(n_bots):
        game.send_game_command("addbot")
    return game

In [6]:
class DoomShapedEnv(gym.Env):
    """
    Custom Gym environment wrapping ViZDoom with reward shaping.

    Observation: RGB frame in channel-first layout, shape (3, 120, 160), uint8.
    Action: index into ``self.actions`` (14 discrete button combinations).

    Follows the classic Gym API: ``reset()`` returns only the observation and
    ``step()`` returns ``(obs, reward, done, info)``.
    """

    # Shaping coefficients added on top of the reward returned by the game.
    REWARD_WEAPON_CHANGE = 0.05
    REWARD_PER_DAMAGE = 0.01
    REWARD_AMMO_PICKUP = 0.02
    # NOTE(review): ammo usage is rewarded (positive sign), unlike health loss
    # which is penalised — confirm this asymmetry is intended.
    REWARD_AMMO_USAGE = 0.01
    REWARD_HEALTH_GAIN = 0.02
    PENALTY_HEALTH_LOSS = 0.01
    REWARD_ARMOR_GAIN = 0.01
    REWARD_KILL = 1.0
    # Displacement (in position units) per step above which the agent is
    # considered to be moving.
    MOVE_DISTANCE_THRESHOLD = 3
    REWARD_MOVING = 0.0005
    PENALTY_STANDING = 0.0025
    # Number of AMMO<i> / WEAPON<i> game variables that are read (i = 0..9).
    N_SLOTS = 10

    def __init__(self, frame_skip=1):
        """
        Args:
            frame_skip: Number of tics each action is repeated for; passed
                straight to ``DoomGame.make_action``.
        """
        super().__init__()
        self.frame_skip = frame_skip
        self.game = create_game()
        self.game.new_episode()

        # Advertise that render() returns frames so they can be captured.
        self.render_mode = "rgb_array"

        # Observation space: (channels, height, width).
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(3, 120, 160), dtype=np.uint8
        )

        # The 14 available actions, each a per-button on/off list in the order
        # [ATTACK, MOVE_FORWARD, TURN_LEFT, TURN_RIGHT, MOVE_LEFT, MOVE_RIGHT].
        self.actions = [
            [1, 0, 0, 0, 0, 0],  # attack
            [0, 1, 0, 0, 0, 0],  # move forward
            [0, 0, 1, 0, 0, 0],  # turn left
            [0, 0, 0, 1, 0, 0],  # turn right
            [0, 0, 0, 0, 1, 0],  # strafe left
            [0, 0, 0, 0, 0, 1],  # strafe right
            [0, 1, 1, 0, 0, 0],  # forward + turn left
            [0, 1, 0, 1, 0, 0],  # forward + turn right
            [1, 0, 1, 0, 0, 0],  # attack + turn left
            [1, 0, 0, 1, 0, 0],  # attack + turn right
            [1, 1, 0, 0, 0, 0],  # attack + forward
            [1, 0, 0, 0, 1, 0],  # attack + strafe left
            [1, 0, 0, 0, 0, 1],  # attack + strafe right
            [0, 0, 0, 0, 0, 0],  # no-op
        ]
        self.action_space = gym.spaces.Discrete(len(self.actions))

        # Internal bookkeeping used by the reward shaping.
        self._reset_trackers()

    def _read_slot_variables(self, prefix):
        """Read game variables ``<prefix>0`` .. ``<prefix>9`` into an int32 array."""
        return np.array(
            [
                self.game.get_game_variable(getattr(vzd.GameVariable, f"{prefix}{i}"))
                for i in range(self.N_SLOTS)
            ],
            dtype=np.int32,
        )

    def _get_ammo_state(self):
        """Return the AMMO0..AMMO9 game variables as an int32 array."""
        return self._read_slot_variables("AMMO")

    def _get_weapon_state(self):
        """Return the WEAPON0..WEAPON9 game variables as an int32 array."""
        return self._read_slot_variables("WEAPON")

    def _reset_trackers(self):
        """Re-initialise the previous-step values the shaping terms are diffed against."""
        read = self.game.get_game_variable
        self.last_health = 100
        self.last_armor = 0
        self.last_pos = (
            read(vzd.GameVariable.POSITION_X),
            read(vzd.GameVariable.POSITION_Y),
        )
        self.last_damage = 0
        self.ammo_state = self._get_ammo_state()
        self.weapon_state = self._get_weapon_state()
        self.last_kills = read(vzd.GameVariable.KILLCOUNT)
        self.last_weapon_id = read(vzd.GameVariable.SELECTED_WEAPON)

    def _get_observation(self, state):
        """Convert a game state into a (C, H, W) frame; all zeros when there is no state."""
        if state is None:
            return np.zeros(self.observation_space.shape, dtype=np.uint8)
        return np.transpose(state.screen_buffer, (2, 0, 1))

    def step(self, action_idx):
        """
        Apply one action and return the shaped transition.

        Args:
            action_idx: Index into ``self.actions``.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``reward`` is the game
            reward plus the shaping terms and ``info`` is an empty dict.
        """
        if self.game.is_episode_finished():
            # The episode is already over: restart and report a terminal step.
            obs = self.reset()
            return obs, 0.0, True, {}

        reward = self.game.make_action(self.actions[action_idx], self.frame_skip)
        done = self.game.is_episode_finished()
        obs = self._get_observation(self.game.get_state())

        # Current values of everything the shaping terms depend on.
        read = self.game.get_game_variable
        health = read(vzd.GameVariable.HEALTH)
        armor = read(vzd.GameVariable.ARMOR)
        damage = read(vzd.GameVariable.DAMAGECOUNT)
        killcount = read(vzd.GameVariable.KILLCOUNT)
        weapon_id = read(vzd.GameVariable.SELECTED_WEAPON)
        x = read(vzd.GameVariable.POSITION_X)
        y = read(vzd.GameVariable.POSITION_Y)
        new_ammo = self._get_ammo_state()

        # Kills are rewarded whether or not the agent is alive, so kills made
        # right before dying still count.
        extra_kill = self.REWARD_KILL * max(0, killcount - self.last_kills)

        shaped_reward = reward

        if health > 0:
            # Weapon switch.
            weapon_changed = weapon_id != self.last_weapon_id
            extra_weapon_change = self.REWARD_WEAPON_CHANGE if weapon_changed else 0.0

            # Damage dealt.
            extra_damage = self.REWARD_PER_DAMAGE * (damage - self.last_damage)

            # Ammo (ignored on the step where the selected weapon changed).
            # The weapon mask is refreshed every step so that ammo for weapons
            # picked up during the episode is taken into account; previously it
            # was only captured at reset.
            self.weapon_state = self._get_weapon_state()
            if weapon_changed:
                extra_ammo = 0.0
            else:
                ammo_diff = (new_ammo - self.ammo_state) * self.weapon_state
                pickup = np.clip(ammo_diff, 0, None).sum()
                usage = -np.clip(ammo_diff, None, 0).sum()
                extra_ammo = self.REWARD_AMMO_PICKUP * pickup + self.REWARD_AMMO_USAGE * usage

            # Health (skipped when the previous step recorded the agent as dead,
            # so the jump back to positive health is not counted as a gain).
            if self.last_health > 0:
                health_gain = max(0, health - self.last_health)
                health_loss = -min(0, health - self.last_health)
            else:
                health_gain = 0
                health_loss = 0
            extra_health = (
                self.REWARD_HEALTH_GAIN * health_gain
                - self.PENALTY_HEALTH_LOSS * health_loss
            )

            # Armor gained.
            extra_armor = self.REWARD_ARMOR_GAIN * max(0, armor - self.last_armor)

            # Movement: small bonus for moving, larger penalty for standing still.
            dist = np.sqrt((x - self.last_pos[0])**2 + (y - self.last_pos[1])**2)
            if dist > self.MOVE_DISTANCE_THRESHOLD:
                extra_move = self.REWARD_MOVING
            else:
                extra_move = -self.PENALTY_STANDING

            shaped_reward += (
                extra_damage + extra_ammo + extra_health + extra_armor +
                extra_move + extra_kill + extra_weapon_change
            )
        else:
            # Dead: only kills contribute.
            shaped_reward += extra_kill

        # Remember this step's values for the next diff.
        self.last_health = health
        self.last_armor = armor
        self.last_pos = (x, y)
        self.last_damage = damage
        self.last_kills = killcount
        self.ammo_state = new_ammo
        self.last_weapon_id = weapon_id

        return obs, float(shaped_reward), done, {}

    def reset(self):
        """Start a new episode, reset the shaping trackers and return the first frame."""
        self.game.new_episode()
        self._reset_trackers()
        return self._get_observation(self.game.get_state())

    def render(self, mode='rgb_array'):
        """Return the current screen buffer as (H, W, C); zeros when no state is available."""
        state = self.game.get_state()
        if state is None:
            channels, height, width = self.observation_space.shape
            return np.zeros((height, width, channels), dtype=np.uint8)
        return state.screen_buffer

    def close(self):
        """Shut down the ViZDoom engine owned by this environment."""
        self.game.close()






In [7]:
class CustomCNN(BaseFeaturesExtractor):
    """
    Convolutional feature extractor for Doom screen frames.

    Three Conv2d + ReLU stages followed by Flatten, then one Linear + ReLU
    layer that projects to ``features_dim`` features.
    """

    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 512):
        super().__init__(observation_space, features_dim)

        obs_shape = observation_space.shape
        in_channels = obs_shape[0]  # first axis of the observation is the channel axis

        conv_stack = [
            nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Flatten(),
        ]
        self.cnn = nn.Sequential(*conv_stack)

        # Push one dummy observation through the conv stack to learn how many
        # features it produces; that sizes the linear layer.
        with th.no_grad():
            probe = th.zeros((1, *obs_shape))
            flat_features = self.cnn(probe).shape[1]

        self.linear = nn.Sequential(
            nn.Linear(flat_features, features_dim),
            nn.ReLU(),
        )

    def forward(self, observations: th.Tensor) -> th.Tensor:
        """Scale pixels by 1/255, then apply the conv stack and the linear head."""
        # NOTE(review): the policy may already normalise image observations
        # before calling the extractor — confirm this extra scaling is intended.
        scaled = observations / 255.0
        return self.linear(self.cnn(scaled))


In [8]:
def make_env():
    """Return a zero-argument factory that builds a ``DoomShapedEnv`` with frame_skip=1."""
    def _build_env():
        # Construction is deferred until the vectorised env calls the factory.
        return DoomShapedEnv(frame_skip=1)

    return _build_env


In [9]:
# ─── LOAD MODEL FROM ZIP ───────────────────────────────────────────────────────
# Directory holding the saved model's files; change it to wherever your model lives.
MODEL_DIR = "/kaggle/input/ppo-doom-phase1-nobots"

# Pack that directory into ppo_vdoom_phase1.zip in the working directory and
# load the archive. make_archive returns the path of the archive it created.
MODEL_ARCHIVE = shutil.make_archive("ppo_vdoom_phase1", "zip", MODEL_DIR)
model = PPO.load(MODEL_ARCHIVE, device=device)


In [10]:
# Show the loaded policy's module structure, e.g. to check that the number of
# input channels of the first conv layer matches the frame stacking used later.
print(model.policy)

ActorCriticCnnPolicy(
  (features_extractor): CustomCNN(
    (cnn): Sequential(
      (0): Conv2d(12, 32, kernel_size=(8, 8), stride=(4, 4))
      (1): ReLU()
      (2): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2))
      (3): ReLU()
      (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
      (5): ReLU()
      (6): Flatten(start_dim=1, end_dim=-1)
    )
    (linear): Sequential(
      (0): Linear(in_features=11264, out_features=512, bias=True)
      (1): ReLU()
    )
  )
  (pi_features_extractor): CustomCNN(
    (cnn): Sequential(
      (0): Conv2d(12, 32, kernel_size=(8, 8), stride=(4, 4))
      (1): ReLU()
      (2): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2))
      (3): ReLU()
      (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
      (5): ReLU()
      (6): Flatten(start_dim=1, end_dim=-1)
    )
    (linear): Sequential(
      (0): Linear(in_features=11264, out_features=512, bias=True)
      (1): ReLU()
    )
  )
  (vf_features_extractor): CustomCNN(
    (cn

In [11]:
## ─── EVALUACIÓN Y GRABACIÓN DE VIDEO ───────────────────────────────────────────
def evaluate_and_record(model, filename="doom_phase1_eval.mp4", num_episodes=2):
    """
    Run the model in a fresh evaluation environment and save a video of the play.

    Args:
        model: Trained agent exposing ``predict(obs, deterministic=True)``.
        filename: Path of the video file to write (15 fps).
        num_episodes: Number of episodes to play and record.

    Prints the total reward of each episode and embeds the resulting video in
    the notebook output. The video writer and the environment are always
    closed, even if an error interrupts the evaluation.
    """
    # Single evaluation environment with 4 stacked frames (channel-first).
    env_eval = DummyVecEnv([make_env()])
    rewards = []
    try:
        env_eval = VecFrameStack(env_eval, n_stack=4, channels_order='first')
        env_eval.venv.render_mode = "rgb_array"  # make sure frames are produced

        writer = imageio.get_writer(filename, fps=15)
        try:
            for ep in range(num_episodes):
                obs = env_eval.reset()
                done = [False]
                total_reward = 0.0

                while not done[0]:
                    action, _ = model.predict(obs, deterministic=True)
                    obs, reward, done, _ = env_eval.step(action)
                    # Accumulate as a plain Python float so the printed totals
                    # do not depend on NumPy scalar promotion rules.
                    total_reward += float(reward[0])

                    # Render the environment and append the frame to the video.
                    frame = env_eval.render(mode='rgb_array')
                    if frame is not None:
                        writer.append_data(frame)

                print(f"Episodio {ep+1}: reward = {total_reward:.2f}")
                rewards.append(total_reward)
        finally:
            # Finalise the video file even when evaluation fails midway.
            writer.close()
    finally:
        # Release the underlying game process(es).
        env_eval.close()

    print("Recompensas por episodio:", rewards)
    display(Video(filename, embed=True))

# Run the evaluation for a single episode and record it to doom_phase1_eval.mp4.
evaluate_and_record(model, filename="doom_phase1_eval.mp4", num_episodes=1)

See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


Episodio 1: reward = 4.07
Recompensas por episodio: [4.066000072634779]
