<a href="https://www.kaggle.com/code/juacastanori/doom-torch?scriptVersionId=249331554" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# ───── INSTALLATION ─────
!pip install -q vizdoom==1.2.4 gymnasium==0.28.1 pyvirtualdisplay \
               imageio imageio-ffmpeg opencv-python-headless \
               torch torchvision

In [None]:
# ───── IMPORTS ─────
from pyvirtualdisplay import Display
Display(visible=0, size=(640,480)).start()

import gymnasium as gnm, gym as ogym
import numpy as np, random, cv2, imageio
import torch, torch.nn as nn, torch.optim as optim
from collections import deque, namedtuple
from vizdoom import gymnasium_wrapper
from IPython.display import Video, display
import matplotlib.pyplot as plt


In [None]:
# ───── ENV WRAPPER ─────
class RGBOnly(ogym.Wrapper):
    """Reduce a dict-observation env to its "screen" image and a 4-tuple step.

    ``reset`` returns only the screen (the info dict is dropped) and ``step``
    returns ``(screen, reward, done, info)`` where ``done`` merges the wrapped
    env's terminated and truncated flags.
    """

    def __init__(self, env):
        super().__init__(env)
        n_actions = env.action_space.n
        self.action_space = ogym.spaces.Discrete(n_actions)
        # NOTE(review): the screen shape is fixed here rather than read from
        # the wrapped env - confirm it matches the scenario's resolution.
        self.observation_space = ogym.spaces.Box(
            low=0, high=255, shape=(240, 320, 3), dtype=np.uint8
        )

    def reset(self, **kwargs):
        """Reset the wrapped env and return only the "screen" observation."""
        observation, _info = self.env.reset(**kwargs)
        return observation["screen"]

    def step(self, action):
        """Step the wrapped env; return (screen, reward, done, info)."""
        observation, reward, terminated, truncated, info = self.env.step(action)
        # NOTE(review): callers cannot tell a time-limit truncation from a
        # true terminal state once the two flags are merged.
        done = terminated or truncated
        return observation["screen"], reward, done, info

# ───── MODEL ─────
class DuelingDQN(nn.Module):
    """Dueling DQN: a shared conv trunk feeding a value head and an advantage head.

    Q-values are combined as ``V + (A - mean(A))`` over the action dimension.

    Args:
        in_channels: number of input channels.
        num_actions: number of discrete actions (width of the advantage head).
        input_size: spatial size of the input. Either an int for a square
            input or a ``(height, width)`` pair. Defaults to 84, the size the
            network was originally hard-coded for.

    Raises:
        ValueError: if ``input_size`` is too small for the three conv layers.
    """

    # (kernel, stride) of each conv layer, in order; used to size the heads.
    _CONV_SPEC = ((8, 4), (4, 2), (3, 1))

    def __init__(self, in_channels=4, num_actions=6, input_size=84):
        super().__init__()
        # Attribute names and layer order are kept as-is: they define the
        # state_dict keys that saved checkpoints rely on.
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 32, 8, 4), nn.ReLU(),
            nn.Conv2d(32, 64, 4, 2), nn.ReLU(),
            nn.Conv2d(64, 64, 3, 1), nn.ReLU()
        )
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size
        linear_size = self._conv_out(height) * self._conv_out(width) * 64
        self.fc_value = nn.Sequential(nn.Linear(linear_size, 512), nn.ReLU(), nn.Linear(512, 1))
        self.fc_adv = nn.Sequential(nn.Linear(linear_size, 512), nn.ReLU(), nn.Linear(512, num_actions))

    @classmethod
    def _conv_out(cls, size):
        """Return the output length of one spatial dim after the conv stack."""
        for kernel, stride in cls._CONV_SPEC:
            if size < kernel:
                # Floor division would otherwise produce zero/negative sizes
                # that could multiply into a bogus positive feature count.
                raise ValueError(
                    f"input dimension too small for conv stack (got {size}, need >= {kernel})"
                )
            size = (size - kernel) // stride + 1
        return size

    def forward(self, x):
        """Return Q-values of shape (batch, num_actions) for a batch of inputs."""
        x = x / 255.0  # scale 0..255 pixel values to 0..1
        features = self.conv(x).flatten(1)
        value = self.fc_value(features)
        advantage = self.fc_adv(features)
        # Subtracting the mean advantage makes the V/A split identifiable.
        return value + (advantage - advantage.mean(dim=1, keepdim=True))

In [None]:
# ───── BUFFER ─────
# One stored step: state, action, reward, next state, done flag.
Transition = namedtuple('Transition', ['s', 'a', 'r', 's2', 'd'])


class ReplayBuffer:
    """Bounded FIFO store of Transition tuples with uniform random sampling."""

    def __init__(self, capacity=100000):
        # A deque with maxlen discards its oldest entry once it is full.
        self.buf = deque(maxlen=capacity)

    def push(self, *args):
        """Store one transition; ``args`` are the Transition fields in order."""
        transition = Transition(*args)
        self.buf.append(transition)

    def sample(self, bs):
        """Return a list of ``bs`` distinct stored transitions, chosen at random.

        Raises ValueError (from ``random.sample``) if ``bs`` exceeds the
        number of stored transitions.
        """
        return random.sample(self.buf, k=bs)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.buf)

# ───── PREPROCESSING ─────
def preprocess(frame):
    """Convert an RGB frame to grayscale and resize it to 84x84."""
    return cv2.resize(cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY), (84, 84))

def stack_frames(st, new_frame, is_new):
    """Return a 4-frame stack updated with the preprocessed ``new_frame``.

    Args:
        st: the current stack (frames along axis 0); ignored when ``is_new``.
        new_frame: raw frame, passed through ``preprocess``.
        is_new: if true, start a fresh stack made of four copies of the new
            frame; otherwise drop the oldest frame and append the new one.
    """
    latest = preprocess(new_frame)
    if not is_new:
        # Slide the window: discard index 0, put the newest frame last.
        return np.concatenate([st[1:], latest[np.newaxis]], axis=0)
    return np.stack([latest, latest, latest, latest], axis=0)

In [None]:
# ───── ENVIRONMENT AND MODEL SETUP ─────
# Wrapped VizDoom env, the online (policy) and target networks, and the
# optimizer that updates the policy network only.
env = RGBOnly(gnm.make("VizdoomBasic-v0", render_mode="rgb_array"))
NUM_ACTIONS = env.action_space.n
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
policy_net = DuelingDQN(in_channels=4, num_actions=NUM_ACTIONS).to(device)
target_net = DuelingDQN(in_channels=4, num_actions=NUM_ACTIONS).to(device)
# Start the target network as an exact copy of the policy network.
target_net.load_state_dict(policy_net.state_dict())
optimizer = optim.Adam(policy_net.parameters(), lr=1e-4)

In [None]:
# ───── LOAD MODEL IF IT EXISTS ─────
# Warm-start both networks from a previously saved checkpoint when one is
# available; otherwise keep the freshly initialized weights.
model_path = "/kaggle/input/doom-dqn-model/dueling_dqn_vizdoom.pth"
try:
    # NOTE(review): torch.load unpickles the file - only load checkpoints
    # that come from a trusted source.
    # map_location lets a checkpoint saved on GPU load in a CPU-only session.
    state_dict = torch.load(model_path, map_location=device)
    policy_net.load_state_dict(state_dict)
    target_net.load_state_dict(state_dict)
    print("✅ Modelo cargado correctamente")
except FileNotFoundError:
    print("⚠️ No se encontró modelo previo, se iniciará desde cero")
except Exception as exc:
    # Any other failure (corrupt file, mismatched architecture, ...) is
    # reported as such instead of being mislabelled as "not found". Training
    # still proceeds, matching the original best-effort behavior.
    print(f"⚠️ No se pudo cargar el modelo previo ({exc!r}), se iniciará desde cero")
    # A failed load may have copied some tensors already; re-sync the target.
    target_net.load_state_dict(policy_net.state_dict())

In [None]:
# ───── HYPERPARAMETERS ─────
batch_size = 32        # transitions sampled per gradient step
gamma = 0.99           # discount factor applied to the bootstrapped target
eps_start = 1.0        # initial exploration rate
eps_end = 0.1          # exploration rate approached as steps grow
eps_decay = 10000      # step scale of the exponential epsilon decay
target_update = 1000   # sync the target network every this many steps
steps_done = 0         # global action counter, advanced by select_action
buffer = ReplayBuffer()
episode_rewards = []   # total reward of each finished training episode

# ───── ACTION SELECTION ─────
def select_action(state):
    """Choose an action epsilon-greedily and advance the global step counter.

    Epsilon decays exponentially from ``eps_start`` towards ``eps_end`` as
    ``steps_done`` grows. With probability epsilon a uniformly random action
    index is returned; otherwise the argmax of ``policy_net`` on ``state``.
    """
    global steps_done
    decay = np.exp(-1. * steps_done / eps_decay)
    eps = eps_end + (eps_start - eps_end) * decay
    steps_done += 1
    explore = random.random() < eps
    if explore:
        return random.randrange(NUM_ACTIONS)
    # Add a leading batch dimension before the forward pass.
    batch = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
    with torch.no_grad():
        return policy_net(batch).argmax().item()

In [None]:
# ───── TRAINING ─────
# Online DQN training: act epsilon-greedily, store every transition in the
# replay buffer, and take one gradient step per environment step once the
# buffer holds at least one batch.
num_episodes = 1000  # adjust as needed
mse_loss = nn.MSELoss()
for epi in range(num_episodes):
    frame = env.reset()
    state = stack_frames(None, frame, True)
    total_r = 0
    done = False
    while not done:
        action = select_action(state)
        frame2, reward, done, _ = env.step(action)
        total_r += reward
        next_state = stack_frames(state, frame2, False)
        buffer.push(state, action, reward, next_state, done)
        state = next_state

        if len(buffer) >= batch_size:
            # Turn a list of Transitions into one Transition of field tuples.
            batch = Transition(*zip(*buffer.sample(batch_size)))
            states_b = torch.tensor(np.stack(batch.s), dtype=torch.float32, device=device)
            actions_b = torch.tensor(batch.a, dtype=torch.int64, device=device).unsqueeze(1)
            rewards_b = torch.tensor(batch.r, dtype=torch.float32, device=device).unsqueeze(1)
            next_states_b = torch.tensor(np.stack(batch.s2), dtype=torch.float32, device=device)
            dones_b = torch.tensor(batch.d, dtype=torch.float32, device=device).unsqueeze(1)

            # Bootstrapped target from the target network; (1 - done) removes
            # the bootstrap term for transitions flagged as done.
            with torch.no_grad():
                q_next = target_net(next_states_b).max(dim=1, keepdim=True).values
                q_target = rewards_b + gamma * q_next * (1 - dones_b)
            # Q-value the policy network assigns to the action actually taken.
            q_eval = policy_net(states_b).gather(1, actions_b)

            loss = mse_loss(q_eval, q_target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Periodically copy the policy weights into the target network.
        if steps_done % target_update == 0:
            target_net.load_state_dict(policy_net.state_dict())

    episode_rewards.append(total_r)
    print(f"🎯 Episodio {epi+1}/{num_episodes}, Recompensa: {total_r:.2f}")




In [None]:
# ───── PLOT REWARDS ─────
# Line plot of the total reward of each training episode.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(episode_rewards, label='Recompensa por episodio')
ax.set_xlabel('Episodio')
ax.set_ylabel('Recompensa')
ax.set_title('Curva de aprendizaje - Continuación')
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── FINAL VIDEO ─────
# Play one greedy episode (argmax of policy_net, no exploration), draw the
# chosen action index on every frame and save the result as an MP4.
frame = env.reset()
state = stack_frames(None, frame, True)
done = False
episode_reward = 0.0

# The context manager closes the writer even if the rollout raises, so a
# failed run does not leak the video writer.
with imageio.get_writer("doom_dueling_test.mp4", fps=35) as writer:
    while not done:
        with torch.no_grad():
            s = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
            a = policy_net(s).argmax().item()
        frame2, r, done, _ = env.step(a)
        episode_reward += r
        state = stack_frames(state, frame2, False)
        vis = frame2.copy()  # annotate a copy, not the env's own frame
        cv2.putText(vis, str(a), (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2)
        writer.append_data(vis)
display(Video("doom_dueling_test.mp4", embed=True, width=640))
print(f"🎯 Recompensa total del video: {episode_reward:.2f}")

In [None]:
# ───── SAVE MODEL ─────
# Persist only the policy network's weights (its state_dict).
trained_weights = policy_net.state_dict()
torch.save(trained_weights, "dueling_model2.pth")
print("💾 Modelo guardado como dueling_model2.pth")