# Tutorial

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from doom_arena import VizdoomMPEnv

from doom_arena.player import ObsBuffer

In [None]:
# Create the tutorial environment: one controlled player and 16 bots on the
# "TRMN" map. The meaning of each keyword is defined by VizdoomMPEnv, which is
# not shown here; the notes below are assumptions to confirm against it.
env = VizdoomMPEnv(
    num_players=1,
    num_bots=16,
    bot_skill=2,  # presumably a difficulty level for the bots — TODO confirm range
    doom_map="TRMN",
    extra_state=[ObsBuffer.LABELS],  # request the LABELS buffer in addition to the default observation
    episode_timeout=1000,  # NOTE(review): unit (tics vs. steps) not visible here — confirm
)

In [4]:
# Turn on replay capture; a later cell reads it back with
# env.get_player_replays() to render the episode.
env.enable_replay()

## Random policy (1 player vs. bots)

In [None]:
# Roll out one episode with uniformly sampled actions and report the
# per-player return.
MAX_STEPS = 1000  # hard cap on environment steps per episode

for episode in range(1):
    # One running return per player, keyed by player index.
    ep_return = {k: 0.0 for k in range(env.num_players)}
    ep_step = 0
    obs = env.reset()
    for _ in range(MAX_STEPS):
        # Sample a fresh action every step so the policy is actually random.
        act = env.action_space.sample()
        obs, rwd, done, info = env.step(act)
        # Count every executed step, including the terminal one.
        ep_step += 1
        # rwd is indexed per player; the keys of ep_return are those indices.
        ep_return = {k: ep_return[k] + rwd[k] for k in ep_return}
        if done:
            break
    # Report once per episode, whether it terminated or hit MAX_STEPS.
    print("ep steps: {}; ep return: {}".format(ep_step, ep_return))

In [None]:
from IPython.display import HTML

from doom_arena.render import render_episode


# Build an animation from the recorded replays of the episode above.
# subsample=10 is forwarded to render_episode; presumably it keeps every
# 10th frame — TODO confirm against doom_arena.render.
ani = render_episode(env.get_player_replays(), subsample=10)
# Last expression of the cell: embed the animation as an HTML5 video.
HTML(ani.to_html5_video())

## Eval DQN

In [1]:
import os
import sys

# Make the parent directory importable (presumably so the `agents` package
# used below resolves — confirm repo layout). Guarded so that re-running the
# cell does not keep appending duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

# Default to GPU 2, but respect a CUDA_VISIBLE_DEVICES value that is already
# set in the environment. This must run before CUDA is initialised, i.e.
# before the cell that imports torch.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "2")

In [2]:
import numpy as np
import torch
from torch.nn import functional as F
from torchvision import transforms

from agents.utils import stack_dict, to_tensor, resize, minmax


# Observation preprocessing pipeline, applied in order:
# stack_dict -> to_tensor -> resize -> minmax (helpers from agents.utils).
# It is handed to the environment as `player_transform` in the next cell.
frame_transform = transforms.Compose([stack_dict, to_tensor, resize, minmax])

In [None]:
from doom_arena import VizdoomMPEnv
from doom_arena.player import ObsBuffer

# Passed to the environment as n_stack_frames and later used to choose the
# DQN `space` argument (3 when > 1, else 2).
N_STACK_FRAMES = 4

# Evaluation environment: one controlled player and 12 bots on "TRMN", with
# LABELS and DEPTH buffers requested and observations preprocessed by
# frame_transform.
env = VizdoomMPEnv(
    num_players=1,
    num_bots=12,
    doom_map="TRMN",
    extra_state=[ObsBuffer.LABELS, ObsBuffer.DEPTH],
    n_stack_frames=N_STACK_FRAMES,
    episode_timeout=2000,  # NOTE(review): unit (tics vs. steps) not visible here — confirm
    hud="none",  # presumably hides the in-game HUD from the frames — TODO confirm
    player_transform=frame_transform,
)

# Turn on replay capture so the episode can be rendered with
# env.get_player_replays() after evaluation.
env.enable_replay()

In [18]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines where CUDA is unavailable.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Display the observation space; its shape[0] is used as the DQN input_dim
# when the network is built below.
env.observation_space

In [None]:
from agents.dqn import DQN


# Build the network so its input/output sizes match the environment:
# input_dim comes from the first observation dimension and action_space from
# the number of discrete actions.
dqn = DQN(
    space=3 if N_STACK_FRAMES > 1 else 2,
    input_dim=env.observation_space.shape[0],
    action_space=env.action_space.n,
    dim=64,
).to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a different GPU index (or loaded on a CPU-only host)
# still deserialises. Kept as the last expression so the key-matching result
# is shown as the cell output.
dqn.load_state_dict(torch.load("dqn.pth", map_location=device))

In [None]:
# Run one episode with the loaded DQN acting greedily and report the
# per-player return.
dqn.eval()  # evaluation mode: disables training-only behaviour such as dropout

# One running return per player, keyed by player index.
ep_return = {k: 0.0 for k in range(env.num_players)}
ep_step = 0
done = False
obs = env.reset()
while not done:
    # obs is indexed per player; only player 0 is controlled here.
    state = obs[0].to(device)
    with torch.no_grad():
        # Add a leading batch dimension and pick the highest-scoring action.
        act = dqn(state.unsqueeze(0)).argmax().item()
    obs, rwd, done, info = env.step(act)
    # Count every executed step, including the terminal one.
    ep_step += 1
    # rwd is indexed per player; the keys of ep_return are those indices.
    ep_return = {k: ep_return[k] + rwd[k] for k in ep_return}

print("ep steps: {}; ep return: {}".format(ep_step, ep_return))

In [None]:
from IPython.display import HTML

from doom_arena.render import render_episode


# Build an animation from the recorded replays of the DQN evaluation episode.
# subsample=5 is forwarded to render_episode; presumably it keeps every
# 5th frame — TODO confirm against doom_arena.render.
ani = render_episode(env.get_player_replays(), subsample=5)
# Last expression of the cell: embed the animation as an HTML5 video.
HTML(ani.to_html5_video())