In [1]:
from vizdoom import *
import numpy as np
import pandas as pd

In [2]:
import helpers
from environments import utils
from environments import constants
from helpers import frameutils
from vizdoom.vizdoom import Mode
from vizdoom.vizdoom import ScreenFormat
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env import VecTransposeImage, VecFrameStack, DummyVecEnv
from stable_baselines3 import PPO
import collections

In [3]:
pd.options.display.float_format = '{:,.2f}'.format

In [16]:
from config import EnvironmentConfig

env_config = EnvironmentConfig({
    "scenario": "defend_the_center",
    "type": "singleplayer",
    "args": {},
    "vizdoom_mode": "PLAYER",
    "n_parallel": 4,
    "frame_skip": 1,
    "frame_stack": 1,
    "action_combination": False,
    "action_noop": False,
    "obs_width": 320,
    "obs_height": 240,
    "obs_channels": 3,
    "obs_crop": [
      40,
      4,
      0,
      4
    ],
    "obs_resize": [
      0.5,
      0.5
    ]
  })

In [41]:
env = VecTransposeImage(
   VecFrameStack(
        DummyVecEnv([lambda: utils.create_env(env_config, eval=True)]),
        1
    )
)


buttons = env.venv.venv.envs[0].game.get_available_buttons()
buttons = [str(b)[7:] for b in buttons]
buttons = np.array(['ATK', 'TL', 'TR'])
print(buttons)

possible_actions = env.venv.venv.envs[0].possible_actions
print(possible_actions)

env.close()

Built action space of size 3 from buttons [<Button.ATTACK: 0>, <Button.TURN_LEFT: 15>, <Button.TURN_RIGHT: 14>]
['ATK' 'TL' 'TR']
[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]


In [50]:
action_names = []

for a in possible_actions:
    active = np.flatnonzero(a)
    action_names.append('+'.join(buttons[active]))
    
action_names

['ATK', 'TL', 'TR']

In [51]:
action_df = pd.DataFrame(possible_actions, columns = buttons)
action_df['action_names'] = action_names
action_df

Unnamed: 0,ATK,TL,TR,action_names
0,1.0,0.0,0.0,ATK
1,0.0,1.0,0.0,TL
2,0.0,0.0,1.0,TR


In [20]:
model_path = '/home/leandro/ml/rl-doom/trained_agents/defend_the_center/best_model.zip'
model = PPO.load(model_path)

In [21]:
model.policy.to('cpu')

ActorCriticCnnPolicy(
  (features_extractor): CNNFeatureExtractor(
    (cnn): Sequential(
      (0): LayerNorm((3, 100, 156), eps=1e-05, elementwise_affine=True)
      (1): Conv2d(3, 32, kernel_size=(8, 8), stride=(4, 4), bias=False)
      (2): LayerNorm((32, 24, 38), eps=1e-05, elementwise_affine=True)
      (3): LeakyReLU(negative_slope=0.1)
      (4): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), bias=False)
      (5): LayerNorm((64, 11, 18), eps=1e-05, elementwise_affine=True)
      (6): LeakyReLU(negative_slope=0.1)
      (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
      (8): LayerNorm((64, 9, 16), eps=1e-05, elementwise_affine=True)
      (9): LeakyReLU(negative_slope=0.1)
      (10): Flatten(start_dim=1, end_dim=-1)
    )
    (linear): Sequential(
      (0): Linear(in_features=9216, out_features=128, bias=False)
      (1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (2): LeakyReLU(negative_slope=0.1)
    )
  )
  (mlp_extractor): MlpExtr

In [22]:
import torch as th
import time

In [34]:
env = VecTransposeImage(
   VecFrameStack(
        DummyVecEnv([lambda: utils.create_env(env_config, eval=True)]),
        1
    )
)

env.venv.venv.envs[0].game.set_seed(1)
obs = env.reset()
obs = th.as_tensor(obs).float()
done = False
i=0

maxlen=5000
obs_buffer = collections.deque(maxlen=maxlen)
proba_buffer = collections.deque(maxlen=maxlen)
action_buffer = collections.deque(maxlen=maxlen)
value_buffer = collections.deque(maxlen=maxlen)

with th.no_grad():
    while not done:
        i += 1
                
        obs_buffer.append(obs)

        latent_pi, latent_vf, latent_sde = model.policy._get_latent(obs)
        values =  model.policy.value_net(latent_vf)
        distribution = model.policy._get_action_dist_from_latent(latent_pi, latent_sde)
        
        action = distribution.get_actions(deterministic=False).numpy()
                
        proba_buffer.append(distribution.distribution.probs.numpy())
        value_buffer.append(values.item())
        
        obs, rew, done, _ = env.step(action)
        obs = th.as_tensor(obs).float()
        
        time.sleep(0.02)
env.close()

Built action space of size 3 from buttons [<Button.ATTACK: 0>, <Button.TURN_LEFT: 15>, <Button.TURN_RIGHT: 14>]


SignalException: Signal SIGINT received. ViZDoom instance has been closed.

In [29]:
env.close()

In [37]:
%matplotlib notebook

plt.plot(np.array(value_buffer))
plt.show()

<IPython.core.display.Javascript object>

In [40]:
proba_buffer

deque([array([[9.9446422e-01, 5.0171195e-03, 5.1870936e-04]], dtype=float32),
       array([[9.9449760e-01, 4.9921423e-03, 5.1022583e-04]], dtype=float32),
       array([[9.9600983e-01, 3.2571123e-03, 7.3306658e-04]], dtype=float32),
       array([[9.9593300e-01, 3.3847624e-03, 6.8233605e-04]], dtype=float32),
       array([[9.9603242e-01, 3.2522478e-03, 7.1529666e-04]], dtype=float32),
       array([[9.9522424e-01, 4.0171039e-03, 7.5861730e-04]], dtype=float32),
       array([[0.99426585, 0.00473268, 0.00100141]], dtype=float32),
       array([[0.9909492 , 0.00725623, 0.00179454]], dtype=float32),
       array([[0.9909492 , 0.00725623, 0.00179454]], dtype=float32),
       array([[0.9909492 , 0.00725623, 0.00179454]], dtype=float32),
       array([[0.963127  , 0.0186176 , 0.01825537]], dtype=float32),
       array([[0.00128848, 0.00135021, 0.99736136]], dtype=float32),
       array([[6.7629002e-04, 1.4527432e-03, 9.9787092e-01]], dtype=float32),
       array([[7.5536902e-04, 1.3702145e

In [52]:
n = 10
img_idx = 10
imrange =list(range(img_idx,img_idx+n))

f = plt.figure(constrained_layout=True, figsize=(15, n * 4))
gs = f.add_gridspec(len(imrange), 3)


for i in range(n):
    idx = img_idx + i
    probas =  proba_buffer[idx][0]
    action_dist = pd.DataFrame(probas, index=action_names).transpose()
    button_dist = np.sum(action_df[buttons] * probas[:,np.newaxis], axis=0).transpose()
    
    
    a1 = f.add_subplot(gs[i, 0])
    a2 = f.add_subplot(gs[i, 1])
    a3 = f.add_subplot(gs[i, 2:])
    
    a1.imshow(np.rollaxis(obs_buffer[idx][0].numpy()/255.0, 0, 3))
    a3.bar(action_names, probas)
    a3.tick_params(rotation=90)
    a3.set_ylim(0,1)
    a2.bar(buttons, button_dist)
    a2.tick_params(rotation=90)
    a2.set_ylim(0,1)
    

<IPython.core.display.Javascript object>

In [63]:
pb = np.array(proba_buffer).squeeze()

button_activation = []
for i in range(len(proba_buffer)):
    button_activation.append(np.sum(action_df[buttons] * pb[i][:, np.newaxis], axis=0))
    
button_activation = pd.concat(button_activation, axis=1).transpose()
button_activation = button_activation.ewm(alpha=0.7).mean()

button_activation

Unnamed: 0,ATK,TL,TR
0,0.99,0.01,0.00
1,0.99,0.00,0.00
2,1.00,0.00,0.00
3,1.00,0.00,0.00
4,1.00,0.00,0.00
...,...,...,...
58,0.00,0.29,0.71
59,0.00,0.25,0.75
60,0.00,0.15,0.85
61,0.00,0.05,0.95


In [64]:
button_activation.plot()

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [70]:
# Create a figure with two subplots
f, ax = plt.subplots(2,2, figsize=(9,5))
ax = ax.ravel()

i= 50
ax[1].set_xlim(0,100)
ax[2].set_ylim(0,1)
ax[3].set_ylim(0,1)

# Set titles
ax[0].set_title('Game frame')
ax[1].set_title('Value function')
ax[2].set_title('Button total')
ax[3].set_title('Action probabilities')

plt.subplots_adjust(top = 0.93, bottom = 0.07, hspace = 0.3)

button_activation.iloc[:10].plot(ax=ax[2])

probas =  proba_buffer[i][0]

button_dist = np.sum(action_df[buttons] * probas[:,np.newaxis], axis=0).transpose()

for a in ax:
    a.clear()
    ax[0].set_title('Game frame')
    ax[1].set_title('Value function')
    ax[2].set_title('Action probabilities')
    ax[3].set_title('Action probabilities')


ax[0].imshow(np.rollaxis(obs_buffer[i][0].numpy()/255.0, 0, 3))
ax[1].plot(np.array(value_buffer)[max(0, i-40): i+1])
ax[2].plot(button_activation.iloc[max(0,i-40):i+1])
ax[2].set_xlim(max(0, i-40), max(i+2,10))
ax[2].legend(buttons, loc='center left', bbox_to_anchor=(-0.35, 0.5))

for j,b in enumerate(buttons):
    ax[2].text(i+0.5, button_activation.iloc[i, j], b, va='center')

ax[2].tick_params(rotation=90)

<IPython.core.display.Javascript object>

In [66]:
%matplotlib notebook

import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

# Create a figure with two subplots
f, ax = plt.subplots(2,2, figsize=(9,5))
ax = ax.ravel()

ax[1].set_xlim(0,100)
ax[2].set_ylim(0,1)
ax[3].set_ylim(0,1)

# Set titles
ax[0].set_title('Game frame')
ax[1].set_title('Value function')
ax[2].set_title('Button total')
ax[3].set_title('Action probabilities')

plt.subplots_adjust(top = 0.93, bottom = 0.07, hspace = 0.3)

button_activation.iloc[:10].plot(ax=ax[2])

#define the function for use in matplotlib.animation.funcAnimation
def animate(i):
    probas =  proba_buffer[i][0]
    
    button_dist = np.sum(action_df[buttons] * probas[:,np.newaxis], axis=0).transpose()
    
    for a in ax:
        a.clear()
        ax[0].set_title('Game frame')
        ax[1].set_title('Value function')
        ax[2].set_title('Button total')
        ax[3].set_title('Action probabilities')
        
    
    ax[0].imshow(np.rollaxis(obs_buffer[i][0].numpy()/255.0, 0, 3))
    ax[1].plot(np.array(value_buffer)[max(0, i-40): i+1])
    ax[2].plot(button_activation.iloc[max(0,i-40):i+1])
    ax[2].set_xlim(max(0, i-40), max(i+2,10))
    ax[2].legend(buttons, loc='center left', bbox_to_anchor=(-0.4, 0.5))
    
    for j,b in enumerate(buttons):
        ax[2].text(i+0.5, button_activation.iloc[i, j], b, va='center')
    
    ax[2].tick_params(rotation=90)
    ax[3].bar(action_names[::-1], probas[::-1])
    ax[3].tick_params(rotation=90)
    
ani = animation.FuncAnimation(f, animate, frames=len(obs_buffer), interval=100, repeat=False,  blit=True)

# Set up formatting for the movie files

Writer = animation.writers['ffmpeg']
writer = Writer(fps=10, metadata=dict(artist='Me'), bitrate=1800)

# Save ani
ani.save(r'/home/leandro/Videos/AnimationOutput.mp4', writer=writer)

#plt.show()


<IPython.core.display.Javascript object>