# Test Deep RL Algorithm Implementations with BBRL

In [None]:
import torch
from omegaconf import OmegaConf

import bbrl_utils
from bbrl_utils.notebook import setup_tensorboard
from bbrl.stats import WelchTTest

import bbrl_gymnasium

from pmind.algorithms import DQN, DDPG, TD3
from pmind.losses import dqn_compute_critic_loss, ddqn_compute_critic_loss
from pmind.training import run_dqn, run_ddpg, run_td3
from pmind.config.loader import load_config

# One-time notebook initialisation provided by bbrl_utils
# (NOTE(review): exact effects are not visible from this notebook — confirm).
bbrl_utils.setup()

# IPython autoreload in mode 2: re-import modified modules before each cell
# runs, so edits to the project sources are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

Load all configurations:

In [None]:
# Load the "bbrl_example" configuration, then build one OmegaConf object per
# algorithm from its own section (dqn, ddqn, ddpg, td3).
cfg = load_config("bbrl_example")
cfg_dqn = OmegaConf.create(cfg.dqn)
# NOTE(review): "cfg_dqqn" looks like a typo for "cfg_ddqn". The DDQN cell
# reads this exact name, so both places must be renamed together.
cfg_dqqn = OmegaConf.create(cfg.ddqn)
cfg_ddpg = OmegaConf.create(cfg.ddpg)
cfg_td3 = OmegaConf.create(cfg.td3)

In [None]:
# Set up TensorBoard on the ./outputs/tblogs directory
# (presumably where the runs below write their logs — TODO confirm).
setup_tensorboard("./outputs/tblogs")

## Discrete action space

### DQN:

In [None]:
# Build a DQN instance from its configuration and run it with the DQN critic
# loss. `dqn` stays in scope because the comparison cell reads dqn.eval_rewards.
dqn = DQN(cfg_dqn)
run_dqn(dqn, dqn_compute_critic_loss)
# Presumably displays the best policy found during the run — TODO confirm.
dqn.visualize_best()

### DDQN:

In [None]:
# DDQN reuses the DQN class and the same run_dqn entry point; only the
# configuration and the critic loss function differ from the DQN cell.
# NOTE(review): "cfg_dqqn" (sic) is the name assigned in the config-loading
# cell; it looks like a typo for "cfg_ddqn".
ddqn = DQN(cfg_dqqn)
run_dqn(ddqn, ddqn_compute_critic_loss)
# Presumably displays the best policy found during the run — TODO confirm.
ddqn.visualize_best()

In [None]:
# Compare the evaluation rewards of DQN and DDQN with WelchTTest.
# Each eval_rewards sequence is stacked into a single tensor first.
# `legends` names the two algorithms on the plot, matching the DDPG/TD3
# comparison further down, which passes legends="ddpg/td3".
WelchTTest().plot(
    torch.stack(dqn.eval_rewards),
    torch.stack(ddqn.eval_rewards),
    legends="dqn/ddqn",
    save=False,
)

## Continuous action space

### DDPG:

In [None]:
# Build a DDPG instance from its configuration and run it. `ddpg` stays in
# scope because the comparison cell reads ddpg.eval_rewards.
ddpg = DDPG(cfg_ddpg)
run_ddpg(ddpg)
# Presumably displays the best policy found during the run — TODO confirm.
ddpg.visualize_best()

### TD3:

In [None]:
# Build a TD3 instance from its configuration and run it. `td3` stays in
# scope because the comparison cell reads td3.eval_rewards.
td3 = TD3(cfg_td3)
run_td3(td3)
# Presumably displays the best policy found during the run — TODO confirm.
td3.visualize_best()

In [None]:
# Compare the evaluation rewards of DDPG and TD3 with WelchTTest.
welch_test = WelchTTest()
# Stack each algorithm's eval_rewards sequence into a single tensor.
ddpg_rewards = torch.stack(ddpg.eval_rewards)
td3_rewards = torch.stack(td3.eval_rewards)
welch_test.plot(ddpg_rewards, td3_rewards, legends="ddpg/td3", save=False)