In [None]:
# Load the autoreload extension and use mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Use the interactive widget backend for matplotlib figures
# (provided by the ipympl package).
%matplotlib widget

import hydra
from drone_rl_school.agents.dqn import DQNAgent, ReplayBuffer
from drone_rl_school.agents.pid import PIDAgent
from drone_rl_school.envs.drone_env.v0 import PointMassEnv
from drone_rl_school.agents.q_learning import QLearningAgent
from drone_rl_school.train.train import train
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from omegaconf import OmegaConf
import os
import subprocess
from torch.utils.tensorboard import SummaryWriter

In [None]:
# Initialize Hydra and compose the experiment config
with hydra.initialize(config_path="../configs", version_base=None):
    cfg = hydra.compose(config_name="config")

# Prepare the logging directory; the name encodes the timestamp, the agent
# type and the short git commit hash so every run is traceable to its code.
commit = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip()
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
exp_name  = f"{timestamp}_{cfg.agent.type}_{commit[:7]}"
log_dir   = os.path.join(cfg.run.log_root, exp_name)
os.makedirs(log_dir, exist_ok=True)

# Save a frozen copy of the config next to the logs
with open(os.path.join(log_dir, "config.yaml"), "w") as fp:
    fp.write(OmegaConf.to_yaml(cfg))

# Create the actual tensorboard logger.
# To inspect the logs, run "tensorboard --logdir=<cfg.run.log_root>" in a
# terminal and open http://localhost:6006
writer = SummaryWriter(log_dir)

# Set up the environment
random_seed = cfg.env.random_number_generator_seed
env = PointMassEnv(cfg, random_seed)

# Training progress carried across repeated runs of the training cell
next_episode_to_train = 0
best_score = float('-inf')

# Only the DQN agent uses a replay buffer; the other agents keep None
buffer = None

# Define the agent
if cfg.agent.type == 'dqn':
    agent = DQNAgent(cfg)
    buffer = ReplayBuffer(cfg)
elif cfg.agent.type == 'q_learning':
    agent = QLearningAgent(cfg)
elif cfg.agent.type == 'pid':
    agent = PIDAgent(cfg)
else:
    # Fail here instead of leaving `agent` undefined (or stale from an
    # earlier run of this cell) and crashing later in another cell.
    raise ValueError(
        f"Unknown agent type {cfg.agent.type!r}; "
        "expected one of 'dqn', 'q_learning', 'pid'"
    )

In [None]:
# Run one round of training, resuming from the episode counter and the best
# score left behind by the previous run of this cell.
train_result = train(
    agent,
    env,
    writer,
    cfg,
    start_episode=next_episode_to_train,
    best_score=best_score,
    buffer=buffer,
)
last_episode, rewards, best_score = train_result

# The next run of this cell continues with the episode after the last one.
next_episode_to_train = last_episode + 1

In [None]:
# Simulate the current agent a few times and animate the trajectories
n_rollouts = 3
trajectories = [env.simulate(agent) for _ in range(n_rollouts)]
# Label the trajectories with the configured agent type instead of a
# hard-coded 'dqn', so the names are correct for every agent.
env.animate(trajectories, env.goal,
            trajectory_names=[cfg.agent.type] * n_rollouts)

In [None]:
# Simulate a simple PID controller on a separate, randomly seeded environment.
# The side-by-side comparison with the learned agent is currently disabled
# (see the commented-out lines below).
sim_seed = np.random.randint(100000)
with hydra.initialize(config_path="../configs", version_base=None):
    pid_config = hydra.compose(config_name="config", overrides=["agent=pid"])

# Build the PID environment and controller from the PID-specific config
pid_env = PointMassEnv(pid_config, sim_seed)
pid_agent = PIDAgent(pid_config)
trajectories = [pid_env.simulate(pid_agent)]

# Disabled comparison against the learned agent on the same seed:
# trajectories = [PointMassEnv(cfg, sim_seed).simulate(agent), PointMassEnv(pid_config, sim_seed).simulate(PIDAgent(pid_config))]
# env.animate(trajectories, env.goal, trajectory_names=['dqn', 'pid'])

# NOTE(review): the goal is read from `env`, not from the environment that
# produced the trajectory -- confirm both environments share the same goal.
env.animate(trajectories, env.goal, trajectory_names=['pid'])