# Training Navigator Agent for Vanilla maze

This notebook trains the navigator agent, which generates subgoals in order to reach the main goal.

The ***input*** to the neural network should be the main goal given by the user and the observation of the ant robot.
The ***output*** should be a subgoal: (r, theta) coordinates to move toward during the next few simulation steps.

In [None]:
import torch
import numpy as np
import time

from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise

from Utils import get_vanilla_navigator_env
import argparse
from Training.Evaluation import MultiTargetEvalAndSaveCallback

In [None]:
# params
EVAL_FREQ = 100_000
VIDEO_FREQ = 25
BUFFER_SIZE = 10_000
TIMEOUT_STEPS = 300 # Timeout Steps of each episode
LEARNING_RATE = 0.0001
EXPLORATION_NOISE_STD = 0.03

TOTAL_TIME_STEPS = 100

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
parser = argparse.ArgumentParser(description="Solve the vanilla maze with fixed manually generated actions")
parser.add_argument('--to_vid', dest='to_vid', action='store_const', const=True, default=False)

args = parser.parse_args()

# Initialize environment and Eval env.
nav_env = get_vanilla_navigator_env(show_gui=not args.to_vid)
nav_env.visualize_mode(not args.to_vid)
nav_env_eval = get_vanilla_navigator_env(show_gui=not args.to_vid)
nav_env_eval.visualize_mode(not args.to_vid)

_ = nav_env.reset()
nav_env.maze_env.reset(create_video=args.to_vid, video_path="TrainVanilla.avi")


# create callback for evaluation
callback = MultiTargetEvalAndSaveCallback(log_dir="logs/VanillaMazeNavigator",
                                          eval_env=nav_env_eval,
                                          eval_freq=EVAL_FREQ,
                                          eval_video_freq=VIDEO_FREQ,
                                          verbose=1)


In [None]:
exploration_noise = NormalActionNoise(mean=np.array([0]*2), sigma=np.array([EXPLORATION_NOISE_STD]*2))

model = DDPG(policy="MlpPolicy",
             env=nav_env,
             buffer_size=BUFFER_SIZE,
             learning_rate=LEARNING_RATE,
             action_noise=exploration_noise,
             device=device,
             train_freq=(1, "episode"),
             verbose=0,
             tensorboard_log="./logs/VanillaMazeNavigator/tb")

In [None]:
start = time.time()

model.learn(total_timesteps=TOTAL_TIME_STEPS,
            callback=callback,
            tb_log_name="vanillaTrain")

print("time", time.time() - start)