In [3]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys

from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.utils import set_random_seed

import matplotlib.pyplot as plt

sys.path.append('..')

import MazeEnv.MultiTargetMazeEnv as mtmz
from MazeEnv.MazeEnv import Rewards
from Utils import make_circular_map, clear_files, get_multi_targets_circle_envs
from Evaluation import EvalAndSaveCallback, MultiTargetEvalAndSaveCallback
import Evaluation

import torch
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [8]:
# --- Run identity -----------------------------------------------------------
# RUN_NAME names the per-run folder under logs/MultiTargets and is also used as
# the TensorBoard log name when training starts.
RUN_NAME = "8m_mass_and_friction"
run_log_dir = os.path.join("logs/MultiTargets", RUN_NAME)
os.makedirs(run_log_dir, exist_ok=True)  # safe to re-run: existing folder is kept

# --- Training hyper-parameters ----------------------------------------------
TOTAL_TIME_STEPS = 8_000_000   # number of training timesteps
BUFFER_SIZE = 500_000          # forwarded to DDPG as buffer_size
TIMEOUT_STEPS = 1000           # timeout steps of each episode
LEARNING_RATE = 2e-7           # base value returned by the learning-rate schedule
REDUCE_LR = True               # when truthy, the schedule may return LEARNING_RATE / 5
EXPLORATION_NOISE_STD = 0.05   # sigma of the Gaussian action noise
LEARNING_STARTS = 5000         # forwarded to DDPG as learning_starts
ACTION_FORCE = 1500            # forwarded to the environment factory as action_force

# Reward values handed to the environment factory, keyed by the keyword names
# that Rewards accepts.
reward_values = {
    "target_arrival": 10,
    "collision": -20,
    "timeout": -10,
    "idle": -0.01,
    "fall": -20,
}
REWARDS = Rewards(**reward_values)

# Evaluation settings consumed by the evaluation callback.
# EVAL_EPISODES = 30  (disabled; not referenced by the cells visible here)
EVAL_FREQ = 50_000   # forwarded as eval_freq; the training log reports an evaluation every 50,000 steps
VIDEO_FREQ = 20      # forwarded as eval_video_freq; unit is defined by the callback - TODO confirm

# Target coordinates are parsed from a comma-separated file.
targets = np.genfromtxt("TestTargets/test_coords.csv", delimiter=',')

# One factory call yields both environments: maze_env is the one the agent is
# trained on, eval_maze_env is handed to the evaluation callback.
monitor_dir = os.path.join("logs/MultiTargets", RUN_NAME, "results")
maze_env, eval_maze_env = get_multi_targets_circle_envs(
    radius=2.9,
    targets=targets,
    timeout_steps=TIMEOUT_STEPS,
    rewards=REWARDS,
    monitor_dir=monitor_dir,
    action_force=ACTION_FORCE,
)
 
# create model:
# Gaussian exploration noise handed to DDPG as action_noise. The vector length
# is read from the training environment's action space instead of repeating a
# literal 8, so mean and sigma always match the action dimension.
n_actions = maze_env.action_space.shape[-1]
exploration_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                      sigma=EXPLORATION_NOISE_STD * np.ones(n_actions))

def lr_func(progress):
    """Learning-rate schedule handed to DDPG as ``learning_rate``.

    Returns ``LEARNING_RATE / 5`` when ``progress`` is below 0.5 and
    ``REDUCE_LR`` is truthy; otherwise returns the full ``LEARNING_RATE``.

    NOTE(review): Stable-Baselines3 calls a schedule with the *remaining*
    progress (1.0 at the start of training, 0.0 at the end), so
    ``progress < 0.5`` corresponds to the second half of training - confirm
    that this is the intended phase for the reduced rate.
    """
    use_reduced_rate = progress < 0.5 and REDUCE_LR
    if use_reduced_rate:
        return LEARNING_RATE / 5
    return LEARNING_RATE

# Seed the random generators before the agent is constructed. The second
# positional argument is Stable-Baselines3's ``using_cuda`` flag.
set_random_seed(314, True)

# Collect the DDPG settings in one place, then build the agent from them.
ddpg_settings = dict(
    policy="MlpPolicy",
    env=maze_env,
    buffer_size=BUFFER_SIZE,
    learning_rate=lr_func,            # callable schedule rather than a constant
    action_noise=exploration_noise,
    device=device,
    train_freq=(1, "episode"),
    learning_starts=LEARNING_STARTS,
    verbose=0,
    tensorboard_log="./logs/MultiTargets/tb",
)
model = DDPG(**ddpg_settings)

# Evaluation callback passed to model.learn(): it is configured with the
# per-run log folder, the evaluation environment and the evaluation/video
# frequencies.
eval_log_dir = os.path.join("logs/MultiTargets", RUN_NAME)
callback = MultiTargetEvalAndSaveCallback(
    log_dir=eval_log_dir,
    eval_env=eval_maze_env,
    eval_freq=EVAL_FREQ,
    eval_video_freq=VIDEO_FREQ,
    verbose=1,
)

In [9]:
# Remove video artefacts left over from earlier runs.
# NOTE(review): these patterns match files directly under logs/MultiTargets,
# while the evaluation callback is given the per-run subfolder as log_dir -
# confirm that this is where the videos are actually written.
for stale_pattern in ('logs/MultiTargets/*.gif', 'logs/MultiTargets/*.avi'):
    clear_files(stale_pattern)

In [10]:
# Train the agent and report the wall-clock duration in seconds.
# The report lives in a ``finally`` clause so that it is still printed when
# training stops early (for example when the kernel is interrupted); the
# exception itself is not swallowed and still propagates.
start = time.time()

try:
    model.learn(total_timesteps=TOTAL_TIME_STEPS,
                callback=callback,
                tb_log_name=RUN_NAME)
finally:
    print("time", time.time() - start)

50000.00 Steps evaluation, avg reward:-19.99, avg episode length: 1000.00, success rate: 0.00
--Saving new best model--
100000.00 Steps evaluation, avg reward:-19.99, avg episode length: 1000.00, success rate: 0.00


KeyboardInterrupt: 

In [None]:
# Plot the logged results found under the log root.
# NOTE(review): the meaning of the second argument (30) is defined by
# Evaluation.plot_train_eval_results - presumably a window or episode count;
# confirm against that function.
results_root = "logs/MultiTargets"
Evaluation.plot_train_eval_results(results_root, 30)

In [None]:
from IPython.display import Image
import ipyplot
import glob

# create gifs and plot them:
# NOTE(review): this works on the logs/MultiTargets root, while the evaluation
# callback is given the per-run subfolder as log_dir - confirm the location.
Evaluation.create_gifs_from_avi("logs/MultiTargets")


def _gif_label(path):
    """Return the file name of ``path`` (no directory) up to its first dot."""
    # os.path.basename honours the platform's path separator, unlike a manual
    # split on '/'.
    return os.path.basename(path).split('.')[0]


def _gif_order(path):
    """Sort key: files with a numeric leading step count first, ascending."""
    prefix = _gif_label(path).split('_')[0]
    if prefix.isdecimal():
        return (0, int(prefix), path)
    return (1, 0, path)


# glob returns matches in arbitrary order; sort them so the gallery follows the
# step count encoded at the start of each file name.
gifs = sorted(glob.glob("logs/MultiTargets/*_steps.gif"), key=_gif_order)
labels = [_gif_label(pth) for pth in gifs]
ipyplot.plot_images(gifs, labels, img_width=200)


In [None]:
# load best model
# NOTE(review): the model is loaded from the logs/MultiTargets root, while the
# evaluation callback is given the per-run subfolder as log_dir - confirm that
# best_model is actually saved here.
best_model = DDPG.load("./logs/MultiTargets/best_model",env=maze_env)

# Make sure the output folder exists before recording into it; exist_ok keeps
# the cell re-runnable.
os.makedirs("logs/MultiTargets/final", exist_ok=True)

# evaluate for last final episode (random target)
episode_reward = Evaluation.record_model(best_model, eval_maze_env, "logs/MultiTargets/final/final.avi")
print("Final Evaluation Reward:", episode_reward)

# Convert the recorded video to a gif and display it.
Evaluation.create_gifs_from_avi("logs/MultiTargets/final")

gifs = glob.glob("logs/MultiTargets/final/final.gif")
print(gifs)
ipyplot.plot_images(gifs, img_width=250)