# Action Wrapper

In [7]:
import gym
from gym.spaces import Discrete
import numpy as np

In [8]:
class DiscreteActions(gym.ActionWrapper):
    def __init__(self, env, disc_to_cont):
        super().__init__(env)
        self.disc_to_cont = disc_to_cont
        self.action_space = Discrete(len(disc_to_cont))
    
    def action(self, act):
        return self.disc_to_cont[act]

In [9]:
'''
0: Vx=0, Vy=0
1: Vx=1, Vy=0
2: Vx=0, Vy=1
3: Vx=-1, Vy=0
4: Vx=0, Vy=-1
5: Vx=1, Vy=1
6: Vx=-1, Vy=-1
7: Vx=1, Vy=-1
8: Vx=-1, Vy=1
'''
discrete_actions = [np.array([0,0]), np.array([1,0]),
                    np.array([0,1]), np.array([-1,0]),
                    np.array([0,-1]), np.array([1,1]),
                    np.array([-1,-1]), np.array([1,-1]),
                    np.array([-1,1])]

# Test environment (no render)

In [None]:
import time
import matplotlib.pyplot as plt
%matplotlib inline
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from crowd_sim.envs.crowd_sim_no_pred import CrowdSimNoPred
from arguments import get_args
from crowd_nav.configs.config import Config
config = Config()

In [None]:
env = CrowdSimNoPred()
# env = CrowdSimSgan()
env.configure(config)

env.setup(seed=0, num_of_env=1, ax=None)

In [None]:
env = DiscreteActions(env, discrete_actions)
env.action_space

In [None]:
step = 0
episode = 0
while step < 1000000:
    obs = env.reset()
    done = False
    
    while not done:
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        step += 1
    episode += 1
    print('episode {}: {} steps'.format(episode, step))

env.close()

In [None]:
episodes = 9
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0 
    avg_time = 0
    step = 0
    
    while not done:
        env.render()
        action = episode - 1
        start_time = time.time()
        obs, reward, done, info = env.step(action)
        end_time = time.time()
        avg_time += (end_time - start_time)
        step += 1
        score+=reward
        print(obs['local_goal'])
    print('Episode:{} Score:{}'.format(episode, score))
    print('average step time ({} steps): {}s'.format(step, avg_time/step))
env.close()

# Test environment (render simulation)

In [None]:
import time
import matplotlib.pyplot as plt
%matplotlib tk
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from crowd_sim.envs.crowd_sim_no_pred import CrowdSimNoPred
from arguments import get_args
from crowd_nav.configs.config import Config
config = Config()

In [None]:
fig = plt.figure(figsize=(7, 7))
ax1 = plt.subplot()
ax1.set_xlim(-10, 10)
ax1.set_ylim(-10, 10)
ax1.set_xlabel('x(m)', fontsize=16)
ax1.set_ylabel('y(m)', fontsize=16)

plt.ion()
plt.show()

In [None]:
# env = CrowdSimNoPred()
env = CrowdSimSgan()
env.configure(config)

env.setup(seed=0, num_of_env=1, ax=ax1)

In [None]:
env = DiscreteActions(env, discrete_actions)
env.action_space

In [None]:
episodes = 9
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0 
    avg_time = 0
    step = 0
    
    while not done:
        env.render()
        action = env.action_space.sample()
        start_time = time.time()
        obs, reward, done, info = env.step(action)
        end_time = time.time()
        avg_time += (end_time - start_time)
        step += 1
        score+=reward
        print(obs['local_goal'])
    print('Episode:{} Score:{}'.format(episode, score))
    print('average step time ({} steps): {}s'.format(step, avg_time/step))
env.close()

# Training RL

In [1]:
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from crowd_sim.envs.crowd_sim_no_pred import CrowdSimNoPred
import gym
import time

In [2]:
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3 import DQN

In [3]:
from arguments import get_args
from crowd_nav.configs.config import Config

config = Config()

In [4]:
from stable_baselines3.common.callbacks import BaseCallback
import os

class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [5]:
CHECKPOINT_DIR = './train/DQN_SGAN/'
LOG_DIR = './logs/SGAN/'

In [10]:
# env = CrowdSimNoPred()
env = CrowdSimSgan()
env.configure(config)
env.setup(seed=0, num_of_env=1)

env = DiscreteActions(env, discrete_actions)
env.action_space

Discrete(9)

In [11]:
callback = TrainAndLoggingCallback(check_freq=100000, save_path=CHECKPOINT_DIR)

In [12]:
model = DQN('MultiInputPolicy', env, verbose=1, tensorboard_log=LOG_DIR) 

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


MemoryError: Unable to allocate 53.6 GiB for an array with shape (1000000, 1, 1, 240, 240) and data type uint8

In [None]:
model.policy

In [None]:
model.learn(total_timesteps=3000000, callback=callback)

In [None]:
model.save('latestmodel')

# 4. Test it Out

In [None]:
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from crowd_sim.envs.crowd_sim_no_pred import CrowdSimNoPred
import gym
import time

In [None]:
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3 import DQN

In [None]:
from arguments import get_args
from crowd_nav.configs.config import Config

config = Config()

In [None]:
import matplotlib.pyplot as plt
%matplotlib tk

In [None]:
fig = plt.figure(figsize=(7, 7))
ax1 = plt.subplot()
ax1.set_xlim(-10, 10)
ax1.set_ylim(-10, 10)
ax1.set_xlabel('x(m)', fontsize=16)
ax1.set_ylabel('y(m)', fontsize=16)

plt.ion()
plt.show()

In [None]:
# env = CrowdSimNoPred()
env = CrowdSimSgan()
env.configure(config)
env.setup(seed=0, num_of_env=1, ax=ax1)

env = DiscreteActions(env, discrete_actions)
env.action_space

In [None]:
CHECKPOINT_DIR = './train/DQN_SGAN/'
LOG_DIR = './logs/SGAN/'

In [None]:
# Load model
model = DQN.load('./train/DQN_SGAN/latestmodel', env, tensorboard_log=LOG_DIR)

In [None]:
episodes = 5
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0 
    avg_time = 0
    step = 0
    
    while not done:
        env.render()
        action, _states = model.predict(obs)
        start_time = time.time()
        obs, reward, done, info = env.step(action)
        end_time = time.time()
        avg_time += (end_time - start_time)
        step += 1
        score+=reward
        # print(obs['local_goal'])
    print('Episode:{} Score:{}'.format(episode, score))
    print('average step time ({} steps): {}s'.format(step, avg_time/step))
env.close()

# Multi-processing

In [None]:
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from crowd_sim.envs.crowd_sim_no_pred import CrowdSimNoPred
import gym
import time

In [None]:
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3 import DQN

In [None]:
from arguments import get_args
from crowd_nav.configs.config import Config

config = Config()

In [None]:
from stable_baselines3.common.callbacks import BaseCallback
import os

class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [None]:
num_cpu = 6  # Number of processes/threads to use
seed = 0

In [None]:
def make_env(seed, rank, env_config, envNum=1):
    """
    Utility function for multiprocessed env.

    :param env_id: (str) the environment ID
    :param seed: (int) the inital seed for RNG
    :param rank: (int) index of the subprocess
    """

    def _init():
        env = CrowdSimSgan()
        # use a seed for reproducibility
        # Important: use a different seed for each environment
        # otherwise they would generate the same experiences
        env.seed(seed + rank)
        env.setup(seed=seed+rank, num_of_env=envNum)
        env.configure(env_config)
        env = DiscreteActions(env, discrete_actions)
        env.action_space
        return env

    return _init

In [None]:
envs = SubprocVecEnv([make_env(seed, i, config, num_cpu) for i in range(num_cpu)])

In [None]:
CHECKPOINT_DIR = './train/DQN_SGAN/'
LOG_DIR = './logs/SGAN/'

In [None]:
model = DQN('MultiInputPolicy', envs, verbose=1, tensorboard_log=LOG_DIR) 

In [None]:
callback = TrainAndLoggingCallback(check_freq=100000, save_path=CHECKPOINT_DIR)

In [None]:
model.learn(total_timesteps=2000000, callback=callback)