# Test environment (no render)

In [None]:
import time
import matplotlib.pyplot as plt
%matplotlib inline
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from crowd_sim.envs.crowd_sim_no_pred import CrowdSimNoPred
from arguments import get_args
from crowd_nav.configs.config import Config
config = Config()

In [None]:
# Instantiate the CrowdSimNoPred environment; uncomment the next line to use
# CrowdSimSgan instead.
env = CrowdSimNoPred()
# env = CrowdSimSgan()
# Apply the shared Config object before setup().
env.configure(config)

# No matplotlib axis is passed (ax=None): this section never calls env.render().
env.setup(seed=0, num_of_env=1, ax=None)

In [None]:
# Roll out `episodes` episodes with a constant (0.0, 0.0) action, printing the
# local goal and plotting the local map observation after every step, and
# report the mean wall time spent inside env.step().
episodes = 1
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    total_step_time = 0.0  # accumulated time spent inside env.step()
    step = 0

    while not done:
        action = (0.0, 0.0)
        # perf_counter() is monotonic and high-resolution, so short intervals
        # are measured reliably (time.time() can jump with clock adjustments).
        start_time = time.perf_counter()
        obs, reward, done, info = env.step(action)
        total_step_time += time.perf_counter() - start_time
        step += 1
        score += reward
        print(obs['local_goal'])
        # Drop any trailing axes so imshow receives a 2-D (rows, cols) image.
        local_map = obs['local_map']
        plt.imshow(local_map.reshape(local_map.shape[0], local_map.shape[1]), cmap='gray')
        plt.colorbar()
        plt.show()
    print('Episode:{} Score:{}'.format(episode, score))
    # The loop body always runs at least once, so `step` is never zero here.
    print('average step time ({} steps): {}s'.format(step, total_step_time/step))
env.close()

# Test environment (render simulation)

In [None]:
import time
import matplotlib.pyplot as plt
%matplotlib tk
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from crowd_sim.envs.crowd_sim_no_pred import CrowdSimNoPred
from arguments import get_args
from crowd_nav.configs.config import Config
config = Config()

In [None]:
# Create a 7x7 inch figure holding a single axis that covers a 20 m x 20 m
# window centred on the origin.
fig, ax1 = plt.subplots(figsize=(7, 7))
axis_limits = (-10, 10)
ax1.set_xlim(*axis_limits)
ax1.set_ylim(*axis_limits)
for set_label, text in ((ax1.set_xlabel, 'x(m)'), (ax1.set_ylabel, 'y(m)')):
    set_label(text, fontsize=16)

# Interactive mode keeps plt.show() from blocking the notebook.
plt.ion()
plt.show()

In [None]:
# Instantiate the CrowdSimNoPred environment; uncomment the next line to use
# CrowdSimSgan instead.
env = CrowdSimNoPred()
# env = CrowdSimSgan()
# Apply the shared Config object before setup().
env.configure(config)

# Hand the axis created above to the environment
# (presumably the surface env.render() draws on — confirm in the env's setup()).
env.setup(seed=0, num_of_env=1, ax=ax1)

In [None]:
# Roll out `episodes` episodes with a constant (1.0, 1.0) action, rendering the
# simulation before every step, and report the mean wall time spent inside
# env.step() (rendering time is excluded from the measurement).
episodes = 1
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    total_step_time = 0.0  # accumulated time spent inside env.step()
    step = 0

    while not done:
        env.render()
        action = (1.0, 1.0)
        # perf_counter() is monotonic and high-resolution, so short intervals
        # are measured reliably (time.time() can jump with clock adjustments).
        start_time = time.perf_counter()
        obs, reward, done, info = env.step(action)
        total_step_time += time.perf_counter() - start_time
        step += 1
        score += reward
        print(obs['local_goal'])
    print('Episode:{} Score:{}'.format(episode, score))
    # The loop body always runs at least once, so `step` is never zero here.
    print('average step time ({} steps): {}s'.format(step, total_step_time/step))
env.close()

# Training RL

In [1]:
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from crowd_sim.envs.crowd_sim_no_pred import CrowdSimNoPred
import gym
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3 import PPO, A2C

In [3]:
from arguments import get_args
from crowd_nav.configs.config import Config

config = Config()

In [4]:
from stable_baselines3.common.callbacks import BaseCallback
import os

class TrainAndLoggingCallback(BaseCallback):
    """Periodically checkpoint the model while it is being trained.

    Every ``check_freq`` calls to ``_on_step`` the current model is saved under
    ``save_path`` as ``best_model_<n_calls>``. Despite the file name, no
    evaluation happens here: each file is simply the model at that point.

    :param check_freq: save a checkpoint every this many callback calls
    :param save_path: directory for the checkpoints; ``None`` disables saving
    :param verbose: verbosity level forwarded to ``BaseCallback``
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        # Create the checkpoint directory up front so the first save cannot
        # fail because the folder is missing.
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        # save_path=None is tolerated by _init_callback, so honour it here too
        # instead of crashing in os.path.join(None, ...).
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        # Always let training continue.
        return True

In [6]:
# Directory handed to TrainAndLoggingCallback as save_path (periodic model checkpoints).
CHECKPOINT_DIR = './train/PPO2/'
# Directory handed to PPO as tensorboard_log.
LOG_DIR = './logs/'

In [7]:
# Training environment: CrowdSimNoPred by default; uncomment the next line to
# train on CrowdSimSgan instead.
env = CrowdSimNoPred()
# env = CrowdSimSgan()
# Apply the shared Config object before setup().
env.configure(config)
# No axis is passed here; training does not call env.render().
env.setup(seed=0, num_of_env=1)

In [8]:
# Save a checkpoint into CHECKPOINT_DIR every 10000 callback calls.
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

In [9]:
# Alternative hyper-parameters kept for reference (explicit learning rate and n_steps):
# model = PPO('MultiInputPolicy', env, verbose=1, tensorboard_log=LOG_DIR, learning_rate=0.000001, 
#             n_steps=512) 
# 'MultiInputPolicy' is used because observations are dicts
# (keys 'local_goal' and 'local_map' are read elsewhere in this notebook).
model = PPO('MultiInputPolicy', env, verbose=1, tensorboard_log=LOG_DIR) 

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [10]:
# Display the policy network architecture as the cell output.
model.policy

MultiInputActorCriticPolicy(
  (features_extractor): CombinedExtractor(
    (extractors): ModuleDict(
      (local_goal): Flatten(start_dim=1, end_dim=-1)
      (local_map): NatureCNN(
        (cnn): Sequential(
          (0): Conv2d(1, 32, kernel_size=(8, 8), stride=(4, 4))
          (1): ReLU()
          (2): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2))
          (3): ReLU()
          (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
          (5): ReLU()
          (6): Flatten(start_dim=1, end_dim=-1)
        )
        (linear): Sequential(
          (0): Linear(in_features=64, out_features=256, bias=True)
          (1): ReLU()
        )
      )
    )
  )
  (pi_features_extractor): CombinedExtractor(
    (extractors): ModuleDict(
      (local_goal): Flatten(start_dim=1, end_dim=-1)
      (local_map): NatureCNN(
        (cnn): Sequential(
          (0): Conv2d(1, 32, kernel_size=(8, 8), stride=(4, 4))
          (1): ReLU()
          (2): Conv2d(32, 64, kernel_size=(4, 4), s

In [11]:
# Train for up to 2,000,000 timesteps; `callback` saves checkpoints along the way.
# The recorded output below ends in a KeyboardInterrupt, i.e. the run was stopped manually.
model.learn(total_timesteps=2000000, callback=callback)

Logging to ./logs/PPO_2
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 84.8     |
|    ep_rew_mean     | -15.5    |
| time/              |          |
|    fps             | 62       |
|    iterations      | 1        |
|    time_elapsed    | 32       |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 85.2         |
|    ep_rew_mean          | -16.1        |
| time/                   |              |
|    fps                  | 44           |
|    iterations           | 2            |
|    time_elapsed         | 92           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0024189902 |
|    clip_fraction        | 0.0246       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.85        |
|    explained_variance   | 0.0042

KeyboardInterrupt: 

In [None]:
# Save the model in its current state (e.g. after interrupting training) under the name 'latestmodel'.
model.save('latestmodel')

# Test the trained model

In [None]:
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from crowd_sim.envs.crowd_sim_no_pred import CrowdSimNoPred
import gym
import time

In [None]:
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3 import PPO, A2C

In [None]:
from arguments import get_args
from crowd_nav.configs.config import Config

config = Config()

In [None]:
import matplotlib.pyplot as plt
%matplotlib tk

In [None]:
# Create a 7x7 inch figure holding a single axis that covers a 20 m x 20 m
# window centred on the origin.
fig, ax1 = plt.subplots(figsize=(7, 7))
axis_limits = (-10, 10)
ax1.set_xlim(*axis_limits)
ax1.set_ylim(*axis_limits)
for set_label, text in ((ax1.set_xlabel, 'x(m)'), (ax1.set_ylabel, 'y(m)')):
    set_label(text, fontsize=16)

# Interactive mode keeps plt.show() from blocking the notebook.
plt.ion()
plt.show()

In [None]:
# Evaluation environment: CrowdSimNoPred by default; uncomment the next line to
# evaluate on CrowdSimSgan instead.
env = CrowdSimNoPred()
# env = CrowdSimSgan()
# Apply the shared Config object before setup().
env.configure(config)
# Hand the axis created above to the environment
# (presumably the surface env.render() draws on — confirm in the env's setup()).
env.setup(seed=0, num_of_env=1, ax=ax1)

In [None]:
# Load a saved PPO checkpoint and attach the evaluation environment to it.
# NOTE(review): the training section of this notebook checkpoints into
# './train/PPO2/', while this path points at './train/PPO/' — confirm that
# this is the intended run and that the 50000-step checkpoint exists.
model = PPO.load('./train/PPO/best_model_50000', env)

In [None]:
# Run the loaded policy for `episodes` episodes, rendering the simulation
# before every step, and report the mean wall time spent inside env.step()
# (rendering and policy inference are excluded from the measurement).
episodes = 5
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    total_step_time = 0.0  # accumulated time spent inside env.step()
    step = 0

    while not done:
        env.render()
        # NOTE(review): predict() is called with its default arguments; consider
        # deterministic=True if reproducible evaluation is wanted — confirm.
        action, _states = model.predict(obs)
        # perf_counter() is monotonic and high-resolution, so short intervals
        # are measured reliably (time.time() can jump with clock adjustments).
        start_time = time.perf_counter()
        obs, reward, done, info = env.step(action)
        total_step_time += time.perf_counter() - start_time
        step += 1
        score += reward
        print(obs['local_goal'])
    print('Episode:{} Score:{}'.format(episode, score))
    # The loop body always runs at least once, so `step` is never zero here.
    print('average step time ({} steps): {}s'.format(step, total_step_time/step))
env.close()

# Multi-processing

In [None]:
# Candidate environment ids, kept for reference only: make_env in this section
# instantiates CrowdSimSgan directly and takes no env id.
# env_id = 'CrowdSim-v0'
# env_id = 'CrowdSimVarNum-v0'
# env_id = 'CrowdSimSgan-v0'

num_cpu = 4  # Number of processes to use
seed = 0  # base seed; make_env adds the worker rank to it

In [None]:
def make_env(seed, rank, env_config, envNum=1):
    """
    Utility function for multiprocessed env.

    Returns a zero-argument factory, so the CrowdSimSgan instance is only
    created when the factory is called (e.g. by a vectorized-env worker).

    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess
    :param env_config: configuration object passed to ``env.configure``
    :param envNum: (int) number of environments, forwarded to ``env.setup``
        as ``num_of_env``
    :return: (callable) function that builds and returns the configured env
    """
    # Use a different seed for each environment, otherwise all of them
    # would generate the same experiences.
    env_seed = seed + rank

    def _init():
        env = CrowdSimSgan()
        env.seed(env_seed)
        # Configure before setup, matching the order used by every
        # single-environment cell of this notebook.
        env.configure(env_config)
        env.setup(seed=env_seed, num_of_env=envNum)
        return env

    return _init

In [None]:
# Build num_cpu environment factories: factory i uses seed + i and is told that
# num_cpu environments exist. SubprocVecEnv receives the factories, not env instances.
envs = SubprocVecEnv([make_env(seed, i, config, num_cpu) for i in range(num_cpu)])