# Test environment (no render)

In [None]:
import time
import matplotlib.pyplot as plt
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from arguments import get_args
from crowd_nav.configs.config import Config
config = Config()

In [None]:
# Instantiate the simulator and apply the configuration object created above.
env = CrowdSimSgan()
env.configure(config)

# ax=None: no matplotlib axis is handed to the environment in this no-render section.
env.setup(seed=0, num_of_env=1, ax=None)

In [None]:
# Roll out `episodes` episodes with a constant zero action. After every step the
# 'local_goal' observation is printed and the 'local_map' observation is shown
# as a grayscale image; each env.step() call is timed.
episodes = 1
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    total_step_time = 0.0  # summed duration of env.step() calls in this episode
    step = 0

    while not done:
        action = (0.0, 0.0)
        # perf_counter() is monotonic and high-resolution; time.time() is a
        # wall clock that can jump and is coarse on some platforms.
        start_time = time.perf_counter()
        obs, reward, done, info = env.step(action)
        total_step_time += time.perf_counter() - start_time
        step += 1
        score += reward
        print(obs['local_goal'])
        local_map = obs['local_map']
        # Keep only the first two dimensions so imshow receives a 2-D array.
        plt.imshow(local_map.reshape(local_map.shape[0], local_map.shape[1]), cmap='gray')
        plt.colorbar()
        plt.show()
    print('Episode:{} Score:{}'.format(episode, score))
    # The loop body runs at least once (done starts False), so step >= 1 here.
    print('average step time ({} steps): {}s'.format(step, total_step_time/step))
env.close()

# Test environment (render simulation)

In [None]:
import time
import matplotlib.pyplot as plt
%matplotlib tk
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from arguments import get_args
from crowd_nav.configs.config import Config
config = Config()

In [None]:
# 7x7-inch figure holding a single axis that spans [-10, 10] in x and y.
fig, ax1 = plt.subplots(figsize=(7, 7))
ax1.set(xlim=(-10, 10), ylim=(-10, 10))
ax1.set_xlabel('x(m)', fontsize=16)
ax1.set_ylabel('y(m)', fontsize=16)

# Turn on interactive mode before showing the figure.
plt.ion()
plt.show()

In [None]:
# Instantiate the simulator and apply the configuration object created above.
env = CrowdSimSgan()
env.configure(config)

# Hand the matplotlib axis to the environment; presumably env.render() draws on it — confirm in CrowdSimSgan.
env.setup(seed=0, num_of_env=1, ax=ax1)

In [None]:
# Roll out `episodes` episodes with a constant (1.0, 1.0) action, rendering the
# environment before every step and timing each env.step() call.
episodes = 1
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    total_step_time = 0.0  # summed duration of env.step() calls in this episode
    step = 0

    while not done:
        env.render()
        action = (1.0, 1.0)
        # perf_counter() is monotonic and high-resolution; time.time() is a
        # wall clock that can jump and is coarse on some platforms.
        start_time = time.perf_counter()
        obs, reward, done, info = env.step(action)
        total_step_time += time.perf_counter() - start_time
        step += 1
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
    # The loop body runs at least once (done starts False), so step >= 1 here.
    print('average step time ({} steps): {}s'.format(step, total_step_time/step))
env.close()

# Training RL

In [1]:
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
import gym
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3 import PPO, A2C

In [3]:
from arguments import get_args
from crowd_nav.configs.config import Config

config = Config()

In [4]:
from stable_baselines3.common.callbacks import BaseCallback
import os

class TrainAndLoggingCallback(BaseCallback):
    """Save a checkpoint of the model being trained at a fixed call interval.

    :param check_freq: (int) a checkpoint is written whenever ``n_calls`` is a
        multiple of this value
    :param save_path: (str or None) directory that receives the checkpoints;
        ``None`` disables checkpointing
    :param verbose: (int) verbosity level forwarded to ``BaseCallback``
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        # Make sure the checkpoint directory exists before any save is attempted.
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        # save_path=None is accepted by _init_callback, so it must be tolerated
        # here as well instead of failing inside os.path.join.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            # Despite the 'best_model_' prefix, every checkpoint is written
            # unconditionally; no comparison against earlier models is made.
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        # Always True: this callback never asks for training to stop.
        return True

In [5]:
CHECKPOINT_DIR = './train/A2C'
LOG_DIR = './logs/A2C'

In [6]:
# Build the training environment: instantiate, configure, then set up.
# No `ax` argument is passed to setup() here, unlike the rendering sections.
env = CrowdSimSgan()
env.configure(config)
env.setup(seed=0, num_of_env=1)

In [7]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

In [8]:
# model = PPO('MultiInputPolicy', env, verbose=1, tensorboard_log=LOG_DIR, learning_rate=0.000001, 
#             n_steps=512) 
model = A2C('MultiInputPolicy', env, verbose=1, tensorboard_log=LOG_DIR) 

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [9]:
model.policy

MultiInputActorCriticPolicy(
  (features_extractor): CombinedExtractor(
    (extractors): ModuleDict(
      (local_goal): Flatten(start_dim=1, end_dim=-1)
      (local_map): NatureCNN(
        (cnn): Sequential(
          (0): Conv2d(1, 32, kernel_size=(8, 8), stride=(4, 4))
          (1): ReLU()
          (2): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2))
          (3): ReLU()
          (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
          (5): ReLU()
          (6): Flatten(start_dim=1, end_dim=-1)
        )
        (linear): Sequential(
          (0): Linear(in_features=64, out_features=256, bias=True)
          (1): ReLU()
        )
      )
    )
  )
  (pi_features_extractor): CombinedExtractor(
    (extractors): ModuleDict(
      (local_goal): Flatten(start_dim=1, end_dim=-1)
      (local_map): NatureCNN(
        (cnn): Sequential(
          (0): Conv2d(1, 32, kernel_size=(8, 8), stride=(4, 4))
          (1): ReLU()
          (2): Conv2d(32, 64, kernel_size=(4, 4), s

In [10]:
model.learn(total_timesteps=2000000, callback=callback)

Logging to ./logs/A2C/A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 120      |
|    ep_rew_mean        | -7.72    |
| time/                 |          |
|    fps                | 55       |
|    iterations         | 100      |
|    time_elapsed       | 8        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -2.82    |
|    explained_variance | 0.00483  |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.39    |
|    std                | 0.992    |
|    value_loss         | 0.164    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 60.8     |
|    ep_rew_mean        | -14.2    |
| time/                 |          |
|    fps                | 53       |
|    iterations         | 200      |
|    time_elapsed       | 18       |
|    total

KeyboardInterrupt: 

In [None]:
model.save('latestmodel')

# Test the trained model

In [1]:
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
import gym
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3 import PPO, A2C

In [3]:
from arguments import get_args
from crowd_nav.configs.config import Config

config = Config()

In [4]:
import matplotlib.pyplot as plt
%matplotlib tk

In [5]:
# 7x7-inch figure holding a single axis that spans [-10, 10] in x and y.
fig, ax1 = plt.subplots(figsize=(7, 7))
ax1.set(xlim=(-10, 10), ylim=(-10, 10))
ax1.set_xlabel('x(m)', fontsize=16)
ax1.set_ylabel('y(m)', fontsize=16)

# Turn on interactive mode before showing the figure.
plt.ion()
plt.show()

In [6]:
# Build the evaluation environment: instantiate, configure, then set up.
# The matplotlib axis is handed over; presumably env.render() draws on it — confirm in CrowdSimSgan.
env = CrowdSimSgan()
env.configure(config)
env.setup(seed=0, num_of_env=1, ax=ax1)

In [7]:
# Load model
model = A2C.load('./train/A2C/best_model_30000', env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [None]:
# Run the loaded model for `episodes` episodes, rendering before every step and
# timing each env.step() call (model.predict() is not included in the timing).
episodes = 5
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    total_step_time = 0.0  # summed duration of env.step() calls in this episode
    step = 0

    while not done:
        env.render()
        action, _states = model.predict(obs)
        # perf_counter() is monotonic and high-resolution; time.time() is a
        # wall clock that can jump and is coarse on some platforms.
        start_time = time.perf_counter()
        obs, reward, done, info = env.step(action)
        total_step_time += time.perf_counter() - start_time
        step += 1
        score += reward
        # print(obs['local_goal'])  # uncomment to trace the goal observation
    print('Episode:{} Score:{}'.format(episode, score))
    # The loop body runs at least once (done starts False), so step >= 1 here.
    print('average step time ({} steps): {}s'.format(step, total_step_time/step))
env.close()

[0.32686502 0.77178425]
[0.31213364 0.7865156 ]
[0.30216557 0.7984849 ]
[0.28743416 0.8132163 ]
[0.27270278 0.8279477 ]
[0.25797138 0.8426791 ]
[0.24324   0.8574105]
[0.23773143 0.8721419 ]
[0.24487036 0.88687325]
[0.23013897 0.90160465]
[0.22004367 0.91633606]
[0.20531228 0.9310674 ]
[0.19058089 0.9457988 ]
[0.17832337 0.94631636]
[0.16874214 0.96104777]
[0.18347353 0.94631636]
[0.16874214 0.931585  ]
[0.15401074 0.9168536 ]
[0.15808086 0.9021222 ]
[0.15819785 0.8873908 ]
[0.1466592  0.87265944]
[0.13445391 0.85792804]
[0.13949986 0.87164485]
[0.15423125 0.85691345]
[0.13949986 0.8421821 ]
[0.15004627 0.85691345]
[0.14855433 0.8421821 ]
[0.16328572 0.8360187 ]
[0.17801711 0.8507501 ]
[0.17714056 0.8360187 ]
[0.16240917 0.82128733]
[0.14767778 0.8178012 ]
[0.13294639 0.8325325 ]
[0.11964552 0.8178012 ]
[0.11572696 0.8325325 ]
[0.11244512 0.8178012 ]
[0.09771373 0.80493134]
[0.08298234 0.80420136]
[0.06825095 0.8058692 ]
[0.05389191 0.8206006 ]
[0.06446873 0.8058692 ]
[0.06362956 0.7948

# Multi-processing

In [None]:
# env_id = 'CrowdSim-v0'
# env_id = 'CrowdSimVarNum-v0'
# env_id = 'CrowdSimSgan-v0'

num_cpu = 4  # Number of processes to use
seed = 0

In [None]:
def make_env(seed, rank, env_config, envNum=1):
    """
    Utility function for multiprocessed env.

    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess
    :param env_config: configuration object passed to ``env.configure``
    :param envNum: (int) value forwarded to ``env.setup`` as ``num_of_env``
    :return: (callable) zero-argument function that builds and returns one
        configured environment
    """

    def _init():
        env = CrowdSimSgan()
        # Configure before setup, matching the order used by every
        # single-environment cell in this notebook.
        env.configure(env_config)
        # use a seed for reproducibility
        # Important: use a different seed for each environment
        # otherwise they would generate the same experiences
        env_seed = seed + rank
        env.seed(env_seed)
        env.setup(seed=env_seed, num_of_env=envNum)
        return env

    return _init

In [None]:
# One environment factory per worker process; the worker index is passed as the rank.
envs = SubprocVecEnv(
    [
        make_env(seed, rank=worker, env_config=config, envNum=num_cpu)
        for worker in range(num_cpu)
    ]
)