# Testing

In [None]:
import gym
import torch as th

from stable_baselines3 import PPO

# Custom actor (pi) and value function (vf) networks
# of two layers of size 32 each with Relu activation function
# Note: an extra linear layer will be added on top of the pi and the vf nets, respectively
policy_kwargs = dict(activation_fn=th.nn.ReLU,
                     net_arch=dict(pi=[32, 32], vf=[32, 32]))
# Create the agent
model = PPO("MlpPolicy", "CartPole-v1", policy_kwargs=policy_kwargs, verbose=1)


In [None]:
model.policy

In [None]:
# Retrieve the environment
env = model.get_env()
# Train the agent
model.learn(total_timesteps=20_000)
# Save the agent
model.save("ppo_cartpole")

del model
# the policy_kwargs are automatically loaded
model = PPO.load("ppo_cartpole", env=env)

# Action Wrapper

In [None]:
import gym
from gym.spaces import Discrete
import numpy as np

In [None]:
class DiscreteActions(gym.ActionWrapper):
    def __init__(self, env, disc_to_cont):
        super().__init__(env)
        self.disc_to_cont = disc_to_cont
        self.action_space = Discrete(len(disc_to_cont))
    
    def action(self, act):
        return self.disc_to_cont[act]

In [None]:
'''
0: Vx=0, Vy=0
1: Vx=1, Vy=0
2: Vx=0, Vy=1
3: Vx=-1, Vy=0
4: Vx=0, Vy=-1
5: Vx=1, Vy=1
6: Vx=-1, Vy=-1
7: Vx=1, Vy=-1
8: Vx=-1, Vy=1
'''
discrete_actions = [np.array([0,0]), np.array([1,0]),
                    np.array([0,1]), np.array([-1,0]),
                    np.array([0,-1]), np.array([1,1]),
                    np.array([-1,-1]), np.array([1,-1]),
                    np.array([-1,1])]

# SGAN

In [None]:
import sys
sys.path.append('/home/koksyuen/python_project/sgan')

In [None]:
# from predictor import get_generator
import torch
from attrdict import AttrDict
from sgan.models import TrajectoryGenerator
from sgan.utils import relative_to_abs, abs_to_relative

In [None]:
model_path='/home/koksyuen/python_project/sgan/models/sgan-p-models/eth_8_model.pt'
checkpoint = torch.load(model_path)

In [None]:
args = AttrDict(checkpoint['args'])
Sgan = TrajectoryGenerator(
    obs_len=args.obs_len,
    pred_len=args.pred_len,
    embedding_dim=args.embedding_dim,
    encoder_h_dim=args.encoder_h_dim_g,
    decoder_h_dim=args.decoder_h_dim_g,
    mlp_dim=args.mlp_dim,
    num_layers=args.num_layers,
    noise_dim=args.noise_dim,
    noise_type=args.noise_type,
    noise_mix_type=args.noise_mix_type,
    pooling_type=args.pooling_type,
    pool_every_timestep=args.pool_every_timestep,
    dropout=args.dropout,
    bottleneck_dim=args.bottleneck_dim,
    neighborhood_size=args.neighborhood_size,
    grid_size=args.grid_size,
    batch_norm=args.batch_norm)
Sgan.load_state_dict(checkpoint['g_state'])
Sgan.cuda()
# generator.train()
# generator.eval()

In [None]:
for param in Sgan.parameters():
    param.requires_grad = False

In [None]:
for name,param in Sgan.named_parameters():
    print(name)
    if param.requires_grad:
        print(param)

# APF

In [1]:
from crowd_sim.envs.crowd_sim_raw import CrowdSimRaw
from sb3.feature_extractor import Preprocessor
import gym
import time
import torch
import matplotlib.pyplot as plt

from arguments import get_args
from crowd_nav.configs.config import Config
config = Config()

In [2]:
env = CrowdSimRaw()
env.configure(config)
env.setup(seed=0, num_of_env=1, ax=None)

In [3]:
obs_np = env.reset()

In [4]:
obs = torch.from_numpy(obs_np).cuda().float()

In [5]:
preprocessor = Preprocessor(map_size=12, map_resolution=0.1)

In [6]:
pmap = preprocessor(obs)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [7]:
pmap.shape, pmap.dtype, pmap.device

(torch.Size([1, 120, 120]), torch.float32, device(type='cuda', index=0))

In [8]:
from torch.nn import Conv2d, ReLU, Flatten

conv1 = Conv2d(1, 32, kernel_size=4, stride=2, padding=0).cuda()
relu1 = ReLU().cuda()
conv2 = Conv2d(32, 64, kernel_size=4, stride=2, padding=0).cuda()
relu2 = ReLU().cuda()
conv3 = Conv2d(64, 64, kernel_size=3, stride=1, padding=0).cuda()
relu3 = ReLU().cuda()
flat1 = Flatten().cuda()

In [9]:
x1 = relu1(conv1(pmap))
x1.shape

torch.Size([32, 59, 59])

In [10]:
x2 = relu2(conv2(x1))
x2.shape

torch.Size([64, 28, 28])

In [11]:
x3 = relu3(conv3(x2))
x3.shape

torch.Size([64, 26, 26])

In [17]:
x5 = x3.flatten()
x5.shape

torch.Size([43264])

In [15]:
x4 = flat1(x3, start_dim=0, end_dim=-1)
x1.shape

TypeError: forward() got an unexpected keyword argument 'start_dim'

In [None]:
plt.imshow(pmap.reshape(pmap.shape[1], pmap.shape[2]).cpu(), cmap='gray')

## Testing APF

In [None]:
map_resolution = 0.1
map_size = 12

KP = 9.0  # attractive potential gain
ETA = 1.0  # repulsive potential gain
# decay rate with respect to time    0.9^(t-t0)
DECAY = [1.0, 0.9, 0.81, 0.73, 0.66, 0.59, 0.53, 0.48]

In [None]:
pred_traj.shape, pred_traj.device

In [None]:
gx.shape, gx.device, gy.shape, gy.device

In [None]:
radius.device, radius.shape

In [None]:
# center of map
center = map_size / 2
# number of grid
width = int(round((map_size / map_resolution)))

In [None]:
# Create an empty map
# 0: x-axis    1: y-axis
pmap_y, pmap_x = torch.meshgrid(torch.linspace(center, -center, width, device='cuda'), torch.linspace(center, -center, width, device='cuda'))

In [None]:
pmap_x.device, pmap_x.shape

In [None]:
ug = 0.5 * KP * torch.hypot(pmap_x - gx, pmap_y - gy)

In [None]:
ug.shape, ug.device

In [None]:
# reshape to (pmap.shape[0], pmap.shape[1], 1, 1)
# pmap_x = pmap_x.reshape(pmap_x.shape[0], pmap_x.shape[1], 1, 1)
# pmap_y = pmap_y.reshape(pmap_y.shape[0], pmap_y.shape[1], 1, 1)

In [None]:
dq = torch.hypot(pmap_x.reshape(pmap_x.shape[0], pmap_x.shape[1], 1, 1) - pred_traj[:, :, 0], pmap_y.reshape(pmap_y.shape[0], pmap_y.shape[1], 1, 1) - pred_traj[:, :, 1])

In [None]:
dq.shape, dq.device

In [None]:
dq[dq <= 0.1] = 0.1

In [None]:
dq.shape, dq.device

In [None]:
uo = 0.5 * ETA * (1.0 / dq - 1.0 / radius) ** 2

In [None]:
uo.shape, uo.device

In [None]:
uo = uo.permute(0, 1, 3, 2)

In [None]:
uo.shape, uo.device

In [None]:
DECAY = [1.0, 0.9, 0.81, 0.73, 0.66, 0.59, 0.53, 0.48]
decay = torch.tensor(DECAY, device='cuda')

In [None]:
uo = decay * uo

In [None]:
uo.shape, uo.device

In [None]:
uo = torch.amax(uo, dim=(-2, -1))

In [None]:
uo.shape, uo.device, uo.dtype

In [None]:
u_total = torch.add(ug, uo)

In [None]:
u_total.shape, u_total.device

In [None]:
pmap_norm = (u_total-torch.min(u_total))/(torch.max(u_total)-torch.min(u_total)) * 255
pmap_norm = torch.round(pmap_norm).type(torch.uint8)
pmap_norm.reshape(pmap_norm.shape[0], pmap_norm.shape[1], 1)
pmap_norm.shape, pmap_norm.device

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.imshow(pmap_norm.cpu(), cmap='gray')

# Custom feature extractor

In [1]:
from crowd_sim.envs.crowd_sim_raw import CrowdSimRaw
from sb3.feature_extractor import Preprocessor, ApfFeaturesExtractor
from stable_baselines3 import PPO, A2C, DQN
import gym
import time
import torch
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv

from arguments import get_args
from crowd_nav.configs.config import Config
config = Config()

In [2]:
env = CrowdSimRaw()
env.configure(config)
env.setup(seed=0, num_of_env=1, ax=None)

In [3]:
def make_env(seed, rank, env_config, envNum=1):
    """
    Utility function for multiprocessed env.

    :param env_id: (str) the environment ID
    :param seed: (int) the inital seed for RNG
    :param rank: (int) index of the subprocess
    """

    def _init():
        env = CrowdSimRaw()
        # use a seed for reproducibility
        # Important: use a different seed for each environment
        # otherwise they would generate the same experiences
        env.seed(seed + rank)
        env.setup(seed=seed+rank, num_of_env=envNum)
        env.configure(env_config)
        # env = DiscreteActions(env, discrete_actions)
        return env

    return _init

In [4]:
# venv = DummyVecEnv([make_env(0, i, config, 2) for i in range(2)])
venv = SubprocVecEnv([make_env(0, i, config, 2) for i in range(2)])

In [5]:
venv

<stable_baselines3.common.vec_env.subproc_vec_env.SubprocVecEnv at 0x7eff921797c0>

In [6]:
policy_kwargs = dict(
    features_extractor_class=ApfFeaturesExtractor,
    features_extractor_kwargs=dict(features_dim=512),
)
model = PPO("CnnPolicy", venv, policy_kwargs=policy_kwargs, verbose=1, device='cuda')

Using cuda device


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


torch.Size([64, 121])


In [8]:
for name,param in model.policy.named_parameters():
    print(name)
    if param.requires_grad:
        print(param)

log_std
Parameter containing:
tensor([0., 0.], device='cuda:0', requires_grad=True)
features_extractor.apf_generator.traj_predictor.encoder.encoder.weight_ih_l0
features_extractor.apf_generator.traj_predictor.encoder.encoder.weight_hh_l0
features_extractor.apf_generator.traj_predictor.encoder.encoder.bias_ih_l0
features_extractor.apf_generator.traj_predictor.encoder.encoder.bias_hh_l0
features_extractor.apf_generator.traj_predictor.encoder.spatial_embedding.weight
features_extractor.apf_generator.traj_predictor.encoder.spatial_embedding.bias
features_extractor.apf_generator.traj_predictor.decoder.decoder.weight_ih_l0
features_extractor.apf_generator.traj_predictor.decoder.decoder.weight_hh_l0
features_extractor.apf_generator.traj_predictor.decoder.decoder.bias_ih_l0
features_extractor.apf_generator.traj_predictor.decoder.decoder.bias_hh_l0
features_extractor.apf_generator.traj_predictor.decoder.spatial_embedding.weight
features_extractor.apf_generator.traj_predictor.decoder.spatial_emb

In [7]:
model.policy

ActorCriticCnnPolicy(
  (features_extractor): ApfFeaturesExtractor(
    (apf_generator): Preprocessor(
      (traj_predictor): TrajectoryGenerator(
        (encoder): Encoder(
          (encoder): LSTM(16, 32)
          (spatial_embedding): Linear(in_features=2, out_features=16, bias=True)
        )
        (decoder): Decoder(
          (decoder): LSTM(16, 32)
          (spatial_embedding): Linear(in_features=2, out_features=16, bias=True)
          (hidden2pos): Linear(in_features=32, out_features=2, bias=True)
        )
        (pool_net): PoolHiddenNet(
          (spatial_embedding): Linear(in_features=2, out_features=16, bias=True)
          (mlp_pre_pool): Sequential(
            (0): Linear(in_features=48, out_features=512, bias=True)
            (1): ReLU()
            (2): Linear(in_features=512, out_features=8, bias=True)
            (3): ReLU()
          )
        )
        (mlp_decoder_context): Sequential(
          (0): Linear(in_features=40, out_features=64, bias=True)
  

# Simulator

In [None]:
episodes = 1
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0 
    avg_time = 0
    step = 0
    
    while not done:
        action = (0.0, 0.0)
        start_time = time.time()
        obs, reward, done, info = env.step(action)
        end_time = time.time()
        avg_time += (end_time - start_time)
        step += 1
        score+=reward
        print(obs['local_goal'])
        plt.imshow(obs['local_map'].reshape(obs['local_map'].shape[0],obs['local_map'].shape[1]), cmap='gray')
        plt.colorbar()
        plt.show()
    print('Episode:{} Score:{}'.format(episode, score))
    print('average step time ({} steps): {}s'.format(step, avg_time/step))
env.close()

# RL

In [None]:
import numpy as np
  
arr1 = np.array([0, 2, 3]).reshape(3,1)
arr2 = np.array([5, 4, 9]).reshape(3,1)
  
gfg = np.concatenate((arr1, arr2), axis = 1)
print(arr1.shape)
print(arr2.shape)
print(gfg.shape)

In [None]:
import numpy as np
  
arr1 = np.array([[2, 4], [6, 8]])
arr2 = np.array([[7], [7]])
  
gfg = np.concatenate((arr1, arr2), axis = 1)
print(arr1.shape)
print(arr2.shape)
print(gfg.shape)

In [None]:
emotion = np.random.rand(10)
radius = np.random.rand(10)

er = np.concatenate((emotion, radius), axis = 0)
print(emotion.shape)
print(radius.shape)
print(er.shape)