In [1]:
import sys
import logging
import argparse
import configparser
import os
import shutil
import torch
import gym
import git
import time
import pickle

import sys
sys.path.append('../')
from crowd_sim.envs.utils.robot import Robot
from crowd_nav.utils.trainer import Trainer
from crowd_nav.utils.memory import ReplayMemory
from crowd_nav.utils.explorer import Explorer
from crowd_nav.policy.policy_factory import policy_factory

In [2]:
# Training options. Each entry is (flag, add_argument keyword arguments), listed in
# the order the options are registered on the parser.
option_specs = [
    ('--env_config', dict(type=str, default='configs/env5.config')),
    ('--policy', dict(type=str, default='sarl')),
    ('--policy_config', dict(type=str, default='configs/policy.config')),
    ('--train_config', dict(type=str, default='configs/train.config')),
    ('--output_dir', dict(type=str, default='data/sarl5')),
    ('--weights', dict(type=str)),
    ('--resume', dict(default=False, action='store_true')),
    ('--device', dict(type=str, default='cuda:0')),
    ('--debug', dict(default=False, action='store_true')),
]

parser = argparse.ArgumentParser('Parse configuration file')
for option_flag, option_kwargs in option_specs:
    parser.add_argument(option_flag, **option_kwargs)

# An explicit empty argument list is parsed, so every option takes its default value
# regardless of the command line the kernel was started with.
args = parser.parse_args([])

In [3]:
# configure paths
# A fresh output directory is created unless one already exists and is kept
# (the user declined to overwrite it, or training is being resumed).
config_fields = ('env_config', 'policy_config', 'train_config')
make_new_dir = True
if os.path.exists(args.output_dir):
    key = input('Output directory already exists! Overwrite the folder? (y/n)')
    if args.resume or key != 'y':
        # Keep the folder and point each config option at the copy stored inside it.
        make_new_dir = False
        for field in config_fields:
            stored_copy = os.path.join(args.output_dir, os.path.basename(getattr(args, field)))
            setattr(args, field, stored_copy)
    else:
        shutil.rmtree(args.output_dir)
if make_new_dir:
    os.makedirs(args.output_dir)
    # Snapshot the config files next to the outputs they produce.
    for field in config_fields:
        shutil.copy(getattr(args, field), args.output_dir)

# Artifacts written into the output directory.
log_file, il_weight_file, rl_weight_file, mem_path = (
    os.path.join(args.output_dir, file_name)
    for file_name in ('output.log', 'il_model.pth', 'rl_model.pth', 'memory.data')
)

# configure logging
# Append to the existing log file when resuming; otherwise start it from scratch.
mode = 'a' if args.resume else 'w'
file_handler = logging.FileHandler(log_file, mode=mode)
stdout_handler = logging.StreamHandler(sys.stdout)
level = logging.DEBUG if args.debug else logging.INFO
# Every record goes both to stdout (the cell output) and to the log file.
logging.basicConfig(level=level, handlers=[stdout_handler, file_handler],
                    format='%(asctime)s, %(levelname)s: %(message)s', datefmt="%Y-%m-%d %H:%M:%S")
repo = git.Repo(search_parent_directories=True)
# The hash is passed as a logging argument so that the '%s' placeholder is filled in;
# str.format() only substitutes '{}' fields and would leave '%s' in the message.
logging.info('Current git head hash code: %s', repo.head.object.hexsha)
device = torch.device(args.device)
logging.info('Using device: %s', device)

# configure policy
# Instantiate the policy class registered in policy_factory under the name args.policy.
policy = policy_factory[args.policy]()
# Training requires a trainable policy; parser.error() prints the message and exits.
if not policy.trainable:
    parser.error('Policy has to be trainable')
if args.policy_config is None:
    parser.error('Policy config has to be specified for a trainable network')
# NOTE(review): ConfigParser.read() silently skips files it cannot open, so a wrong
# path only surfaces later when an option is looked up.
policy_config = configparser.RawConfigParser()
policy_config.read(args.policy_config)
# Hand the parsed config to the policy, then tell it which torch device to use.
policy.configure(policy_config)
policy.set_device(device)

# configure environment
# NOTE(review): ConfigParser.read() silently skips files it cannot open, so a wrong
# path only surfaces later when an option is looked up.
env_config = configparser.RawConfigParser()
env_config.read(args.env_config)
# 'CrowdSim-v0' must already be registered with gym — presumably done when the
# crowd_sim package is imported; TODO confirm.
env = gym.make('CrowdSim-v0')
env.configure(env_config)
# The robot is built from the same parsed environment config and then attached to
# the environment. The second argument presumably names the config section — confirm.
robot = Robot(env_config, 'robot')
env.set_robot(robot)

# read training parameters
if args.train_config is None:
    parser.error('Train config has to be specified for a trainable network')
train_config = configparser.RawConfigParser()
train_config.read(args.train_config)


def _read_train_int(option):
    """Return an integer option from the [train] section of the training config."""
    return train_config.getint('train', option)


def _read_train_float(option):
    """Return a float option from the [train] section of the training config."""
    return train_config.getfloat('train', option)


# Optimisation schedule.
rl_learning_rate = _read_train_float('rl_learning_rate')
train_batches = _read_train_int('train_batches')
train_episodes = _read_train_int('train_episodes')
sample_episodes = _read_train_int('sample_episodes')
target_update_interval = _read_train_int('target_update_interval')
evaluation_interval = _read_train_int('evaluation_interval')
# Replay memory size.
capacity = _read_train_int('capacity')
# Exploration schedule.
epsilon_start = _read_train_float('epsilon_start')
epsilon_end = _read_train_float('epsilon_end')
epsilon_decay = _read_train_float('epsilon_decay')
# How often (in episodes) the RL weights are written to disk.
checkpoint_interval = _read_train_int('checkpoint_interval')

# configure trainer and explorer
# One ReplayMemory instance is shared: it is passed to both the Trainer and the
# Explorer below.
memory = ReplayMemory(capacity)
model = policy.get_model()
# batch_size lives in the [trainer] section, not in [train] like the options read above.
batch_size = train_config.getint('trainer', 'batch_size')
trainer = Trainer(model, memory, device, batch_size)
# The explorer receives the policy's gamma and the policy itself as target_policy.
explorer = Explorer(env, robot, device, memory, policy.gamma, target_policy=policy)

Output directory already exists! Overwrite the folder? (y/n) y


2022-10-10 15:19:44, INFO: Current git head hash code: %s
2022-10-10 15:19:44, INFO: Using device: cuda:0


In [4]:
# imitation learning
# Three ways to initialise the model, in priority order:
#   1. resume: load previously saved RL weights;
#   2. reuse imitation-learning weights found in the output directory;
#   3. run imitation learning from scratch and save the resulting weights.
if args.resume:
    if not os.path.exists(rl_weight_file):
        logging.error('RL weights does not exist')
        # Fail here with the offending path instead of falling through to torch.load.
        raise FileNotFoundError(rl_weight_file)
    # map_location lets weights saved on one device be loaded onto the configured one.
    model.load_state_dict(torch.load(rl_weight_file, map_location=device))
    # Later checkpoints go to a separate file, so the weights just loaded are kept.
    rl_weight_file = os.path.join(args.output_dir, 'resumed_rl_model.pth')
    logging.info('Load reinforcement learning trained weights. Resume training')
elif os.path.exists(il_weight_file):
    model.load_state_dict(torch.load(il_weight_file, map_location=device))
    logging.info('Load imitation learning trained weights.')
else:
    il_episodes = train_config.getint('imitation_learning', 'il_episodes')
    il_policy_name = train_config.get('imitation_learning', 'il_policy')
    il_epochs = train_config.getint('imitation_learning', 'il_epochs')
    il_learning_rate = train_config.getfloat('imitation_learning', 'il_learning_rate')
    trainer.set_learning_rate(il_learning_rate)
    # A visible robot uses no extra safety margin; otherwise it is read from the config.
    if robot.visible:
        safety_space = 0
    else:
        safety_space = train_config.getfloat('imitation_learning', 'safety_space')
    # The demonstrator policy drives the robot while experience is collected.
    il_policy = policy_factory[il_policy_name]()
    il_policy.multiagent_training = policy.multiagent_training
    il_policy.safety_space = safety_space
    robot.set_policy(il_policy)
    explorer.run_k_episodes(il_episodes, 'train', update_memory=True, imitation_learning=True)
    trainer.optimize_epoch(il_epochs)
    torch.save(model.state_dict(), il_weight_file)
    logging.info('Finish imitation learning. Weights saved.')
    logging.info('Experience set size: %d/%d', len(memory), memory.capacity)
explorer.update_target_model(model)

# pickle memory
logging.info("Saving memory: %s", mem_path)
with open(mem_path, 'wb') as f:
    pickle.dump(memory, f)

TypeError: reset() takes 1 positional argument but 3 were given

Error in atexit._run_exitfuncs:
Traceback (most recent call last):
  File "/home/minhrobot/.conda/envs/meta/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3810, in atexit_operations
    self.reset(new_session=False)
  File "/home/minhrobot/.conda/envs/meta/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 1421, in reset
    self.history_manager.reset(new_session)
  File "/home/minhrobot/.conda/envs/meta/lib/python3.8/site-packages/IPython/core/history.py", line 592, in reset
    self.dir_hist[:] = [os.getcwd()]
FileNotFoundError: [Errno 2] No such file or directory


In [5]:
# reinforcement learning
# Switch the robot to the policy being trained, log the robot's settings, and set
# the RL learning rate on the trainer.
policy.set_env(env)
robot.set_policy(policy)
robot.print_info()
trainer.set_learning_rate(rl_learning_rate)
# fill the memory pool with some RL experience
# Only done when resuming: 10 episodes are run with epsilon fixed at epsilon_end and
# their transitions are stored in the replay memory.
if args.resume:
    robot.policy.set_epsilon(epsilon_end)
    explorer.run_k_episodes(10, 'train', update_memory=True, episode=0)
    logging.info('Experience set size: %d/%d', len(memory), memory.capacity)
# Episode counter consumed by the training-loop cell.
episode = 0

2022-02-20 17:53:59, INFO: Agent is invisible and has holonomic kinematic constraint
2022-02-20 17:53:59, INFO: Current learning rate: 0.001000


In [6]:
# Main RL loop: one iteration = set the exploration rate, sample episodes into the
# replay memory, optimise on batches, then handle target-sync and checkpointing.
while episode < train_episodes:
    # Epsilon is pinned to epsilon_end when resuming or once the decay window has
    # passed; before that it moves linearly from epsilon_start towards epsilon_end.
    if args.resume or not episode < epsilon_decay:
        epsilon = epsilon_end
    else:
        epsilon = epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode
    robot.policy.set_epsilon(epsilon)

    # Periodic validation is currently disabled:
    # if episode % evaluation_interval == 0:
    #     explorer.run_k_episodes(env.case_size['val'], 'val', episode=episode)

    # Collect fresh experience, then train on the accumulated memory.
    explorer.run_k_episodes(sample_episodes, 'train', update_memory=True, episode=episode)
    trainer.optimize_batch(train_batches)
    episode = episode + 1

    # Refresh the explorer's target model every target_update_interval episodes.
    if not episode % target_update_interval:
        explorer.update_target_model(model)

    # Write the RL weights to disk every checkpoint_interval episodes.
    if episode and not episode % checkpoint_interval:
        torch.save(model.state_dict(), rl_weight_file)
        logging.info("Saving model to: %s", rl_weight_file)

# final test
explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode)

2022-02-20 17:54:17, INFO: TRAIN in episode 0 has success rate: 1.00, collision rate: 0.00, nav time: 18.25, total reward: 0.1501
2022-02-20 17:54:42, INFO: TRAIN in episode 1 has success rate: 0.00, collision rate: 0.00, nav time: 25.00, total reward: 0.0000
2022-02-20 17:55:02, INFO: TRAIN in episode 2 has success rate: 1.00, collision rate: 0.00, nav time: 20.25, total reward: 0.1216
2022-02-20 17:55:24, INFO: TRAIN in episode 3 has success rate: 1.00, collision rate: 0.00, nav time: 21.00, total reward: 0.1123
2022-02-20 17:55:42, INFO: TRAIN in episode 4 has success rate: 1.00, collision rate: 0.00, nav time: 21.00, total reward: 0.1123
2022-02-20 17:56:02, INFO: TRAIN in episode 5 has success rate: 1.00, collision rate: 0.00, nav time: 20.25, total reward: 0.1216
2022-02-20 17:56:10, INFO: TRAIN in episode 6 has success rate: 0.00, collision rate: 1.00, nav time: 25.00, total reward: -0.1260
2022-02-20 17:56:30, INFO: TRAIN in episode 7 has success rate: 1.00, collision rate: 0.0

KeyboardInterrupt: 

In [None]:
# Completion marker: prints "Done" once every cell above has finished running.
print("Done")