In [5]:
import os, pickle
import numpy as np
from flow.multiagent_envs import MultiWaveAttenuationMergePOEnv
from flow.scenarios import MergeScenario
from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder
from flow.core.params import InFlows

from ray.tune import run_experiments
from ray.tune.experiment import Experiment, convert_to_experiment_list

import gym, ray
from ray.rllib.agents.ppo import PPOAgent, DEFAULT_CONFIG
from ray.tune.registry import register_env, register_trainable
from ray.tune.logger import pretty_print

# Name of the flow.benchmarks submodule to load; the same value is assigned
# again in the configuration cell that follows.
benchmark_name = 'multi_merge'

In [6]:
# --- Experiment selection -------------------------------------------------
benchmark_name = 'multi_merge'   # flow.benchmarks submodule to load
exp_name = 'buff_obs'            # sub-folder of the ray results directory
AGENT = 'PPO'                    # 'PPO' or 'DDPG' or 'HUMAN'
checkpoint = '50'                # checkpoint number, kept as a string for path building

# --- Resources and hyper-parameters ---------------------------------------
# num_cpus is passed to ray.init; the other three values are not referenced
# in the cells shown here.
num_cpus = 62
num_rollouts = 60
gae_lambda = 0.97
step_size = 5e-4

# Trial folders available for this experiment (raises if the folder is missing).
results_list = os.listdir('/headless/ray_results/{}'.format(exp_name))

In [7]:
# Start Ray with the configured CPU budget and the web UI disabled.
# ignore_reinit_error=True lets this cell be re-run in a live kernel without raising.
ray.init(num_cpus=num_cpus, include_webui=False, ignore_reinit_error=True)

Calling ray.init() again after it has already been called.


In [8]:
import importlib

# Load the benchmark module selected by `benchmark_name` and take its
# buffered-observation flow parameters. importlib.import_module returns the
# submodule itself, which is what __import__(..., fromlist=[...]) was used for.
benchmark = importlib.import_module("flow.benchmarks.%s" % benchmark_name)
flow_params = benchmark.buffered_obs_flow_params

# Randomised traffic demand, sampled once each time this cell runs.
# NOTE(review): no random seed is set, so every execution draws new values;
# log or seed these if the run needs to be reproduced.
# inflow rate at the highway (vehicles per hour), integer in [1000, 2000)
FLOW_RATE = np.random.randint(1000, 2000)
# inflow rate at the merge (vehicles per hour), integer in [50, 150)
FLOW_RATE_MERGE = np.random.randint(50, 150)
# fraction of autonomous vehicles on the highway, in [0.05, 0.15)
RL_PENETRATION = 0.05 + np.random.rand() * 0.1

inflow = InFlows()
# Human-driven share of the highway inflow.
inflow.add(
    veh_type="human",
    edge="inflow_highway",
    vehs_per_hour=int((1 - RL_PENETRATION) * FLOW_RATE),
    departLane="free",
    departSpeed=10)
# Autonomous (RL) share of the highway inflow.
inflow.add(
    veh_type="rl",
    edge="inflow_highway",
    vehs_per_hour=int(RL_PENETRATION * FLOW_RATE),
    departLane="free",
    departSpeed=10)
# Human-driven inflow on the merge.
inflow.add(
    veh_type="human",
    edge="inflow_merge",
    vehs_per_hour=FLOW_RATE_MERGE,
    departLane="free",
    departSpeed=7.5)

# Replace the benchmark's inflows with the randomised ones. This mutates the
# params object that belongs to the benchmark module, not a copy of it.
net = flow_params['net']
net.inflows = inflow

create_env, env_name = make_create_env(params=flow_params, version=0)

# Register as rllib env. The 'Rondom' prefix (sic) is kept unchanged because
# the experiment spec rebuilds the same name to select this environment.
register_env('Rondom'+env_name, create_env)

In [9]:
# Folder of the trial whose checkpoint is resumed, inside the experiment's
# results directory.
base = '/headless/ray_results/{}/{}'.format(
    exp_name,
    'PPO_MultiWaveAttenuationMergePOEnvBufferedObs-v0_[eta1, eta2, eta3]:[1.0, 0.2, 0.3]_t_min:10.0_7_2019-04-28_11-49-51_zkx7fvy')
config_path = '{}/params.pkl'.format(base)
checkpoint_path = '{0}/checkpoint_{1}/checkpoint-{1}'.format(base, checkpoint)

# Load the pickled config stored next to the checkpoints.
# NOTE: pickle.load can execute arbitrary code; only open params.pkl files
# from runs you trust.
with open(config_path, 'rb') as params_file:
    config = pickle.load(params_file)

In [13]:
# Tune experiment spec: run the `AGENT` trainer on the re-registered
# environment, starting from the weights stored at `checkpoint_path`.
# The loaded config is shallow-copied so the spec does not alias `config`.
# NOTE(review): with checkpoint_freq=20 and a stop at iteration 55, confirm
# that a checkpoint of the resumed run is actually written before it ends.
exp_tag = dict(
    run=AGENT,
    env='Rondom' + env_name,
    config=dict(config),
    checkpoint_freq=20,
    max_failures=999,
    stop=dict(training_iteration=55),
    num_samples=1,
    restore=checkpoint_path,
)

In [20]:
# Expand the spec into Experiment objects, keep the first one, and display
# the spec it resolved to.
a = convert_to_experiment_list({'test': exp_tag})[0]
a.spec

{'checkpoint_at_end': False,
 'checkpoint_freq': 20,
 'config': {'batch_mode': 'truncate_episodes',
  'callbacks': {'on_episode_end': <ray.tune.suggest.variant_generator.function at 0x7fb3838e58d0>,
   'on_episode_start': <ray.tune.suggest.variant_generator.function at 0x7fb3838e5e80>,
   'on_episode_step': <ray.tune.suggest.variant_generator.function at 0x7fb3838e5f60>,
   'on_sample_end': None,
   'on_train_result': None},
  'clip_actions': False,
  'clip_param': 0.3,
  'clip_rewards': None,
  'collect_metrics_timeout': 180,
  'compress_observations': False,
  'custom_resources_per_worker': {},
  'entropy_coeff': 0.0,
  'env': 'RondomMultiWaveAttenuationMergePOEnvBufferedObs-v0',
   'run': 'PPO'},
  'gamma': 0.99,
  'horizon': 1500,
  'input': 'sampler',
  'input_evaluation': None,
  'kl_coeff': 0.2,
  'kl_target': 0.01,
  'lambda': 0.97,
  'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8,
   'intra_op_parallelism_threads': 8},
  'log_level': 'INFO',
  'lr': 0.00

In [11]:
# Launch the resumed run under the experiment name 'resume'; the returned
# value is kept in `trials`.
trials = run_experiments({'resume': exp_tag})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/62 CPUs, 0/0 GPUs
Memory usage on this node: 4.9/60.8 GB

Created LogSyncer for /headless/ray_results/resume/PPO_RondomMultiWaveAttenuationMergePOEnvBufferedObs-v0_0_2019-04-28_20-21-59fbbxy7s4 -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 61/62 CPUs, 0/0 GPUs
Memory usage on this node: 13.7/60.8 GB
Result logdir: /headless/ray_results/resume
RUNNING trials:
 - PPO_RondomMultiWaveAttenuationMergePOEnvBufferedObs-v0_0:	RUNNING

Result for PPO_RondomMultiWaveAttenuationMergePOEnvBufferedObs-v0_0:
  custom_metrics:
    cost1_max: 422.52381393851556
    cost1_mean: 335.40206726668885
    cost1_min: 266.1116126661171
    cost2_max: -0.05623132780832304
    cost2_mean: -0.10158752917469
    cost2_min: -0.37872786335556763
    outflow_rate_max: 1068.8300648039872
    outflow_rate_mean: 957.2961641082636
    outflow_rate_min: 866.969116199853
    system_level_velocity_max: 15.851506831367999
    syste

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 61/62 CPUs, 0/0 GPUs
Memory usage on this node: 18.4/60.8 GB
Result logdir: /headless/ray_results/resume
RUNNING trials:
 - PPO_RondomMultiWaveAttenuationMergePOEnvBufferedObs-v0_0:	RUNNING [pid=54162], 2241 s, 54 iter, 6077880 ts, 1.22e+03 rew

Result for PPO_RondomMultiWaveAttenuationMergePOEnvBufferedObs-v0_0:
  custom_metrics:
    cost1_max: 420.27899713393066
    cost1_mean: 304.8641079858787
    cost1_min: 231.0697119961885
    cost2_max: -0.0561963147132335
    cost2_mean: -0.10067309673319903
    cost2_min: -0.41198749972064197
    outflow_rate_max: 1066.4535322036138
    outflow_rate_mean: 909.9508155068143
    outflow_rate_min: 802.8126822824256
    system_level_velocity_max: 15.72486523642408
    system_level_velocity_mean: 11.295167940033933
    system_level_velocity_min: 8.635244451129742
  date: 2019-04-28_20-25-51
  done: true
  episode_len_mean: 700.0
  episode_reward_max: 1346.0449733335574
  episode_re

In [12]:
# Display the config dict that was loaded from params.pkl, for inspection.
config

{'batch_mode': 'truncate_episodes',
 'callbacks': {'on_episode_end': <ray.tune.suggest.variant_generator.function at 0x7fb3838e9780>,
  'on_episode_start': <ray.tune.suggest.variant_generator.function at 0x7fb3838e9278>,
  'on_episode_step': <ray.tune.suggest.variant_generator.function at 0x7fb3838e9710>,
  'on_sample_end': None,
  'on_train_result': None},
 'clip_actions': False,
 'clip_param': 0.3,
 'clip_rewards': None,
 'collect_metrics_timeout': 180,
 'compress_observations': False,
 'custom_resources_per_worker': {},
 'entropy_coeff': 0.0,
 'env': 'MultiWaveAttenuationMergePOEnvBufferedObs-v0_[eta1, eta2, eta3]:[1.0, 0.2, 0.3]_t_min:10.0',
  'run': 'PPO'},
 'gamma': 0.99,
 'horizon': 1500,
 'input': 'sampler',
 'input_evaluation': None,
 'kl_coeff': 0.2,
 'kl_target': 0.01,
 'lambda': 0.97,
 'local_evaluator_tf_session_args': {'inter_op_parallelism_threads': 8,
  'intra_op_parallelism_threads': 8},
 'log_level': 'INFO',
 'lr': 0.0005,
 'lr_schedule': None,
 'model': {'channel_maj