In [1]:
import json
import ray
from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env
import numpy as np
from flow.networks.ring import RingNetwork, ADDITIONAL_NET_PARAMS
from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder
from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams
from flow.core.params import VehicleParams, SumoCarFollowingParams
from flow.controllers import RLController, IDMController, ContinuousRouter
from gym.spaces.box import Box
from gym.spaces import Tuple
from flow.core.params import InFlows
from flow.controllers import SimLaneChangeController
from flow.networks import Network
import os
from flow.controllers.routing_controllers import ConstructionRouter
from flow.core.params import SumoLaneChangeParams

from env_constructionV4_padding import myEnv

# Acceleration / deceleration limits (the same two values are repeated in
# the env additional_params of flow_params).
ADDITIONAL_ENV_PARAMS = dict(max_accel=1, max_decel=1)

# Number of steps in a single rollout.
HORIZON = 1500
# Rollouts collected per training iteration.
N_ROLLOUTS = 20
# Number of parallel rollout workers.
N_CPUS = 2

# Vehicles present at the start of a rollout: two RL-controlled vehicles.
# The "human" type is registered with num_vehicles=0, so no human-driven
# vehicle is placed at initialization.
vehicles = VehicleParams()
vehicles.add(
    "rl",
    acceleration_controller=(RLController, {}),
    lane_change_controller=(SimLaneChangeController, {}),
    routing_controller=(ConstructionRouter, {}),
    # speed mode "obey_safe_speed" is used for better dynamics at the merge
    car_following_params=SumoCarFollowingParams(
        speed_mode="obey_safe_speed",
    ),
    num_vehicles=2,
)
vehicles.add(
    "human",
    acceleration_controller=(IDMController, {}),
    lane_change_controller=(SimLaneChangeController, {}),
    # no routing_controller is set for this type (the ContinuousRouter
    # option is disabled)
    # speed mode "obey_safe_speed" is used for better dynamics at the merge
    car_following_params=SumoCarFollowingParams(
        speed_mode="obey_safe_speed",
    ),
    lane_change_params=SumoLaneChangeParams(
        lane_change_mode="strategic",
        lcpushy=1.0,
    ),
    num_vehicles=0,
)

# Edges on which vehicles may originate.
initial_config = InitialConfig(
    edges_distribution=["gneE4", "gneE35", "gneE38"],
)

# specify the routes for vehicles in the network
class Network(Network):
    """Network subclass that supplies a hard-coded route table.

    NOTE(review): this class reuses the name of the imported base class, so
    after this statement ``Network`` refers to the subclass.
    """

    def specify_routes(self, net_params):
        """Return the route table: key edge id -> ordered list of edge ids.

        Each list starts with its own key. ``net_params`` is accepted but
        not used.
        """
        # The routes keyed by "gneE35" and "gneE4" end with the same edges.
        shared_tail = [
            "gneE13", "gneE4.264", "gneE4.264.110",
            "gneE8", "gneE9", "gneE9.252", "gneE33",
        ]

        routes = {}
        routes["gneE35"] = ["gneE35", "gneE15"] + shared_tail
        routes["gneE4"] = ["gneE4", "gneE17", "gneE11"] + shared_tail
        # This route starts and ends on "gneE8".
        routes["gneE8"] = [
            "gneE8", "gneE9", "gneE37", "gneE38", "gneE39",
            "gneE4.264.110", "gneE8",
        ]
        return routes


# Inflows of "human" vehicles on two edges, each with random departure
# lane and speed.
inflow = InFlows()
inflow.add(
    veh_type="human",
    edge="gneE4",
    vehs_per_hour=1000,
    depart_lane="random",
    depart_speed="random",
    color="white",
)
inflow.add(
    veh_type="human",
    edge="gneE35",
    vehs_per_hour=5000,
    depart_lane="random",
    depart_speed="random",
    color="blue",
)


# NOTE(review): file_dir is not referenced by the code visible in this file.
file_dir = "/home/llavezzo/"

# The network is loaded from a template file given as an absolute path, with
# the inflows defined above attached.
net_params = NetParams(
    inflows=inflow,
    template="/mnt/c/Users/llave/Documents/GitHub/flow_osuphysics/lucalavezzo/constructionV4.net.xml",
)

# Full experiment description handed to make_create_env and serialized into
# the RLlib env_config by setup_exps.
flow_params = {
    # experiment name
    "exp_tag": "construction_traffic",

    # environment class the experiment runs on (the custom environment
    # imported from env_constructionV4_padding)
    "env_name": myEnv,

    # network class the experiment runs on
    "network": Network,

    # simulator used by the experiment
    "simulator": "traci",

    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": SumoParams(
        sim_step=0.1,
        render=False,
        restart_instance=True,
    ),

    # environment-related parameters (see flow.core.params.EnvParams)
    "env": EnvParams(
        horizon=HORIZON,
        warmup_steps=750,
        clip_actions=False,
        additional_params=dict(
            target_velocity=20,
            sort_vehicles=False,
            max_accel=1,
            max_decel=1,
        ),
    ),

    # network-related parameters (see flow.core.params.NetParams)
    "net": net_params,

    # vehicles placed in the network at the start of a rollout (see
    # flow.core.params.VehicleParams)
    "veh": vehicles,

    # positioning of vehicles upon initialization/reset (see
    # flow.core.params.InitialConfig)
    "initial": initial_config,
}


def setup_exps():
    """Return the relevant components of an RLlib experiment.

    Builds a PPO configuration from the agent class defaults, stores the
    JSON-serialized ``flow_params`` in it for replay, and registers the Flow
    environment with RLlib.

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    # Imported locally so the function carries its own dependency.
    from copy import deepcopy

    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    # Deep copy: the nested "model" and "env_config" dicts are mutated below,
    # and a shallow copy would write those changes into the agent class's
    # shared default configuration.
    config = deepcopy(agent_cls._default_config)
    config["num_workers"] = N_CPUS
    # one training batch holds N_ROLLOUTS rollouts of HORIZON steps each
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [3, 3]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config["horizon"] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config




  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [None]:
# Build the algorithm name, registered environment name and trainer config.
alg_run, gym_name, config = setup_exps()

# NOTE(review): presumably one CPU per rollout worker plus one for the
# trainer process - confirm.
ray.init(num_cpus=N_CPUS + 1)

# Launch a single experiment keyed by the experiment tag. It checkpoints
# every 20 iterations and at the end, and stops after 200 training
# iterations.
trials = run_experiments({
    flow_params["exp_tag"]: dict(
        run=alg_run,
        env=gym_name,
        config=dict(config),
        checkpoint_freq=20,
        checkpoint_at_end=True,
        max_failures=999,
        stop=dict(training_iteration=200),
    ),
})


2020-03-12 11:11:13,037	INFO resource_spec.py:212 -- Starting Ray with 4.79 GiB memory available for workers and up to 2.41 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).


Trial name,status,loc
PPO_myEnv-v0_b75e2a10,RUNNING,


[2m[33m(pid=raylet)[0m E0312 11:11:13.926358600    3137 socket_utils_common_posix.cc:208] check for SO_REUSEPORT: {"created":"@1584025873.926334400","description":"Protocol not available","errno":92,"file":"external/com_github_grpc_grpc/src/core/lib/iomgr/socket_utils_common_posix.cc","file_line":185,"os_error":"Protocol not available","syscall":"getsockopt(SO_REUSEPORT)"}
[2m[33m(pid=raylet)[0m E0312 11:11:13.926807900    3137 socket_utils_common_posix.cc:313] setsockopt(TCP_USER_TIMEOUT) Protocol not available
[2m[33m(pid=raylet)[0m E0312 11:11:13.948817100    3137 socket_utils_common_posix.cc:313] setsockopt(TCP_USER_TIMEOUT) Protocol not available
[2m[36m(pid=3153)[0m E0312 11:11:16.148637200    3153 socket_utils_common_posix.cc:208] check for SO_REUSEPORT: {"created":"@1584025876.148622800","description":"Protocol not available","errno":92,"file":"external/com_github_grpc_grpc/src/core/lib/iomgr/socket_utils_common_posix.cc","file_line":185,"os_error":"Protocol not ava

[2m[36m(pid=3151)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=3151)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=3151)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=3151)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=3151)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=3151)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=3153)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=3153)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=3153)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=3153)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=3153)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=3153)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=3152)[0m 2020-03-12 11:11:31,620	INFO trainable.py:178 -- _setup took 10.085 se











[2m[36m(pid=3153)[0m VALUE ERROR VEL
[2m[36m(pid=3153)[0m VALUE ERROR VEL
[2m[36m(pid=3153)[0m VALUE ERROR VEL






[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL




[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL
[2m[36m(pid=3151)[0m VALUE ERROR VEL






Result for PPO_myEnv-v0_b75e2a10:
  custom_metrics: {}
  date: 2020-03-12_11-29-47
  done: false
  episode_len_mean: 1500.0
  episode_reward_max: 14107.86687494436
  episode_reward_mean: -4224671.552911492
  episode_reward_min: -39022649.25369655
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: c2fe1db405f5493ca5d7e1dd58628c28
  experiment_tag: '0'
  hostname: DESKTOP-3S8E765
  info:
    grad_time_ms: 5796.09
    learner:
      default_policy:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 4.999999873689376e-05
        entropy: 2.8335816860198975
        entropy_coeff: 0.0
        kl: 0.0005130899953655899
        policy_loss: -0.0014020774979144335
        total_loss: 916716912640.0
        vf_explained_var: 6.793401325921877e-07
        vf_loss: 916716912640.0
    load_time_ms: 1272.953
    num_steps_sampled: 30000
    num_steps_trained: 29952
    sample_time_ms: 1087811.487
    update_time_ms: 869.056
  iterations_since_restore: 1
  node_ip: 160.39.217.237
 

Trial name,status,loc,iter,total time (s),timesteps,reward
PPO_myEnv-v0_b75e2a10,RUNNING,160.39.217.237:3152,1,1095.87,30000,-4224670.0




2020-03-12 11:31:23,325	ERROR trial_runner.py:480 -- Trial PPO_myEnv-v0_b75e2a10: Error processing event.
Traceback (most recent call last):
  File "/home/llavezzo/anaconda2/envs/flow/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 424, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/llavezzo/anaconda2/envs/flow/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 377, in fetch_result
    result = ray.get(trial_future[0], DEFAULT_GET_TIMEOUT)
  File "/home/llavezzo/anaconda2/envs/flow/lib/python3.6/site-packages/ray/worker.py", line 1492, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(FileNotFoundError): [36mray::PPO.train()[39m (pid=3152, ip=160.39.217.237)
  File "python/ray/_raylet.pyx", line 643, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 623, in function_executor
  File "/home/llavezzo/anaconda2/envs/flow/lib/python3.6/site-packages/ray/rllib/agents/trainer.py", line 44

Trial name,status,loc,iter,total time (s),timesteps,reward
PPO_myEnv-v0_b75e2a10,RUNNING,,1,1095.87,30000,-4224670.0

Trial name,# failures,error file
PPO_myEnv-v0_b75e2a10,1,/home/llavezzo/ray_results/construction_traffic/PPO_myEnv-v0_b75e2a10_2020-03-12_11-11-145ouugecp/error.txt




[2m[36m(pid=3411)[0m E0312 11:31:31.193927700    3411 socket_utils_common_posix.cc:208] check for SO_REUSEPORT: {"created":"@1584027091.193904400","description":"Protocol not available","errno":92,"file":"external/com_github_grpc_grpc/src/core/lib/iomgr/socket_utils_common_posix.cc","file_line":185,"os_error":"Protocol not available","syscall":"getsockopt(SO_REUSEPORT)"}
[2m[36m(pid=3411)[0m E0312 11:31:31.194239900    3411 socket_utils_common_posix.cc:313] setsockopt(TCP_USER_TIMEOUT) Protocol not available
[2m[36m(pid=3414)[0m E0312 11:31:31.292740100    3414 socket_utils_common_posix.cc:208] check for SO_REUSEPORT: {"created":"@1584027091.292721700","description":"Protocol not available","errno":92,"file":"external/com_github_grpc_grpc/src/core/lib/iomgr/socket_utils_common_posix.cc","file_line":185,"os_error":"Protocol not available","syscall":"getsockopt(SO_REUSEPORT)"}
[2m[36m(pid=3414)[0m E0312 11:31:31.292991600    3414 socket_utils_common_posix.cc:313] setsockopt(T