In [1]:
import json
import ray
from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env
import numpy as np
from flow.networks.ring import RingNetwork, ADDITIONAL_NET_PARAMS
from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder
from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams
from flow.core.params import VehicleParams, SumoCarFollowingParams
from flow.controllers import RLController, IDMController, ContinuousRouter
from gym.spaces.box import Box
from gym.spaces import Tuple
from flow.core.params import InFlows
from flow.controllers import SimLaneChangeController
from flow.networks import Network
import os
from flow.core.params import SumoLaneChangeParams

from env_merge import myEnv

# Acceleration-limit settings for the environment; the same two keys are
# supplied to EnvParams(additional_params=...) inside flow_params.
ADDITIONAL_ENV_PARAMS = dict(max_accel=1, max_decel=1)

# Length of one rollout; also used as RLlib's "horizon" setting.
HORIZON = 1000
# Rollouts gathered per training iteration
# (train_batch_size = HORIZON * N_ROLLOUTS).
N_ROLLOUTS = 20
# Number of RLlib rollout workers; ray.init reserves one extra CPU on top.
N_CPUS = 2

# Vehicle types used by the inflows. Both types start with zero vehicles
# (num_vehicles=0); vehicles only enter the network through the inflows.
vehicles = VehicleParams()

# NOTE(review): the "rl" type is driven by IDMController here, not by the
# imported RLController. Presumably Flow only treats RLController vehicles as
# RL agents -- confirm this is intended and handled by myEnv.
vehicles.add(
    "rl",
    acceleration_controller=(IDMController, {}),
    lane_change_controller=(SimLaneChangeController, {}),
    car_following_params=SumoCarFollowingParams(
        # "obey_safe_speed" is used for better dynamics at the merge
        speed_mode="obey_safe_speed",
    ),
    num_vehicles=0,
)
vehicles.add(
    "human",
    acceleration_controller=(IDMController, {}),
    lane_change_controller=(SimLaneChangeController, {}),
    car_following_params=SumoCarFollowingParams(
        # "obey_safe_speed" is used for better dynamics at the merge
        speed_mode="obey_safe_speed",
    ),
    lane_change_params=SumoLaneChangeParams(
        lane_change_mode="strategic",
        # Flow spells this keyword ``lc_pushy``; the previous ``lcpushy``
        # matched no parameter and was silently ignored.
        # NOTE(review): SUMO documents lcPushy as a sublane-model (SL2015)
        # attribute -- confirm it has an effect with the lane-change model
        # in use.
        lc_pushy=1.0,
    ),
    num_vehicles=0,
)

# specify the edges vehicles can originate on
initial_config = InitialConfig(
    edges_distribution=["gneE6", "gneE7"],
)
    
# specify the routes for vehicles in the network
class Network(Network):
    """Merge network with explicitly specified vehicle routes.

    This subclass deliberately rebinds the module-level name ``Network``,
    shadowing the base class imported from ``flow.networks``; ``flow_params``
    refers to this subclass.
    """

    def specify_routes(self, net_params):
        """Return the route (list of edge ids) for each starting edge.

        Both entry edges ("gneE7" and "gneE6") continue onto the common
        edge "gneE6.243". ``net_params`` is accepted but not used.
        """
        shared_exit = "gneE6.243"
        return {
            origin: [origin, shared_exit]
            for origin in ("gneE7", "gneE6")
        }


inflow = InFlows()

# One row per inflow: (vehicle type, entry edge, vehicles per hour, color).
# Insertion order is kept identical to the original sequence of add() calls.
_INFLOW_SPECS = (
    # highway vehicles
    ("human", "gneE6", 5000, "white"),
    # merging vehicles (two separate flows on the same edge)
    ("rl", "gneE7", 1000, "blue"),
    ("rl", "gneE7", 500, "red"),
)
for _veh_type, _edge, _rate, _color in _INFLOW_SPECS:
    inflow.add(
        veh_type=_veh_type,
        edge=_edge,
        vehs_per_hour=_rate,
        depart_lane="random",
        depart_speed="random",
        color=_color,
    )

# NOTE(review): absolute, machine-specific path to the SUMO network file;
# it has to be adjusted when running on another machine.
fileDir = '/mnt/c/Users/llave/Documents/GitHub/flow_osuphysics/lucalavezzo/merge.net.xml'
net_params = NetParams(template=fileDir, inflows=inflow)

# Complete description of the experiment. It is serialized with
# FlowParamsEncoder and handed to make_create_env() in setup_exps().
flow_params = dict(
    # name of the experiment
    exp_tag="merge",

    # flow environment the experiment is running on (the custom environment
    # class imported from env_merge)
    env_name=myEnv,

    # network class the experiment is running on
    network=Network,

    # simulator that is used by the experiment
    simulator='traci',

    # sumo-related parameters (see flow.core.params.SumoParams)
    sim=SumoParams(
        sim_step=0.1,
        render=False,
        restart_instance=True,
    ),

    # environment related parameters (see flow.core.params.EnvParams)
    env=EnvParams(
        horizon=HORIZON,
        warmup_steps=750,
        clip_actions=False,
        additional_params={
            "target_velocity": 20,
            "sort_vehicles": False,
            # "max_accel" / "max_decel" come from the module-level constant
            # so the bounds are defined in exactly one place.
            **ADDITIONAL_ENV_PARAMS,
        },
    ),

    # network-related parameters (see flow.core.params.NetParams and the
    # network's documentation or ADDITIONAL_NET_PARAMS component)
    net=net_params,

    # vehicles to be placed in the network at the start of a rollout (see
    # flow.core.params.VehicleParams)
    veh=vehicles,

    # parameters specifying the positioning of vehicles upon initialization/
    # reset (see flow.core.params.InitialConfig)
    initial=initial_config,
)


def setup_exps():
    """Return the relevant components of an RLlib experiment.

    Builds the PPO configuration from the module-level ``HORIZON``,
    ``N_ROLLOUTS``, ``N_CPUS`` and ``flow_params``, and registers the Flow
    environment with RLlib as a side effect.

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    # Imported locally so the function does not depend on a file-level import.
    from copy import deepcopy

    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    # Deep copy is required: a shallow ``.copy()`` shares the nested "model"
    # and "env_config" dicts with the class-level default, so the updates
    # below would leak into every later user of ``_default_config``.
    config = deepcopy(agent_cls._default_config)
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [3, 3]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config["horizon"] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config




  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [None]:
# Build the trainer configuration and register the environment with RLlib.
alg_run, gym_name, config = setup_exps()

# One CPU per rollout worker plus one extra. ``ignore_reinit_error`` lets
# this cell be re-executed in the same kernel: without it, a second
# ray.init() call raises because Ray is already running.
ray.init(num_cpus=N_CPUS + 1, ignore_reinit_error=True)

# Launch training; the experiment is keyed by the tag stored in flow_params.
trials = run_experiments({
    flow_params["exp_tag"]: {
        "run": alg_run,
        "env": gym_name,
        "config": {
            **config
        },
        "checkpoint_freq": 20,
        "checkpoint_at_end": True,
        # NOTE(review): a very high retry limit can hide persistent
        # failures -- confirm this is intended.
        "max_failures": 999,
        "stop": {
            "training_iteration": 200,
        },
    }
})


2020-03-17 11:37:40,105	INFO resource_spec.py:212 -- Starting Ray with 5.03 GiB memory available for workers and up to 2.54 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).


Trial name,status,loc
PPO_myEnv-v0_3d68d76e,RUNNING,


[2m[36m(pid=568)[0m E0317 11:37:43.216030400     568 socket_utils_common_posix.cc:208] check for SO_REUSEPORT: {"created":"@1584459463.216008800","description":"Protocol not available","errno":92,"file":"external/com_github_grpc_grpc/src/core/lib/iomgr/socket_utils_common_posix.cc","file_line":185,"os_error":"Protocol not available","syscall":"getsockopt(SO_REUSEPORT)"}
[2m[36m(pid=568)[0m E0317 11:37:43.216359800     568 socket_utils_common_posix.cc:313] setsockopt(TCP_USER_TIMEOUT) Protocol not available
[2m[36m(pid=569)[0m E0317 11:37:43.257234500     569 socket_utils_common_posix.cc:208] check for SO_REUSEPORT: {"created":"@1584459463.257203800","description":"Protocol not available","errno":92,"file":"external/com_github_grpc_grpc/src/core/lib/iomgr/socket_utils_common_posix.cc","file_line":185,"os_error":"Protocol not available","syscall":"getsockopt(SO_REUSEPORT)"}
[2m[36m(pid=569)[0m E0317 11:37:43.257604500     569 socket_utils_common_posix.cc:313] setsockopt(TCP_U

[2m[36m(pid=568)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=568)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=568)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=568)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=568)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=568)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=569)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=569)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=569)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=569)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=569)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=569)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=567)[0m 2020-03-17 11:38:00,023	INFO trainable.py:178 -- _setup took 11.417 seconds. If you

[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)




[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)




[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)




[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)
Result for PPO_myEnv-v0_3d68d76e:
  custom_metrics: {}
  date: 2020-03-17_11-43-44
  done: false
  episode_len_mean: 1000.0
  episode_reward_max: 9120.03751308244
  episode_reward_mean: 8702.64232763623
  episode_reward_min: 8309.833247120281
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: 9b2e14eaa3c748b59a98b0b0611433bb
  experiment_tag: '0'
  hostname: DESKTOP-3S8E765
  info:
    grad_time_ms: 6021.699
    learner:
      default_policy:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 4.999999873689376e-05
        entropy: 0.0
        entropy_coeff: 0.0
        kl: 0.0
        policy_loss: 9.421946742804721e-05
        total_loss: 60278.8203125
        vf_explained_var: -3.839914484160545e-07
        vf_loss: 60278.8203125
    load_time_ms: 1592.294
    n

Trial name,status,loc,iter,total time (s),timesteps,reward
PPO_myEnv-v0_3d68d76e,RUNNING,192.168.0.24:567,1,344.157,20000,8702.64


[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)
[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(pid=568)[0m   out=out, **kwargs)
[2m[36m(pid=568)[0m   ret = ret.dtype.type(ret / rcount)


[2m[36m(pid=569)[0m   out=out, **kwargs)
[2m[36m(pid=569)[0m   ret = ret.dtype.type(ret / rcount)


