In [1]:
from flow.core.params import SumoParams, EnvParams, \
    NetParams, InitialConfig, InFlows
from flow.core.vehicles import Vehicles
from flow.core.experiment import SumoExperiment
from flow.scenarios.merge.gen import MergeGenerator
from flow.scenarios.merge.scenario import MergeScenario, \
    ADDITIONAL_NET_PARAMS
from flow.controllers import IDMController
from flow.envs.merge import WaveAttenuationMergePOEnv, ADDITIONAL_ENV_PARAMS

# Highway inflow rate in vehicles per hour; passed as `vehs_per_hour` for the
# "inflow_highway" edge when the inflows are configured.
FLOW_RATE = 2000

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

In [3]:
# Ask PyTorch whether a CUDA device is available; as the cell's last
# expression, the boolean result is displayed as the cell output.
torch.cuda.is_available()

False

# Initialize the Environment

In [3]:
# Simulator settings: launch the "sumo-gui" binary, write emission output
# under ./data/, and advance the simulation with sim_step=0.2
# (presumably seconds -- confirm against SumoParams).
sumo_params = SumoParams(
    sumo_binary="sumo-gui",
    emission_path="./data/",
    sim_step=0.2,
    restart_instance=True)

# A single vehicle type, "human", whose acceleration comes from IDMController
# configured with noise=0.2. Five such vehicles are requested up front.
# NOTE(review): no autonomous/RL vehicle type is registered in this cell, even
# though the notebook text describes observations and actions for autonomous
# vehicles -- confirm this is intended for the "merge-baseline" scenario.
vehicles = Vehicles()
vehicles.add(
    veh_id="human",
    acceleration_controller=(IDMController, {
        "noise": 0.2
    }),
    speed_mode="no_collide",
    num_vehicles=5)

# Copy the module-level defaults instead of passing the shared dict itself, so
# later edits to env_params.additional_params cannot silently change
# ADDITIONAL_ENV_PARAMS for the rest of the kernel session (the net params
# below are copied for the same reason).
additional_env_params = ADDITIONAL_ENV_PARAMS.copy()
env_params = EnvParams(
    additional_params=additional_env_params,
    sims_per_step=5,
    warmup_steps=0)

# Two streams of "human" vehicles enter the network: the highway stream at
# FLOW_RATE vehicles per hour, and the merge stream at a fixed 100 vehicles
# per hour. Each stream has its own departure speed.
inflow = InFlows()
inflow.add(
    veh_type="human",
    edge="inflow_highway",
    vehs_per_hour=FLOW_RATE,
    departLane="free",
    departSpeed=10)
inflow.add(
    veh_type="human",
    edge="inflow_merge",
    vehs_per_hour=100,
    departLane="free",
    departSpeed=7.5)

# Start from the scenario's default network parameters and override the lane
# counts and the pre-merge length on a private copy.
additional_net_params = ADDITIONAL_NET_PARAMS.copy()
additional_net_params["merge_lanes"] = 1
additional_net_params["highway_lanes"] = 1
additional_net_params["pre_merge_length"] = 500
net_params = NetParams(
    in_flows=inflow,
    no_internal_links=False,
    additional_params=additional_net_params)

# Initial placement of the vehicles: uniform spacing with perturbation=5.0.
initial_config = InitialConfig(spacing="uniform", perturbation=5.0)

# Assemble the merge scenario from the pieces configured above.
scenario = MergeScenario(
    name="merge-baseline",
    generator_class=MergeGenerator,
    vehicles=vehicles,
    net_params=net_params,
    initial_config=initial_config)

# Build the environment used by the remaining cells (reset / step / close).
env = WaveAttenuationMergePOEnv(env_params, sumo_params, scenario)

# States, Actions, Rewards, Termination

* States: The observation consists of the speeds and bumper-to-bumper headways of the vehicles immediately preceding and following each autonomous vehicle, as well as the ego speed of each autonomous vehicle. In order to maintain a fixed observation size, when the number of AVs in the network is less than "num_rl", the extra entries are filled in with zeros. Conversely, if the number of autonomous vehicles is greater than "num_rl", the observations from the additional vehicles are not included in the state space.

* Actions: The action space consists of a vector of bounded accelerations, one for each autonomous vehicle $i$. In order to ensure safety, these actions are bounded by failsafes provided by the simulator at every time step. In order to account for variability in the number of autonomous vehicles, if $n_{AV}$ < "num_rl", the additional actions provided by the agent are not assigned to any vehicle. Moreover, if $n_{AV}$ > "num_rl", the additional vehicles are not provided with actions from the learning agent, and instead act as human-driven vehicles.

* Rewards: The reward function encourages proximity of the system-level velocity to a desired velocity, while slightly penalizing small time headways among autonomous vehicles.

* Termination: A rollout is terminated if the time horizon is reached or if two vehicles collide with one another.

In [73]:
# Reset the environment and keep whatever reset() returns in `state`
# (presumably the initial observation -- confirm against the env's reset()).
state = env.reset()

In [128]:
# Advance the environment by one step, passing 0 as the action. The recorded
# output is a 4-tuple (array, float, bool, dict) -- presumably
# (observation, reward, done, info); confirm against the env's step().
env.step(0)

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0]), 0.633883095776256, False, {})

In [72]:
# Release the environment through its close() method.
# NOTE(review): presumably this also shuts down the SUMO instance -- confirm.
env.close()