# Building the Flow experiment

To get a sense of what goes into a `flow_params` object, load the one defined for the `grid0` benchmark and inspect it:

In [None]:
"""
Most of the code in this notebook is taken from various parts of the Flow codebase:
https://github.com/flow-project/flow
"""

from flow.benchmarks.grid0 import flow_params

In [None]:
# See what is inside the flow_params object: leaving the bare name as the
# last expression of the cell makes the notebook display its contents.
flow_params

In [None]:
# Dig one level deeper into the 'net' entry: print the names it exposes,
# then display its additional_params as the cell output.
net_params = flow_params['net']
print(dir(net_params))
net_params.additional_params

In [None]:
# You can run an experiment with the existing settings as follows:
from flow.core.experiment import Experiment

# Keep a handle on the simulation parameters; later cells reuse sim_params
# to toggle rendering.
sim_params = flow_params['sim']
# Enable visualization for this run.
sim_params.render = True

# Build the experiment from the (now modified) parameters and execute a
# single run, keeping whatever the run reports in `results`.
exp = Experiment(flow_params)
results = exp.run(1)

# Getting a baseline

In [None]:
# https://github.com/flow-project/flow/blob/master/flow/benchmarks/baselines/grid0.py

import numpy as np
from flow.core.experiment import Experiment
from flow.core.params import TrafficLightParams
from flow.benchmarks.grid0 import flow_params
from flow.benchmarks.grid0 import N_ROWS
from flow.benchmarks.grid0 import N_COLUMNS

# Number of simulation runs the baseline return is averaged over.
num_runs = 1

# Look up both parameter objects here so this cell works on its own instead
# of relying on `sim_params` having been created by an earlier cell.
sim_params = flow_params['sim']
env_params = flow_params['env']

# Optimized traffic light phases
tl_logic = TrafficLightParams(baseline=False)

# Four-phase program; every value is passed as a string.
phases = [
    {"duration": "31", "minDur": "8", "maxDur": "45", "state": "GrGr"},
    {"duration": "6", "minDur": "3", "maxDur": "6", "state": "yryr"},
    {"duration": "31", "minDur": "8", "maxDur": "45", "state": "rGrG"},
    {"duration": "6", "minDur": "3", "maxDur": "6", "state": "ryry"},
]

# Attach the same actuated program to every 'center<i>' node, one per cell
# of the N_ROWS x N_COLUMNS grid.
for i in range(N_ROWS * N_COLUMNS):
    tl_logic.add(f'center{i}', tls_type='actuated', phases=phases,
                 programID=1)

flow_params['tls'] = tl_logic

# Enable visualization or not
sim_params.render = True

# Set the evaluation flag to True
env_params.evaluate = True

# env_params is the very object stored in flow_params['env'], so the
# settings above are already part of flow_params when the experiment is built.
exp = Experiment(flow_params)

results = exp.run(num_runs)
total_delay = np.mean(results['returns'])
print(f"The total delay across {num_runs} runs is {total_delay}")

# RL Solution

In [None]:
# Set up the multi-agent env and variables

from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
from ray.tune.registry import register_env
from flow.utils.registry import make_create_env
from flow.envs.multiagent import MultiTrafficLightGridPOEnv

# Switch the shared parameter objects over from baseline evaluation to
# multi-agent RL training: no rendering, no evaluation mode.
sim_params.render = False
env_params.evaluate = False
env_params.additional_params.update(
    tl_type='controlled',
    num_local_edges=4,
    num_local_lights=4,
)

# Drop any traffic light program left in the params (no-op when absent) and
# select the multi-agent environment class.
flow_params.pop('tls', None)
flow_params["env_name"] = MultiTrafficLightGridPOEnv

create_env, env_name = make_create_env(params=flow_params, version=0)

# Make the environment available to RLlib under env_name
register_env(env_name, create_env)

# Instantiate one environment just to read its observation/action spaces
test_env = create_env()
obs_space, act_space = test_env.observation_space, test_env.action_space


def gen_policy():
    """Build the policy specification handed to RLlib.

    Returns
    -------
    tuple
        ``(PPOTFPolicy, obs_space, act_space, {})`` -- the policy class, the
        module-level observation and action spaces, and an empty dict.
    """
    policy_spec = (PPOTFPolicy, obs_space, act_space, {})
    return policy_spec


def policy_mapping_fn(_):
    """Return the policy id to use for an agent.

    The argument is ignored: every agent is mapped to the single shared
    policy ``'tlight'``.
    """
    shared_policy_id = 'tlight'
    return shared_policy_id


# A single policy, shared by all agents, is both defined and trained
policies_to_train = ['tlight']

# Map each trained policy id to its specification
policy_graphs = {policy_id: gen_policy() for policy_id in policies_to_train}

In [None]:
import json
from copy import deepcopy

from flow.utils.rllib import FlowParamsEncoder
from ray import tune
from ray.tune.registry import register_env
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class

# Sampling budget and hardware resources requested for training
n_rollouts = 50
n_cpus = 50
n_gpus = 1

horizon = flow_params['env'].horizon

alg_run = "PPO"

# Work on a private copy of the algorithm's default configuration
agent_cls = get_agent_class(alg_run)
config = deepcopy(agent_cls._default_config)

config.update({
    "num_workers": n_cpus,
    "num_gpus": n_gpus,
    # one training batch holds n_rollouts rollouts of `horizon` steps each
    "train_batch_size": horizon * n_rollouts,
    "gamma": 0.999,  # discount rate
    "use_gae": True,
    "lambda": 0.97,
    "kl_target": 0.02,
    "num_sgd_iter": 10,
    "horizon": horizon,
})
config["model"]["fcnet_hiddens"] = [64, 64]

# Serialize the flow params into the env config so they are saved for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config'].update({
    'flow_params': flow_json,
    'run': alg_run,
})

# Multi-agent settings: the policies, how agents map to them, and which
# of them are trained
config['multiagent'].update({
    'policies': policy_graphs,
    'policy_mapping_fn': policy_mapping_fn,
    'policies_to_train': policies_to_train,
})

In [None]:
import ray
from ray.tune import run_experiments
# Start Ray with its default settings before describing the experiment
ray.init()

# Experiment description: which algorithm and environment to run, a shallow
# copy of the training config, checkpointing policy, and stopping criterion.
exp_config = {
    "run": alg_run,
    "env": env_name,
    "config": dict(config),
    "checkpoint_freq": 5,
    "checkpoint_at_end": True,
    "max_failures": 999,
    "stop": {"training_iteration": 100},
}

In [None]:
# Launch training; the experiment is keyed by the tag stored in flow_params
experiment_tag = flow_params["exp_tag"]
run_experiments({experiment_tag: exp_config})

In [None]:
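# Visualize the trained policy. Replace both paths below with your own: the
# first points to visualizer_rllib.py in your Flow checkout, the second to the
# training run's directory under ray_results; the last argument is the checkpoint number.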
!python /home/enes/ws/envs/flow/flow/visualize/visualizer_rllib.py /home/enes/ray_results/grid_0_3x3_i300_multiagent/PPO_MultiTrafficLightGridPOEnv-v1_8fe43172_2020-10-15_20-27-47zvats9v8 100