In [None]:
import time
import os

Test if we are running in a notebook<br>
https://stackoverflow.com/questions/15411967/how-can-i-check-if-code-is-executed-in-the-ipython-notebook

In [None]:
# Detect whether this code runs inside IPython/Jupyter: get_ipython() only
# exists there, so a NameError means we are in a plain Python interpreter.
try:
    get_ipython()  # noqa F821  (undefined outside IPython, flake8 must ignore it)
except NameError:
    in_notebook = False
else:
    in_notebook = True

Make the cells wider than the default:

In [None]:
if in_notebook:
    # Imported locally: the notebook-wide IPython import cell sits further down,
    # so on a fresh kernel (Restart & Run All) HTML would not be defined yet
    # and the display call below would raise NameError.
    from IPython.display import display, HTML

    # Make the notebook cells wider than the default
    display(HTML("<style>.container { width:95% !important; }</style>"))

    # One-time install steps, guarded by a marker file in the working directory
    if not os.path.exists("flatland-installed.flag"):
        print("Running install steps for Google Colab - assuming apt works!")
        os.system("apt-get install -y xvfb python-opengl")
        # -y: nothing can answer an interactive confirmation prompt when apt is
        # started through os.system, so without it the install may abort
        os.system("apt install -y x11-utils")
        os.system("pip install pyvirtualdisplay")
        os.system("pip install flatland-rl")
        os.system("touch ./flatland-installed.flag")
        print("You may need to restart runtime on Colab now...")
    else:
        print("Looks like flatland-rl and reqts are already installed - skipping install")

    # Start a virtual X display; pyvirtualdisplay is imported here because it
    # may only just have been installed by the steps above
    print("Starting Xorg xvfb virtual display for Colab")
    import pyvirtualdisplay
    xdisplay = pyvirtualdisplay.Display(visible=0, size=(400, 300))
    print(xdisplay.start())

In [None]:
import numpy as np  # noqa e402

In Flatland you can use custom observation builders and predictors<br>
Observation builders generate the observation needed by the controller<br>
Predictors can be used to do short time prediction which can help in avoiding conflicts in the network

In [None]:
from flatland.envs.malfunction_generators import malfunction_from_params, MalfunctionParameters
from flatland.envs.observations import GlobalObsForRailEnv
# First of all we import the Flatland rail environment
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_env import RailEnvActions
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
# We also include a renderer because we want to visualize what is going on in the environment
from flatland.utils.rendertools import RenderTool, AgentRenderVariant

These are used in the notebook version of this code, but not the plain python

In [None]:
from IPython.core.display import display, HTML, clear_output
import PIL

This is an introduction example for the Flatland 2.1.* version.<br>
Changes and highlights of this version include<br>
- Stochastic events (malfunctions)<br>
- Different travel speeds for different agents<br>
- Levels are generated using a novel generator to reflect more realistic railway networks<br>
- Agents start outside of the environment and enter at their own time<br>
- Agents leave the environment after they have reached their goal<br>
Use the new sparse_rail_generator to generate feasible network configurations with corresponding tasks<br>
Training on simple small tasks is the best way to get familiar with the environment<br>
We start by importing the necessary rail and schedule generators<br>
The rail generator will generate the railway infrastructure<br>
The schedule generator will assign tasks to all the agents within the railway network

The railway infrastructure can be built using any of the provided generators in envs/rail_generators.py<br>
Here we use the sparse_rail_generator with the following parameters

In [None]:
# Map size and traffic volume; the notebook gets a smaller map than the script
if in_notebook:
    width, height = 30, 30  # Width and height of the map
    nr_trains = 5  # Number of trains that have an assigned task in the env
    cities_in_map = 2  # Number of cities where agents can start or end
else:
    width, height = 16 * 7, 9 * 7  # Width and height of the map
    nr_trains = 50  # Number of trains that have an assigned task in the env
    cities_in_map = 20  # Number of cities where agents can start or end

# Settings shared by both variants
seed = 14  # Random seed
grid_distribution_of_cities = False  # If False, cities are placed randomly instead of on a grid
max_rails_between_cities = 2  # Max number of tracks between cities, i.e. entry points to a city
max_rail_in_cities = 6  # Max number of parallel tracks within a city (a realistic train station)

In [None]:
# Rail generator configured from the map parameters chosen in the cell before
rail_generator = sparse_rail_generator(
    max_num_cities=cities_in_map,
    seed=seed,
    grid_mode=grid_distribution_of_cities,
    max_rails_between_cities=max_rails_between_cities,
    max_rails_in_city=max_rail_in_cities,
)

The schedule generator can make very basic schedules with a start point, end point and a speed profile for each agent.<br>
The speed profiles can be adjusted directly as well as shown later on. We start by introducing a statistical<br>
distribution of speed profiles

Different agent types (trains) with different speeds.

In [None]:
# Maps each speed to its share of the agents; all four speed classes get 0.25
speed_ration_map = dict.fromkeys(
    (
        1.,       # Fast passenger train
        1. / 2.,  # Fast freight train
        1. / 3.,  # Slow commuter train
        1. / 4.,  # Slow freight train
    ),
    0.25,
)

We can now initiate the schedule generator with the given speed profiles

In [None]:
# Build the schedule generator from the speed-profile distribution in speed_ration_map
schedule_generator = sparse_schedule_generator(speed_ration_map)

We can furthermore pass stochastic data to the RailEnv constructor which will allow for stochastic malfunctions<br>
during an episode.

In [None]:
# Parameters of the stochastic malfunctions that can hit agents during an episode
stochastic_data = MalfunctionParameters(
    malfunction_rate=10000,  # Rate of malfunction occurrence
    min_duration=15,  # Minimal duration of malfunction
    max_duration=50,  # Max duration of malfunction
)

# Custom observation builder without predictor
observation_builder = GlobalObsForRailEnv()

To try a custom observation builder with a predictor instead, import both classes and replace the observation builder above with the following line:<br>
`observation_builder = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())`

Construct the environment with the given observation, generators, predictors, and stochastic data

In [None]:
# Assemble the environment from the generators, observation builder and
# malfunction parameters prepared in the previous cells
env = RailEnv(
    width=width,
    height=height,
    rail_generator=rail_generator,
    schedule_generator=schedule_generator,
    number_of_agents=nr_trains,
    obs_builder_object=observation_builder,
    malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
    remove_agents_at_target=True,
)
# The environment has to be reset once before it is used
env.reset()

Initiate the renderer

In [None]:
# Renderer for the environment; adjust screen_height / screen_width to fit your resolution
env_renderer = RenderTool(
    env,
    gl="PILSVG",
    agent_render_variant=AgentRenderVariant.ONE_STEP_BEHIND,
    show_debug=False,
    screen_height=600,
    screen_width=800,
)

The first thing we notice is that some agents don't have feasible paths to their target.<br>
We first look at the map we have created

`env_renderer.render_env(show=True)`<br>
`time.sleep(2)`<br>
Import your own Agent or use RLlib to train agents on Flatland<br>
As an example we use a random agent instead

In [None]:
class RandomAgent:
    """Stand-in controller that ignores observations and acts randomly."""

    def __init__(self, state_size, action_size):
        # Both sizes are only stored; nothing in this class reads them back
        self.state_size, self.action_size = state_size, action_size

    def act(self, state):
        """
        Pick one of four movement actions at random.

        :param state: observation of the agent (not used by this policy)
        :return: the randomly drawn action
        """
        candidate_actions = [
            RailEnvActions.MOVE_FORWARD,
            RailEnvActions.MOVE_RIGHT,
            RailEnvActions.MOVE_LEFT,
            RailEnvActions.STOP_MOVING,
        ]
        return np.random.choice(candidate_actions)

    def step(self, memories):
        """
        Learning hook; a real agent would adjust its policy here.

        :param memories: transition tuple handed over by the training loop
        :return: None, the random agent does not learn
        """
        return None

    def save(self, filename):
        """Placeholder for storing the current policy; does nothing."""
        return None

    def load(self, filename):
        """Placeholder for loading a policy; does nothing."""
        return None

Initialize the agent with the parameters corresponding to the environment and observation_builder

In [None]:
# RandomAgent(state_size, action_size): 218 is passed as the state size and
# the first entry of env.action_space as the action size
controller = RandomAgent(218, env.action_space[0])

We start by looking at the information of each agent<br>
We can see the task assigned to each agent by looking at its initial position, direction and target:

In [None]:
print("\n Agents in the environment have to solve the following tasks: \n")
# The template is a single string built by implicit literal concatenation.
# Joining the two halves with "+" would apply .format() to the second half
# only, because the method call binds tighter than "+": the first two
# placeholders would then print as literal "{}" and the last two would
# receive the wrong values.
task_template = (
    "The agent with index {} has the task to go from its initial position {}, "
    "facing in the direction {} to its target at {}."
)
for agent_idx, agent in enumerate(env.agents):
    print(task_template.format(agent_idx, agent.initial_position, agent.direction, agent.target))

The agent will always have a status indicating if it is currently present in the environment or done or active<br>
For example we see that agent with index 0 is currently not active

In [None]:
# Heading for the per-agent status listing that follows
print("\n Their current statuses are:",
      "============================",
      sep="\n")

In [None]:
# One line per agent: its status and current position
status_report = "Agent {} status is: {} with its current position being {}"
for agent_idx, agent in enumerate(env.agents):
    status_text, position_text = str(agent.status), str(agent.position)
    print(status_report.format(agent_idx, status_text, position_text))

The agent needs to take any action [1,2,3] except do_nothing or stop to enter the level<br>
If the starting cell is free they will enter the level<br>
If multiple agents want to enter the same cell at the same time the lower index agent will enter first.

Let's check if there are any agents with the same start location

In [None]:
# Collect the index of every agent that shares its initial position with
# at least one other agent, reporting each ordered pair found on the way
agents_with_same_start = set()
print("\n The following agents have the same initial position:",
      "=====================================================",
      sep="\n")
for agent_idx, agent in enumerate(env.agents):
    for agent_2_idx, agent2 in enumerate(env.agents):
        # Skip the agent itself and agents starting somewhere else
        if agent_idx == agent_2_idx or agent.initial_position != agent2.initial_position:
            continue
        print("Agent {} as the same initial position as agent {}".format(agent_idx, agent_2_idx))
        agents_with_same_start.add(agent_idx)

Let's try to enter with all of these agents at the same time

In [None]:
# Actions to submit to the environment, keyed by agent index
action_dict = {}

In [None]:
# Give every agent that shares a start cell action 1, i.e. try to move with it
action_dict.update(dict.fromkeys(agents_with_same_start, 1))

Do a step in the environment to see what agents entered:

In [None]:
# Submit the collected actions; the return value of step() is not stored here
env.step(action_dict)

Current state and position of the agents after all agents with same start position tried to move

In [None]:
# Status and position of every agent that tried to enter from a shared start cell
print("\n This happened when all tried to enter at the same time:",
      "========================================================",
      sep="\n")
entry_report = "Agent {} status is: {} with the current position being {}."
for agent_id in agents_with_same_start:
    status_text = str(env.agents[agent_id].status)
    position_text = str(env.agents[agent_id].position)
    print(entry_report.format(agent_id, status_text, position_text))

As you see only the agents with lower indexes moved. As soon as the cell is free again the agents can attempt<br>
to start again.

You will also notice, that the agents move at different speeds once they are on the rail.<br>
The agents will always move at full speed when moving, never a speed in between.<br>
The fastest an agent can go is 1, meaning that it moves to the next cell at every time step<br>
All slower speeds indicate the fraction of a cell that is moved at each time step<br>
Let's look at the current speed data of the agents:

In [None]:
# Heading for the per-agent speed listing that follows
print("\n The speed information of the agents are:",
      "=========================================",
      sep="\n")

In [None]:
# One line per agent: its speed and the fractional position stored in speed_data
speed_report = "Agent {} speed is: {:.2f} with the current fractional position being {}"
for agent_idx, agent in enumerate(env.agents):
    print(speed_report.format(agent_idx,
                              agent.speed_data['speed'],
                              agent.speed_data['position_fraction']))

Now the agents can also have stochastic malfunctions happening which will lead to them being unable to move<br>
for a certain amount of time steps. The malfunction data of the agents can easily be accessed as follows

In [None]:
# Heading for the per-agent malfunction listing that follows
print("\n The malfunction data of the agents are:",
      "========================================",
      sep="\n")

In [None]:
# An agent is reported as OK while its 'malfunction' value is below 1
for agent_idx, agent in enumerate(env.agents):
    agent_is_ok = agent.malfunction_data['malfunction'] < 1
    print("Agent {} is OK = {}".format(agent_idx, agent_is_ok))

Now that you have seen these novel concepts that were introduced you will realize that agents don't need to take<br>
an action at every time step as it will only change the outcome when actions are chosen at cell entry.<br>
Therefore the environment provides information about what agents need to provide an action in the next step.<br>
You can access this in the following way.

Choose an action for each agent

In [None]:
# Draw an action for every agent. RandomAgent.act ignores its state argument,
# so a placeholder 0 is passed because no observations have been stored yet.
for a in range(env.get_num_agents()):
    action = controller.act(0)
    action_dict.update({a: action})

# Do the environment step
observations, rewards, dones, information = env.step(action_dict)

print("\n The following agents can register an action:")
print("========================================")
# information['action_required'] maps each agent index to a flag (per the
# Flatland RailEnv.step info dict). Iterating the mapping directly would walk
# its keys and list every agent, so filter on the flag instead.
for agent_id, action_is_required in information['action_required'].items():
    if action_is_required:
        print("Agent {} needs to submit an action.".format(agent_id))

We recommend that you monitor the malfunction data and the action required in order to optimize your training<br>
and controlling code.

Let us now look at an episode playing out with random actions performed

In [None]:
# Announce the start of the random-action episode
print("\nStart episode...")

Reset the rendering system

In [None]:
# Reset the renderer before the episode frames are drawn
env_renderer.reset()

Here you can also further enhance the provided observation by means of normalization<br>
See training navigation example in the baseline repository

In [None]:
# Episode bookkeeping: accumulated reward and number of rendered frames
score = frame_step = 0

In [None]:
# Just 5 steps at a time in the notebook (rerun the cell for more steps), 500 otherwise
nSteps = 5 if in_notebook else 500

In [None]:
for step in range(nSteps):
    # Ask the controller for one action per agent, based on the latest observations
    action_dict.update({a: controller.act(observations[a]) for a in range(env.get_num_agents())})

    # Advance the environment: new observations, rewards and done flags for all agents
    next_obs, all_rewards, done, _ = env.step(action_dict)

    # Draw the frame; a window is only opened outside the notebook
    env_renderer.render_env(show=not in_notebook, show_observations=False, show_predictions=False)
    # env_renderer.gl.save_image('./misc/Fames2/flatland_frame_{:04d}.png'.format(step))
    frame_step += 1

    # Hand each agent's transition to the controller and accumulate the rewards
    for a in range(env.get_num_agents()):
        transition = (observations[a], action_dict[a], all_rewards[a], next_obs[a], done[a])
        controller.step(transition)
        score += all_rewards[a]
    observations = next_obs.copy()

    # Stop as soon as the environment reports that everything is done
    if done['__all__']:
        break

    print('Episode: Steps {}\t Score = {}'.format(step, score))
    if not in_notebook:
        continue

    # Notebook only: replace the cell output with the current frame
    arrImage = env_renderer.get_image()
    pilImage = PIL.Image.fromarray(arrImage)
    clear_output()
    display(pilImage)
    time.sleep(0.3)
    