## Create the environment

In [1]:
import os
import json

import numpy as np

from flatland.envs.line_generators import sparse_line_generator
# In Flatland you can use custom observation builders and predicitors
# Observation builders generate the observation needed by the controller
# Preditctors can be used to do short time prediction which can help in avoiding conflicts in the network
from flatland.envs.malfunction_generators import MalfunctionParameters, ParamMalfunctionGen
from flatland.envs.observations import GlobalObsForRailEnv
# First of all we import the Flatland rail environment
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_env import RailEnvActions
from flatland.envs.rail_generators import sparse_rail_generator
# We also include a renderer because we want to visualize what is going on in the environment
from flatland.utils.rendertools import RenderTool, AgentRenderVariant

import re

In [2]:
# This is an introduction example for the Flatland 2.1.* version.
# Changes and highlights of this version include
# - Stochastic events (malfunctions)
# - Different travel speeds for differet agents
# - Levels are generated using a novel generator to reflect more realistic railway networks
# - Agents start outside of the environment and enter at their own time
# - Agents leave the environment after they have reached their goal

# Use the new sparse_rail_generator to generate feasible network configurations with corresponding tasks
# Training on simple small tasks is the best way to get familiar with the environment
# We start by importing the necessary rail and schedule generators
# The rail generator will generate the railway infrastructure
# The schedule generator will assign tasks to all the agent within the railway network

# The railway infrastructure can be built using any of the provided generators in env/rail_generators.py
# Here we use the sparse_rail_generator with the following parameters

DO_RENDERING = False

width = 24  # With of map
height = 24  # Height of map
nr_trains = 1  # Number of trains that have an assigned task in the env
cities_in_map = 2  # Number of cities where agents can start or end
seed = 14  # Random seed - 14
grid_distribution_of_cities = False  # Type of city distribution, if False cities are randomly placed
max_rails_between_cities = 2  # Max number of tracks allowed between cities. This is number of entry point to a city
max_rail_in_cities = 2  # Max number of parallel tracks within a city, representing a realistic trainstation

rail_generator = sparse_rail_generator(max_num_cities=cities_in_map,
                                       seed=seed,
                                       grid_mode=grid_distribution_of_cities,
                                       max_rails_between_cities=max_rails_between_cities,
                                       max_rail_pairs_in_city=max_rail_in_cities,
                                       )

# rail_generator = SparseRailGen(max_num_cities=cities_in_map,
#                                       seed=seed,
#                                       grid_mode=grid_distribution_of_cities,
#                                       max_rails_between_cities=max_rails_between_cities,
#                                       max_rails_in_city=max_rail_in_cities,
#                                       )

In [3]:
# The schedule generator can make very basic schedules with a start point, end point and a speed profile for each agent.
# The speed profiles can be adjusted directly as well as shown later on. We start by introducing a statistical
# distribution of speed profiles

# Different agent types (trains) with different speeds.
# speed_ration_map = {1.: 0.25,  # Fast passenger train
#                     1. / 2.: 0.25,  # Fast freight train
#                     1. / 3.: 0.25,  # Slow commuter train
#                     1. / 4.: 0.25}  # Slow freight train

speed_ration_map = {1: 1.0}

# We can now initiate the schedule generator with the given speed profiles

line_generator = sparse_line_generator()

In [4]:
# Custom observation builder without predictor
observation_builder = GlobalObsForRailEnv()

# Custom observation builder with predictor, uncomment line below if you want to try this one
# observation_builder = TreeObsForRailEnv(max_depth=2, predictor=ShortestPathPredictorForRailEnv())

# Construct the enviornment with the given observation, generataors, predictors, and stochastic data
env = RailEnv(width=width,
              height=height,
              rail_generator=rail_generator,
              line_generator=line_generator,
              number_of_agents=nr_trains,
              obs_builder_object=observation_builder,
              remove_agents_at_target=True)
env.reset()

# Initiate the renderer
env_renderer = None
if DO_RENDERING:
    env_renderer = RenderTool(env,
                              agent_render_variant=AgentRenderVariant.ONE_STEP_BEHIND,
                              show_debug=False,
                              screen_height=600,  # Adjust these parameters to fit your resolution
                              screen_width=800)  # Adjust these parameters to fit your resolution

?????
6 6
?????
7 6
?????
8 6
?????
9 6
?????
10 6
?????
11 6
?????
11 5
?????
11 4
?????
11 3
?????
11 2
?????
11 1
?????
10 1
?????
9 1
?????
8 1
?????
7 1
?????
6 1
?????
5 1
?????
4 1
?????
3 1
?????
2 1
?????
1 1
?????
1 2
?????
1 3
?????
1 4
?????
1 5
?????
1 6
?????
2 6
?????
3 6
?????
4 6
?????
5 6
?????
3 7
?????
4 7
?????
4 6
?????
5 7
?????
6 7
?????
7 7
?????
8 7
?????
9 7
?????
9 6
?????
12 6
?????
12 5
?????
13 5
?????
13 4
?????
13 3
?????
13 2
?????
13 1
?????
14 1
?????
15 1
?????
16 1
?????
17 1
?????
18 1
?????
19 1
?????
20 1
?????
21 1
?????
22 1
?????
23 1
?????
23 2
?????
23 3
?????
23 4
?????
23 5
?????
23 6
?????
22 6
?????
21 6
?????
21 7
?????
20 7
?????
20 6
?????
19 6
?????
18 6
?????
17 6
?????
16 6
?????
16 7
?????
15 7
?????
15 6
?????
14 6
?????
13 6
?????
12 6
?????
11 6
?????
11 7
?????
12 7
?????
13 7
?????
14 7
?????
15 7
?????
16 7
?????
16 6
?????
17 6
?????
18 6
?????
19 6
?????
20 6
?????
21 6
?????
22 6
?????
23 6
?????
23 5
?????
23 4
?????
23



In [5]:
# convert rail grid array to Clingo string
class clingo_grid():
    """representing the array of a Flatland map in a way that clingo can process"""
    
    def __init__(self, env):
        rail_map = env.rail.grid
        self.clingo_str = ""
        self.mapping = {}
        
        row_num = len(rail_map) - 1
        for row in rail_map:
            for col,cval in enumerate(row):
                self.clingo_str += "cell(({},{}), {}). ".format(col+0,row_num+0,cval)
                self.mapping[(col,row_num)] = cval
            row_num -= 1

        dir_map = {0:"n", 1:"e", 2:"s", 3:"w"}
        self.clingo_str += "start(cell({},{}),dir({})). ".format(env.agents[0].initial_position[1], env.agents[0].initial_position[0], dir_map[env.agents[0].initial_direction])
        self.clingo_str += "end(cell({},{})). ".format(env.agents[0].target[1], env.agents[0].target[0])
	

# generate environments
num_environments = 1 #only generating one

for envir in range(num_environments):
    #current_env = build_env(*parameters, envir) #set envir as seed value
    current_env = env

    # save entire environment -- BUG
    # current_env.save('save_test')

    # render an image
    env_renderer = RenderTool(current_env, gl="PILSVG", )
    env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
    env_renderer.gl.save_image('./test_envs/env_{}.png'.format(envir))
    env_renderer.reset()

    # save rail grid
    #np.savetxt('./test_envs/env_'+str(envir), current_env, fmt = '% 4d')
    clingo = clingo_grid(current_env)
    f = open('./test_envs/env_'+str(envir), 'w')
    f.write(clingo.clingo_str)
    f.close()

In [19]:
#env.agents[0].initial_position, env.agents[0].target, env.agents[0].initial_direction

In [7]:
# manually determine start and end
#env.agents[0].initial_position = (1,9)
#env.agents[0].target = (7,15)
#env.agents[0].initial_direction = 0

In [8]:
#from flatland.utils.rendertools import RenderTool
env_renderer = RenderTool(env, gl="PILSVG", )
env_renderer.reset()

In [9]:
# Lets try to enter with all of these agents at the same time
action_dict = dict()

## Find a path using Clingo

Shouldn't be able to do actions once they hit

In [10]:
%%capture cap --no-stderr
%%clingo 1 - test_envs/env_0 transitions.lp actions.lp --outf=2

% ENCODING
maxTime(54).

at(cell(X0,Y0), D, 0) :- start(cell(X0,Y0), dir(D)).                          % determine first cell
do(f,0).                                                                      % first move is always forward

{ do(A,T) : move(A) } = 1 :- maxTime(M), T=1..M.                              % chose one move per time step
    
:- do(A,T), at(cell(X,Y), D, T), cell((X,Y), Type), not type(Type, (D,A)).    % keep only legal actions
    
at(cell(X1,Y1), ResultingDir, T) :- do(A,T-1), at(cell(X0,Y0), D, T-1), offset((D,A), (DX,DY), ResultingDir), X1=X0+DX, Y1=Y0+DY.    % determine resulting cells

:- end(cell(X,Y)), not at(cell(X,Y), _, _).                                   % must reach goal


### Save the output to an auxiliary file

In [11]:
with open('./output.json', 'w') as f:
     f.write(cap.stdout)

### Read actions from output.json

In [12]:
with open('./output.json', 'r') as f:
    file = json.load(f)
    
try:
    actions = file['Call'][0]['Witnesses'][0]['Value']
except KeyError as e:
    print('\n\nError:\n\nThe model is unsatisfiable.  Try increasing the maximum number of steps.\n\n')


In [13]:
action_list = []
for act in actions:
    a = re.match("[a-z_]*", act).group(0).upper()
    t = re.match(".*\((\d+)\)", act).group(1)
    action_list.append((int(t),a))

In [21]:
#action_list

### Sort actions into the correct order

In [15]:
#take these and keep just the actions in the right (sorted) order
actions = []
for tup in sorted(action_list):
    actions.append(tup[1])

In [16]:
max_actions = int(sorted(action_list)[-1][0])

### Create a `ClingoAgent` in Flatland

Benefit to inheret?  Ask Michel

In [17]:
# The first thing we notice is that some agents don't have feasible paths to their target.
# We first look at the map we have created

# nv_renderer.render_env(show=True)
# time.sleep(2)
# Import your own Agent or use RLlib to train agents on Flatland

# As an example we use a random agent instead
class ClingoAgent:

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

    def act(self, state):
        """
        :param state: input is the observation of the agent
        :return: returns an action
        """
        mapping = {"MOVE_FORWARD":RailEnvActions.MOVE_FORWARD, "MOVE_RIGHT":RailEnvActions.MOVE_RIGHT, "MOVE_LEFT":RailEnvActions.MOVE_LEFT, "STOP_MOVING":RailEnvActions.STOP_MOVING}
        print(state, mapping[actions[state]])
        return mapping[actions[state]]
        

    def step(self, memories):
        """
        Step function to improve agent by adjusting policy given the observations

        :param memories: SARS Tuple to be
        :return:
        """
        return

    def save(self, filename):
        # Store the current policy
        return

    def load(self, filename):
        # Load a policy
        return

### Render a visualization

In [18]:
# Initialize the agent with the parameters corresponding to the environment and observation_builder
controller = ClingoAgent(218, env.action_space[0])

# We start by looking at the information of each agent
# We can see the task assigned to the agent by looking at
print("\n Agents in the environment have to solve the following tasks: \n")
for agent_idx, agent in enumerate(env.agents):
    print(
        "The agent with index {} has the task to go from its initial position {}, facing in the direction {} to its target at {}.".format(
            agent_idx, agent.initial_position, agent.direction, agent.target))

# The agent will always have a status indicating if it is currently present in the environment or done or active
# For example we see that agent with index 0 is currently not active
print("\n Their current statuses are:")
print("============================")

for agent_idx, agent in enumerate(env.agents):
    print("Agent {} status is: {} with its current position being {}".format(agent_idx, str(agent.state),
                                                                             str(agent.position)))

# The agent needs to take any action [1,2,3] except do_nothing or stop to enter the level
# If the starting cell is free they will enter the level
# If multiple agents want to enter the same cell at the same time the lower index agent will enter first.

# Let's check if there are any agents with the same start location
agents_with_same_start = set()
print("\n The following agents have the same initial position:")
print("=====================================================")
for agent_idx, agent in enumerate(env.agents):
    for agent_2_idx, agent2 in enumerate(env.agents):
        if agent_idx != agent_2_idx and agent.initial_position == agent2.initial_position:
            print("Agent {} as the same initial position as agent {}".format(agent_idx, agent_2_idx))
            agents_with_same_start.add(agent_idx)

# Lets try to enter with all of these agents at the same time
action_dict = dict()

for agent_id in agents_with_same_start:
    action_dict[agent_id] = 1  # Try to move with the agents

# Do a step in the environment to see what agents entered:
env.step(action_dict)

# Current state and position of the agents after all agents with same start position tried to move
print("\n This happened when all tried to enter at the same time:")
print("========================================================")
for agent_id in agents_with_same_start:
    print(
        "Agent {} status is: {} with the current position being {}.".format(
            agent_id, str(env.agents[agent_id].state),
            str(env.agents[agent_id].position)))

# As you see only the agents with lower indexes moved. As soon as the cell is free again the agents can attempt
# to start again.

# You will also notice, that the agents move at different speeds once they are on the rail.
# The agents will always move at full speed when moving, never a speed inbetween.
# The fastest an agent can go is 1, meaning that it moves to the next cell at every time step
# All slower speeds indicate the fraction of a cell that is moved at each time step
# Lets look at the current speed data of the agents:

print("\n The speed information of the agents are:")
print("=========================================")

for agent_idx, agent in enumerate(env.agents):
    print(
        "Agent {} speed is: {:.2f} with the current fractional position being {}/{}".format(
            agent_idx, agent.speed_counter.speed, agent.speed_counter.counter, agent.speed_counter.max_count))

# New the agents can also have stochastic malfunctions happening which will lead to them being unable to move
# for a certain amount of time steps. The malfunction data of the agents can easily be accessed as follows
print("\n The malfunction data of the agents are:")
print("========================================")

for agent_idx, agent in enumerate(env.agents):
    print(
        "Agent {} is OK = {}".format(
            agent_idx, agent.malfunction_handler.in_malfunction))

# Now that you have seen these novel concepts that were introduced you will realize that agents don't need to take
# an action at every time step as it will only change the outcome when actions are chosen at cell entry.
# Therefore the environment provides information about what agents need to provide an action in the next step.
# You can access this in the following way.

# Chose an action for each agent
for a in range(env.get_num_agents()):
    action = controller.act(0)
    action_dict.update({a: action})
# Do the environment step
observations, rewards, dones, information = env.step(action_dict)
print("\n The following agents can register an action:")
print("========================================")
for info in information['action_required']:
    print("Agent {} needs to submit an action.".format(info))

# We recommend that you monitor the malfunction data and the action required in order to optimize your training
# and controlling code.

# Let us now look at an episode playing out with random actions performed

print("\nStart episode...")

# Reset the rendering system
if env_renderer is not None:
    env_renderer.reset()

# Here you can also further enhance the provided observation by means of normalization
# See training navigation example in the baseline repository


score = 0
# Run episode
frame_step = 0

os.makedirs("tmp/frames", exist_ok=True)

for step in range(max_actions+1):
    # Chose an action for each agent in the environment
    for a in range(env.get_num_agents()):
        #action = controller.act(observations[a])
        action = controller.act(step)
        action_dict.update({a: action})

    # Environment step which returns the observations for all agents, their corresponding
    # reward and whether their are done

    next_obs, all_rewards, done, _ = env.step(action_dict)

    if env_renderer is not None:
        env_renderer.render_env(show=True, show_observations=False, show_predictions=False)
        env_renderer.gl.save_image('tmp/frames/flatland_frame_{:04d}.png'.format(step))
        env_renderer.reset()
    
    done['__all__'] = False

#    frame_step += 1
    # Update replay buffer and train agent
#     for a in range(env.get_num_agents()):
#         controller.step((observations[a], action_dict[a], all_rewards[a], next_obs[a], done[a]))
#         score += all_rewards[a]

#     observations = next_obs.copy()
#     if done['__all__']:
#         print('all are done')
#         break
        
    print('Episode: Steps {}\t Score = {}'.format(step, score))

# close the renderer / rendering window
if env_renderer is not None:
    env_renderer.close_window()


 Agents in the environment have to solve the following tasks: 

The agent with index 0 has the task to go from its initial position (6, 6), facing in the direction 0 to its target at (18, 7).

 Their current statuses are:
Agent 0 status is: TrainState.WAITING with its current position being None

 The following agents have the same initial position:

 This happened when all tried to enter at the same time:

 The speed information of the agents are:
Agent 0 speed is: 1.00 with the current fractional position being 0/0

 The malfunction data of the agents are:
Agent 0 is OK = False
0 RailEnvActions.MOVE_FORWARD

 The following agents can register an action:
Agent 0 needs to submit an action.

Start episode...
0 RailEnvActions.MOVE_FORWARD
Episode: Steps 0	 Score = 0
1 RailEnvActions.MOVE_FORWARD
Episode: Steps 1	 Score = 0
2 RailEnvActions.MOVE_FORWARD
Episode: Steps 2	 Score = 0
3 RailEnvActions.MOVE_FORWARD
Episode: Steps 3	 Score = 0
4 RailEnvActions.MOVE_FORWARD
Episode: Steps 4	 Sc

---