# Random Agent 

This notebook runs a random agent on the Flatland environment, following the [training example](https://gitlab.aicrowd.com/flatland/flatland/blob/master/examples/training_example.py) provided by the Flatland challenge creators.

In [1]:
import numpy as np

from flatland.envs.observations import TreeObsForRailEnv, LocalObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.schedule_generators import complex_schedule_generator
from flatland.utils.rendertools import RenderTool

In [2]:
# Seed NumPy's global random state so the random draws made through np.random repeat between runs
np.random.seed(1)

# Keep the task small: training on simple small tasks is the best way to get familiar with the environment
N_agents = 1

# Observation builders: a depth-2 tree observation fed by a shortest-path predictor,
# and a local grid observation (built here, but not the one handed to the environment below)
shortest_path_predictor = ShortestPathPredictorForRailEnv()
TreeObservation = TreeObsForRailEnv(max_depth=2, predictor=shortest_path_predictor)
LocalGridObs = LocalObsForRailEnv(view_height=10, view_width=2, center=2)

# The complex_rail_generator is used to generate feasible network configurations with corresponding tasks
rail_generator = complex_rail_generator(
    nr_start_goal=10,
    nr_extra=2,
    min_dist=8,
    max_dist=99999,
    seed=1,
)
schedule_generator = complex_schedule_generator()

# 20x20 environment wired up with the generators and the tree observation builder
env = RailEnv(
    width=20,
    height=20,
    rail_generator=rail_generator,
    schedule_generator=schedule_generator,
    number_of_agents=N_agents,
    obs_builder_object=TreeObservation,
)
env.reset()

# Renderer bound to this environment; used by the episode loop to draw each step
env_renderer = RenderTool(env)

In [3]:
# Import your own Agent or use RLlib to train agents on Flatland
# As an example we use a random agent here
class RandomAgent:
    """Baseline agent that ignores its observation and draws a random action index."""

    def __init__(self, state_size, action_size):
        """
        Store the sizes the agent was configured with.

        :param state_size: size of the observation; stored but not otherwise used by this class
        :param action_size: number of available actions; actions are drawn from 0 .. action_size - 1
        """
        self.state_size, self.action_size = state_size, action_size

    def act(self, state):
        """
        Pick an action at random; the observation is not inspected.

        :param state: observation of the agent (unused)
        :return: one element of np.arange(action_size), drawn with np.random.choice
        """
        candidate_actions = np.arange(self.action_size)
        return np.random.choice(candidate_actions)

    def step(self, memories):
        """
        Placeholder for a learning update; a random agent has nothing to learn, so this does nothing.

        :param memories: SARS tuple (state, action, reward, next state, done) of one transition
        :return: None
        """
        return None

    def save(self, filename):
        """
        Placeholder for storing the current policy; nothing is written.

        :param filename: target path (unused)
        :return: None
        """
        return None

    def load(self, filename):
        """
        Placeholder for loading a policy; nothing is read.

        :param filename: source path (unused)
        :return: None
        """
        return None

In [4]:
# Create the agent with parameters corresponding to the environment and observation builder.
# NOTE(review): 218 is presumably the flattened tree-observation size and 5 the number of
# available actions - confirm against the Flatland version in use.
agent = RandomAgent(state_size=218, action_size=5)

# Number of episodes to run
n_trials = 20

# Maps agent handle -> chosen action; starts empty and is filled in on every step
action_dict = {}
print("Starting Training...")

Starting Training...


In [5]:
# Total reward collected in each episode, in episode order
all_scores = []
for episode in range(1, n_trials + 1):

    # Start a fresh episode and fetch the initial observations for all agents
    obs, info = env.reset()

    # Give agent i the speed 1 / (i + 1), so agent 0 runs at speed 1, agent 1 at 1/2, ...
    for agent_idx in range(env.get_num_agents()):
        env.agents[agent_idx].speed_data["speed"] = 1 / (agent_idx + 1)
    env_renderer.reset()
    # The provided observation could be enhanced further here, e.g. by normalization
    # (see the training navigation example in the baseline repository)

    score = 0
    # Play the episode for at most 500 steps
    for _step in range(500):
        # Choose an action for each agent in the environment
        for handle in range(env.get_num_agents()):
            action_dict[handle] = agent.act(obs[handle])

        # Advance the environment: returns the next observations for all agents, their
        # rewards, and whether they are done
        next_obs, all_rewards, done, _ = env.step(action_dict)

        # Rendering significantly slows down the run; comment this call out for a faster run
        env_renderer.render_env(show=True, show_observations=True, show_predictions=False)

        # Hand each agent's transition to the agent and add its reward to the episode score
        for handle in range(env.get_num_agents()):
            transition = (
                obs[handle],
                action_dict[handle],
                all_rewards[handle],
                next_obs[handle],
                done[handle],
            )
            agent.step(transition)
            score += all_rewards[handle]

        obs = next_obs.copy()
        # Stop early once the environment reports that all agents are done
        if done['__all__']:
            break

    all_scores.append(score)
    print('Episode Nr. {}\t Score = {}'.format(episode, score))

open_window - pyglet
Episode Nr. 1	 Score = -500.0
Episode Nr. 2	 Score = -500.0
Episode Nr. 3	 Score = -91.0
Episode Nr. 4	 Score = -500.0
Episode Nr. 5	 Score = -500.0
Episode Nr. 6	 Score = -500.0
Episode Nr. 7	 Score = -500.0
Episode Nr. 8	 Score = -500.0
Episode Nr. 9	 Score = -17.0
Episode Nr. 10	 Score = -50.0
Episode Nr. 11	 Score = -500.0
Episode Nr. 12	 Score = -94.0
Episode Nr. 13	 Score = -422.0
Episode Nr. 14	 Score = -214.0
Episode Nr. 15	 Score = -500.0
Episode Nr. 16	 Score = -500.0
Episode Nr. 17	 Score = -69.0
Episode Nr. 18	 Score = -92.0
Episode Nr. 19	 Score = -80.0
Episode Nr. 20	 Score = -17.0


In [6]:
# Mean of the per-episode scores gathered in all_scores
avg = np.mean(all_scores)
summary_template = 'average score of a random agent over {} episodes is {}'
print(summary_template.format(n_trials, avg))

average score of a random agent over 20 episodes is -307.3
