In [3]:
%load_ext autoreload
%autoreload 2
from src.evolution_algos import cem_uncorrelated, saes_1_1, ObjectiveFunction
from src.evolution_policy import NeuralNetworkPolicy, LogisticRegression

import numpy as np
from src.test_utils import RenderWrapper
from flatland.envs.line_generators import SparseLineGen
from flatland.envs.malfunction_generators import (
    MalfunctionParameters,
    ParamMalfunctionGen,
)
from flatland.envs.persistence import RailEnvPersister
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import SparseRailGen
# from flatland_cutils import TreeObsForRailEnv as TreeCutils
from flatland.envs.observations import GlobalObsForRailEnv

from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.distance_map import DistanceMap
import flatland.envs.rail_env_shortest_paths as sp

from flatland.utils.rendertools import RenderTool

from src.observation_utils import normalize_observation

import time

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Build the Flatland rail environment shared by the experiments below.
observation_tree_depth = 1
observation_radius = 10
observation_max_path_depth = 20

# The sub-components are created first, in the same order the nested
# constructor call would evaluate them, so RailEnv(...) itself stays short.
rail_generator = SparseRailGen(
    seed=0,  # Random seed
    max_num_cities=2,  # Number of cities
    grid_mode=True,
    max_rails_between_cities=2,
    max_rail_pairs_in_city=1,
)
line_generator = SparseLineGen(speed_ratio_map={1.0: 1.0})
tree_observation_builder = TreeObsForRailEnv(max_depth=observation_tree_depth)
malfunction_parameters = MalfunctionParameters(
    malfunction_rate=0.0,  # Rate of malfunction
    min_duration=3,  # Minimal duration
    max_duration=20,  # Max duration
)
malfunction_generator = ParamMalfunctionGen(malfunction_parameters)

env = RailEnv(
    width=20,
    height=15,
    rail_generator=rail_generator,
    line_generator=line_generator,
    # NOTE(review): an earlier comment here read "Only one agent", but the
    # value passed is 2 -- confirm that two agents is what is intended.
    number_of_agents=2,
    obs_builder_object=tree_observation_builder,
    malfunction_generator=malfunction_generator,
)

In [5]:
# Initializing nn policy
# The policy input size is (features per tree node) x (number of tree nodes).
n_features_per_node = env.obs_builder.observation_dim
# Node count summed level by level: 4**0 + 4**1 + ... + 4**observation_tree_depth.
n_nodes = sum(np.power(4, level) for level in range(observation_tree_depth + 1))
state_size = n_nodes * n_features_per_node
n_actions = 5
# Third positional argument is presumably the hidden-layer width -- TODO confirm
# against NeuralNetworkPolicy's signature.
nn_policy = NeuralNetworkPolicy(state_size, n_actions, 32)
print(nn_policy.num_params)

Number of parameters per layer: [(56, 32), (33, 5)]
Number of parameters (neural network weights) to optimize: 1957
1957


In [6]:
# Cross entropy method

# Seed NumPy's global RNG so the random initial mean drawn below is identical
# on every "Restart Kernel -> Run All".
# NOTE(review): the search as a whole is only reproducible if cem_uncorrelated
# and the environment also draw from NumPy's global RNG -- confirm in src/.
CEM_SEED = 0
np.random.seed(CEM_SEED)

# Create the objective function
objective_function = ObjectiveFunction(
    env,
    nn_policy,
    observation_tree_depth=observation_tree_depth,
    observation_radius=observation_radius,
    num_episodes=1,
    max_time_steps=200,
)

# One mean and one variance entry per policy parameter: the mean starts at
# uniform random values in [0, 1), the variance at a constant 1000.
init_mean_array = np.random.random(size=nn_policy.num_params)
init_var_array = np.ones(shape=nn_policy.num_params) * 1000.0

# Passed to the optimiser, presumably to be filled with per-iteration
# history -- TODO confirm against cem_uncorrelated.
hist_dict = {}
theta = cem_uncorrelated(
    objective_function,
    mean_array=init_mean_array,
    var_array=init_var_array,
    max_iterations=50,
    sample_size=50,
    elite_frac=0.1,
    print_every=5,
    success_score=15,
    hist_dict=hist_dict,
)

iteration :  0
sorted samples :  [-0.0, 3.0, 11.0, 17.0, 18.0, 19.0, 19.0, 22.0, 22.0, 24.0, 24.0, 26.0, 26.0, 26.0, 27.0, 27.0, 27.0, 27.0, 28.0, 28.0, 28.0, 28.0, 28.0, 29.0, 29.0, 29.0, 29.0, 30.0, 34.0, 36.0, 37.0, 38.0, 40.0, 40.0, 40.0, 42.0, 42.0, 42.0, 43.0, 43.0, 43.0, 44.0, 44.0, 45.0, 47.0, 53.0, 55.0, 57.0, 58.0, 60.0]
32.68
iteration :  5
sorted samples :  [-0.0, -0.0, -0.0, 2.0, 2.0, 2.0, 5.0, 8.0, 8.0, 9.0, 10.0, 11.0, 18.0, 21.0, 22.0, 23.0, 23.0, 24.0, 25.0, 25.0, 25.0, 25.0, 25.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 27.0, 27.0, 27.0, 27.0, 28.0, 28.0, 30.0, 31.0, 31.0, 35.0, 35.0, 36.0, 37.0, 39.0, 42.0, 45.0, 49.0, 54.0, 56.0, 56.0, 61.0]
25.4
iteration :  10
sorted samples :  [-0.0, 3.0, 3.0, 4.0, 8.0, 9.0, 10.0, 14.0, 19.0, 20.0, 21.0, 23.0, 24.0, 25.0, 25.0, 27.0, 27.0, 28.0, 28.0, 28.0, 29.0, 29.0, 30.0, 31.0, 31.0, 31.0, 32.0, 34.0, 34.0, 35.0, 37.0, 37.0, 37.0, 39.0, 41.0, 41.0, 42.0, 43.0, 43.0, 43.0, 51.0, 53.0, 54.0, 54.0, 58.0, 58.0, 59.0, 60.0, 62.0, 64.0]

In [7]:
# (1+1)-SA-ES run on the same policy and environment.

# Seed NumPy's global RNG so the random starting point drawn below is
# identical on every "Restart Kernel -> Run All".
# NOTE(review): the search as a whole is only reproducible if saes_1_1 and the
# environment also draw from NumPy's global RNG -- confirm in src/.
SAES_SEED = 1
np.random.seed(SAES_SEED)

# Fresh history container for this run (rebinds the name `hist_dict`).
hist_dict = {}

# One starting value and one step size per policy parameter: the start is
# uniform random in [0, 1), every step size is 1.
init_x_array = np.random.random(size=nn_policy.num_params)
init_sigma_array = np.ones(shape=nn_policy.num_params) * 1.0

# Rebuilds the objective with num_episodes=2 and rebinds `objective_function`.
objective_function = ObjectiveFunction(
    env,
    nn_policy,
    observation_tree_depth=observation_tree_depth,
    observation_radius=observation_radius,
    num_episodes=2,
    max_time_steps=200,
)

saes = saes_1_1(
    objective_function,
    x_array=init_x_array,
    sigma_array=init_sigma_array,
    tau=0.001,
    max_iterations=100,
    print_every=5,
    success_score=10,
    hist_dict=hist_dict,
)

# does not seem to converge
# NOTE(review): the recorded output of this cell ends with
# "Success after 10 iterations!", so the run stopped early on success_score;
# the remark above may be stale or may refer to a different run -- confirm.

Iteration 0/100: Score = 43.0
Iteration 5/100: Score = 36.0
Iteration 10/100: Score = 6.5
Success after 10 iterations!
