In [None]:
# Experiment parameters: the only values that should need editing per run.
NUM_ZONES = 12                # forwarded to MedevacDispatchingEnvironment(num_zones=...)
FORECAST_RECENCY_BIAS = None  # forwarded to MedevacDispatchingEnvironment(forecast_recency_bias=...)
# Derived from the two settings above (previously a hand-typed literal) so the
# results directory name cannot drift from the configuration that produced it.
experiment_name = f"zones{NUM_ZONES}_alpha{FORECAST_RECENCY_BIAS}"
REPS = [0, 1, 2]              # one DDQN agent is built per entry (passed as rep=...)

In [None]:
from pathlib import Path

# Directory holding the scenario CSVs that the environment is configured from.
config_path = Path('environments') / 'configuration' / 'afghanistan' / 'v2'

# Root directory for this experiment; handed to EvaluationManager as save_path.
experiment_results_path = Path("experiment_results") / experiment_name
# Handed to the DDQN agents as save_path and later to evaluate_policy_nets.
path_to_policy_nets = experiment_results_path.joinpath("milestones")

In [None]:
from environments.medevac import MedevacDispatchingEnvironment
from gymnasium.wrappers import FlattenObservation

# Build the dispatching environment. Every *_config_file is a CSV located in
# config_path; the zone count and recency bias come from the parameter cell.
env = MedevacDispatchingEnvironment(
    num_zones=NUM_ZONES,
    forecast_recency_bias=FORECAST_RECENCY_BIAS,
    verbose=False,
    map_config_file=config_path.joinpath("map.csv"),
    MTF_config_file=config_path.joinpath("MTFs.csv"),
    staging_area_config_file=config_path.joinpath("staging_areas.csv"),
    casualty_cluster_center_config_file=config_path.joinpath("casualty_cluster_centers.csv"),
    intensity_function_config_file=config_path.joinpath("intensity_function_ranges_1.csv"),
)

# The flattening wrapper is used here only to read the size of the flattened
# observation; `env` itself is left unwrapped for the cells that follow.
flat_env = FlattenObservation(env)
state_dim = flat_env.observation_space.shape[0]
print(f"State space dimensionality: {state_dim}")

# `.n` on the action space: presumably a discrete action space -- confirm.
n_actions = env.action_space.n
print(f"Number of actions: {n_actions}")

In [None]:
from algorithms.ddqn import DDQN

# Settings common to every replication; `rep` is the only argument that varies.
# NOTE: all agents receive the same `env` object and the same save_path.
ddqn_settings = dict(
    env=env,
    is_constrained=True,
    # run length and milestone bookkeeping
    num_episodes=20000,
    milestone_freq=200,
    save_path=path_to_policy_nets,
    offset=0,
    # network size / device (the same values are given to EvaluationManager)
    n_neurons=256,
    device='cpu',
    # replay memory and optimisation
    memory_size=2 ** 19,  # 524,288
    learning_rate=1e-4,
    batch_size=32,
    # epsilon schedule (presumably epsilon-greedy exploration -- confirm in DDQN)
    epsilon_start=1,
    epsilon_end=0.1,
    epsilon_decay=0.999,
    # remaining DDQN hyperparameters; see algorithms.ddqn for their exact meaning
    gamma=0.99,
    tau=0.005,
    policy_net_update_freq=4,
    target_net_update_freq=4,
)

# One agent per replication index listed in REPS.
agents = [DDQN(rep=rep, **ddqn_settings) for rep in REPS]

In [None]:
# Train the replications one after another, in REPS order.
# The agents were built with save_path=path_to_policy_nets and milestone_freq=200,
# so presumably train() writes milestone policy nets there -- confirm in DDQN.
for agent in agents:
    agent.train()

In [None]:
from algorithms.evaluation import EvaluationManager

# Evaluation harness for the trained policy nets.
# n_neurons and device repeat the literals passed to the DDQN agents; presumably
# they must match for the saved networks to load -- keep them in sync (confirm).
eval_manager = EvaluationManager(
    env=env,
    is_constrained=True,
    n_neurons=256,
    device='cpu',
    save_path=experiment_results_path,
    num_eval_reps=30,
    n_jobs=10,
)

In [None]:
# Evaluate the policy nets found under path_to_policy_nets (the "milestones"
# directory the agents were given as save_path). parallelize=True; the manager
# was created with n_jobs=10 -- presumably the worker count, confirm.
eval_manager.evaluate_policy_nets(path_to_policy_nets, parallelize=True)

In [None]:
# Compute summary statistics with the same manager; presumably this consumes the
# results of evaluate_policy_nets and must run after it -- TODO confirm.
eval_manager.determine_summary_statistics(parallelize=True)

In [None]:
# Y-axis window for the learning curve. range() excludes its stop value, hence
# the +1 so that the upper bound itself receives a tick.
Y_AXIS_MIN, Y_AXIS_MAX, Y_TICK_STEP = 40, 75, 5
eval_manager.plot_learning_curve(
    x_spacing=5000,
    yticks=range(Y_AXIS_MIN, Y_AXIS_MAX + 1, Y_TICK_STEP),
    ylim=(Y_AXIS_MIN, Y_AXIS_MAX),
)