In [13]:
import pandas as pd
import numpy as np
import joblib
import time
import abm_utils.abm as abm
import abm_utils.repositioning as repositioning
import abm_utils.split as split
from  ppo_utils.ppo_agent import PPOAgent, EnhancedStateHandler

In [None]:
"""
This notebook is the same as ABM.ipynb, except that its assignment decisions are made by the PPO agent. Therefore, only the parts that differ from ABM.ipynb are described.
"""

# Network dimensions; they have to match the values used during training.
STATE_DIM = 4
ACTION_DIM = 2  # action 0 = direct assignment, action 1 = split delivery

# Builds the state features that are fed to the agent (no grid is passed).
state_handler = EnhancedStateHandler(grid=None)

# Agent that takes the direct-vs-split decision for each order.
decision_agent_ppo = PPOAgent(
    state_dim=STATE_DIM,
    action_dim=ACTION_DIM,
)

# Restore the trained network weights.
# The directory name must match the folder the models were saved to.
decision_agent_ppo.load_models("Data/final_ppo_agent_140")

2025-10-20 17:16:55,439 - DEBUG - Creating converter from 3 to 5


PPO-Modelle erfolgreich aus dem Verzeichnis 'Data/final_ppo_agent_140' geladen.


In [None]:
# Load the order data that the simulation replays. Later cells read at least
# the columns 'order_id', 'courier_id' and 'platform_order_time' from it.
sim_data_filtered = pd.read_csv("Data/TestData.csv")

In [7]:
# Predicted order counts per time bin and H3 hexagon for the simulated
# window (2022-10-24, 14:00-17:00).
forecastData = pd.read_parquet("Data/predicted_values_2022-10-24_14-17.parquet")

# Reshape the long forecast table into one row per time bin and one column
# per hexagon; bin/hexagon combinations without a forecast count as zero.
wide_df = (
    forecastData
    .pivot(index='time_bin', columns='hex_id', values='predicted_order_count')
    .fillna(0)
)
wide_df.index = pd.to_datetime(wide_df.index)

# Nested lookup {time_bin: {hex_id: predicted_order_count}} that the ABM uses
# for repositioning.
predictions_dict = wide_df.to_dict(orient='index')
first_key = list(predictions_dict)[0]

# Name under which the simulation refers to the per-bin, per-hexagon demand.
pre_binned_demand = predictions_dict

2025-10-20 17:17:01,786 - DEBUG - Using json encoder/decoder


In [None]:
def run_abm(timestart, steps, data, couriers, metrics, delivered_order_ids, order_queue, strategy, constants, rejection_model,assignment_log,decision_agent, state_handler):
    """
    Advance the simulation by one time step and return the updated state.

    A single call (1) moves the couriers, (2) sets the acceptable-delay
    threshold from the current queue length, (3) triggers repositioning if the
    strategy uses it and the step falls on a repositioning interval, and
    (4) tries to assign every queued and newly arrived order, either with the
    standard assignment or, if the PPO agent decides so, as a split delivery.

    Args:
        timestart: Start of the current step in seconds (converted with
            pd.to_datetime(..., unit='s') for the demand lookup).
        steps: Width of the order-arrival window in seconds; orders with
            timestart <= platform_order_time < timestart + steps arrive in
            this step. Note that courier movement and repositioning use
            constants['steps'] instead of this argument.
        data: DataFrame with the orders of the simulated window; the columns
            'platform_order_time' and 'order_id' are read here.
        couriers: All courier objects; their `state` and `rejected_orders`
            attributes are read when looking for split candidates.
        metrics: Tuple of values used to calculate the simulation metrics
            (six values); it is passed to the abm/split helpers, which return
            the updated tuple.
        delivered_order_ids: Orders delivered so far; updated by
            abm.move_couriers_new.
        order_queue: List of (order, attempts) tuples carried over from the
            previous step.
        strategy: Name of the strategy. 'Repositioning' and 'Combined_Split'
            enable repositioning; 'Split' and 'Combined_Split' enable the
            agent's direct-vs-split decision.
        constants: Dict with the simulation settings defined in the main
            simulation setup. 'MAX_ACCEPTABLE_DELAY_SECONDS' is overwritten
            in place on every call.
        rejection_model: The imported logistic regression probabilistic
            model; only forwarded to the assignment helpers.
        assignment_log: Log object forwarded to, and returned by,
            abm.handle_standard_assignment.
        decision_agent: Agent whose get_action(state, deterministic=True)
            returns three values, the first being the action (0 = direct).
        state_handler: Object providing get_order_specific_feature(order) and
            get_global_state_features(couriers, pending_orders).

    Returns:
        Tuple (couriers, data, metrics, delivered_order_ids, next_order_queue,
        assignment_log). `data` is returned as passed in; `next_order_queue`
        holds the (order, attempts) tuples to retry in the next step.
    """
    # Move the couriers first; the helper returns the updated metrics and
    # delivered order ids.
    couriers, metrics, delivered_order_ids = abm.move_couriers_new(
        couriers, timestart, metrics, delivered_order_ids,
        constants['SPEED_HEX_PER_STEP'], constants['steps']
    )
    # The longer the queue, the larger the acceptable delay (15 / 10 / 5
    # minutes). This writes into the caller's constants dict.
    if len(order_queue) > 50:
        constants["MAX_ACCEPTABLE_DELAY_SECONDS"] = 15 * 60
    elif len(order_queue) > 20:
        constants["MAX_ACCEPTABLE_DELAY_SECONDS"]  = 10 * 60
    else:
        constants["MAX_ACCEPTABLE_DELAY_SECONDS"]  = 5 * 60
    # Different strategies need different functions
    repositioning_enabled_strategies = ['Repositioning', 'Combined_Split']
    splitting_enabled_strategies = ['Split', 'Combined_Split']
    # Repositioning only runs when the elapsed simulation time is a multiple
    # of the repositioning interval.
    if strategy in repositioning_enabled_strategies and (timestart - constants['initial_timestart']) % constants['repositioning_interval'] == 0:
        # Key of the 15-minute forecast bin for this step.
        # NOTE(review): the +8h shift presumably converts the UTC timestamp to
        # the local time used by the forecast index -- confirm.
        current_bin_key = pd.to_datetime(timestart, unit='s').floor('15min') + pd.Timedelta(hours=8)
        dynamic_demand = constants['pre_binned_demand'].get(current_bin_key, {})
        # No forecast for this bin means no repositioning in this step.
        if dynamic_demand:
            repositioning.run_repositioning_strategy(
                couriers, dynamic_demand, timestart, order_queue,
                constants['SPEED_HEX_PER_STEP'], constants['steps'],
                constants['MACRO_RESOLUTION'], constants['WORK_RESOLUTION']
            )

    # Collect the orders placed within [timestart, timestart + steps).
    new_orders_this_step = [order for _, order in data[
        (data['platform_order_time'] >= timestart) & 
        (data['platform_order_time'] < timestart + steps)
    ].iterrows()]
    # New orders start out as complete (not yet split) orders.
    for order in new_orders_this_step:
        order['assignment_status'] = 'pending_full'

    # Queued orders keep their attempt counter; new orders start at 0.
    all_pending_orders = order_queue + [(order, 0) for order in new_orders_this_step]
    next_order_queue = []
    # This set is handed to the assignment helpers, which presumably add the
    # ids they handle -- verify in abm/split.
    processed_order_ids_this_step = set()

    # process all orders
    for order, attempts in all_pending_orders:
        # Orders already recorded as processed in this step are dropped from
        # the loop and are not re-queued.
        if order['order_id'] in processed_order_ids_this_step:
            continue

        was_processed = False
        # Orders that exceeded the attempt limit bypass the agent and always
        # use the standard assignment.
        if attempts > constants['MAX_QUEUE_ATTEMPTS']:
            was_processed, metrics, assignment_log = abm.handle_standard_assignment(order, attempts, couriers, timestart, constants, rejection_model, processed_order_ids_this_step, metrics, assignment_log)
    
        elif strategy in splitting_enabled_strategies:
            order_distance_feature = state_handler.get_order_specific_feature(order) # order-specific (distance) feature
            global_features = state_handler.get_global_state_features(couriers, all_pending_orders) # features describing couriers and pending orders
            state_features = np.concatenate(([order_distance_feature], global_features)) # state vector: order feature first, then the global features
            action, _, _ = decision_agent.get_action(state_features, deterministic=True) # deterministic decision; only the action is used

            # Action 0 means direct delivery. Orders with status
            # 'pending_part2' always use the standard assignment, whatever the
            # agent chose (presumably the second leg of a split -- confirm).
            if action == 0 or order['assignment_status'] == 'pending_part2':
                was_processed, metrics, assignment_log = abm.handle_standard_assignment(order, attempts, couriers, timestart, constants, rejection_model, processed_order_ids_this_step, metrics, assignment_log)
            else:
                # The agent decided for option split: candidates are idle
                # couriers that have not rejected this order.
                idle_couriers = [c for c in couriers if c.state == 'IDLE' and order['order_id'] not in c.rejected_orders]
                c1, c2, r1, r2 = split.process_split_delivery(order, idle_couriers, timestart, constants)
                
                # Without two couriers the split is not executed;
                # was_processed stays False and the order is re-queued below.
                if c1 and c2:
                    was_processed, metrics = split.execute_split_assignment(
                        order, c1, c2, r1, r2, timestart, constants, 
                        rejection_model, processed_order_ids_this_step, next_order_queue, metrics
                    )
        else: 
            # Strategies without splitting always use the standard assignment.
            was_processed, metrics, assignment_log = abm.handle_standard_assignment(order, attempts, couriers, timestart, constants, rejection_model, processed_order_ids_this_step, metrics, assignment_log)

        # Unassigned orders are retried in the next step with one more attempt.
        if not was_processed: 
            next_order_queue.append((order, attempts + 1))

    return couriers, data, metrics, delivered_order_ids, next_order_queue, assignment_log

In [9]:
def evaluate_effectiveness(results, total_active_couriers):
    """
    Print a human-readable report for every simulated scenario.

    Args:
        results: Mapping {(scale, strategy): (total_time, metrics, total_distance)}.
            total_time is in seconds; metrics is the 6-tuple
            (delay_inc, <unused>, success, success_delay, stacked_count,
            rejected_count), with delay_inc being the summed delay in seconds.
        total_active_couriers: Fleet size that `scale` is multiplied with to
            obtain the driver count shown in the scenario heading.

    Scenarios in which no order was delivered are left out of the report.
    """
    frame = "=" * 40
    print(f"{frame}Simulation results{frame}")

    # One report section per (scale, strategy) combination.
    for (scale, strategy), (total_time, metrics, total_distance) in results.items():
        delay_inc, _, success, success_delay, stacked_count, rejected_count = metrics

        # Nothing was delivered, so there is nothing to report.
        if success + success_delay == 0:
            continue

        # Mean delay in seconds, taken over the late deliveries only.
        if success_delay > 0:
            avg_delay = delay_inc / success_delay
        else:
            avg_delay = 0

        fleet_percent = int(scale * 100)
        driver_count = int(total_active_couriers * scale)

        print(f"Scenario: '{strategy}' @ {fleet_percent}% Flotte ({driver_count} Fahrer)")
        print(f"Total Time to clear all orders: {total_time / 3600:.2f} Stunden ({total_time / 60:.0f} Minuten)")
        print(f"On-time or Early Deliveries: {success}")
        print(f"Late Deliveries: {success_delay}")
        print(f"Avg. Delay (for late deliveries): {avg_delay / 60:.1f} Minuten")
        print(f"Total Distance Traveled (Hexagons): {total_distance} Zellen")
        print(f"Total Stacked Assignments: {stacked_count}")
        print(f"Total Rejected Offers: {rejected_count}")

        print("=" * 100)

In [None]:
"""
The main simulation code is the same as in the standard notebook (ABM.ipynb).
"""

# Simulation settings shared by all scenarios. run_abm receives this dict and
# overwrites 'MAX_ACCEPTABLE_DELAY_SECONDS' on every step.
constants = {
    'initial_timestart': 1666591200,  # simulation start in seconds: 2022-10-24 @14:00
    'SPEED_HEX_PER_STEP': 8,  # number of hexagon jumps per step at work resolution
    'simulation_duration_hours': 3,  # we simulate e.g. 14:00-17:00
    'steps': 30,  # duration of a single simulation time step in seconds
    'repositioning_interval': 15 * 60,  # interval in seconds at which the repositioning strategy is triggered
    'MAX_ACCEPTABLE_DELAY_SECONDS': 5 * 60,  # for assignments: how long to wait for a better courier
    'MAX_QUEUE_ATTEMPTS': 20,  # once queued for about 10 minutes (20 steps of 30 s) the best free courier is chosen
    'pre_binned_demand': pre_binned_demand,  # our prediction for the three hours
    'MACRO_RESOLUTION': 8,  # hexagon resolution of demand prediction and zones
    'WORK_RESOLUTION': 13,  # resolution of actor positions and movement
}
final_courier_states = {}  # for evaluation: {(scale, strategy): couriers}
results = {}  # for evaluation: {(scale, strategy): (total_time, metrics, total_distance)}

rejection_model = joblib.load('Data/rejection_model.joblib')

warmup_duration_seconds = 30 * 60  # 30 minutes

# Point in time from which orders count towards the metrics.
metrics_start_time = constants['initial_timestart'] + warmup_duration_seconds

# Add a 'phase' column that labels every order as 'warmup' or 'tracked',
# depending on whether it was placed before metrics_start_time.
sim_data_filtered['phase'] = np.where(
    sim_data_filtered['platform_order_time'] < metrics_start_time,
    'warmup',
    'tracked'
)

# Report how many orders fall into the tracked phase.
tracked_orders_count = (sim_data_filtered['phase'] == 'tracked').sum()
print(f"Simulation initialisiert. Metriken werden für {tracked_orders_count} 'tracked' Aufträge gesammelt (nach t=30min).")


# Number of distinct couriers in the data, reduced to 80 % as the base fleet.
# NOTE(review): courier_scales below applies another factor of 0.8, so the
# simulated fleet is 0.8 * 0.8 = 64 % of the distinct couriers while the log
# output reports "80%". Confirm that this double scaling is intended.
total_active_couriers = sim_data_filtered['courier_id'].unique().shape[0]
total_active_couriers = total_active_couriers*0.8

# Scenarios to run
strategies = ['Split', 'Combined_Split']
repositioning_enabled_strategies = ['Repositioning', 'Combined_Split']
courier_scales = [0.8]
assignment_log_dict = {}

# Loop through each fleet scale and each strategy
for scale in courier_scales:
    for strategy in strategies:

        # Reset the simulation state for this scenario
        start_time_real = time.time()
        timestart = constants['initial_timestart']
        sim_data = sim_data_filtered.copy()
        initial_order_ids = set(sim_data['order_id'])
        num_initial_orders = len(initial_order_ids)
        delivered_order_ids = set()
        # metrics = (delay_inc, 0, success, success_delay, stacked_orders, rejected_orders)
        metrics = (0, 0, 0, 0, 0, 0)
        order_queue = []
        assignment_log = []

        # Initiate the couriers at their positions based on the scale
        couriers = abm.initiate_couriers(int(total_active_couriers * scale), sim_data_filtered)

        print(f"Starting Simulation:'{strategy}' with {len(couriers)} couriers ({int(scale*100)}%)...")

        if strategy in repositioning_enabled_strategies:
            # The fleet is assumed not to start from scratch, so it gets
            # 15 minutes before the simulation start to deploy.
            print("Running warm up phase for fleet deployment")
            warmup_seconds = 15 * 60
            warmup_start_time = constants['initial_timestart'] - warmup_seconds

            # The forecast of the first time slot is the warm-up target. It
            # does not depend on the warm-up step, so it is looked up once
            # instead of on every iteration.
            first_bin_key = pd.to_datetime(constants['initial_timestart'], unit='s').floor('15min') + pd.Timedelta(hours=8)
            dynamic_demand = pre_binned_demand.get(first_bin_key, {})

            for t in range(warmup_start_time, constants['initial_timestart'], constants['steps']):
                # Move couriers that are on a repositioning task; the metrics
                # of the warm-up are discarded.
                couriers, _, delivered_order_ids = abm.move_couriers_new(couriers, t, (0, 0, 0, 0, 0, 0), delivered_order_ids, constants['SPEED_HEX_PER_STEP'], constants['steps'])

                # Assign reposition tasks
                if dynamic_demand:
                    repositioning.run_repositioning_strategy(couriers, dynamic_demand, t, [],
                        constants['SPEED_HEX_PER_STEP'], constants['steps'],
                        constants['MACRO_RESOLUTION'], constants['WORK_RESOLUTION']
                    )

        # Main simulation loop: step until every order has been delivered.
        # NOTE(review): there is no upper bound on the simulated time, so the
        # loop does not end if an order can never be delivered.
        while len(delivered_order_ids) < num_initial_orders:
            couriers, sim_data, metrics, delivered_order_ids, order_queue, assignment_log = run_abm(
                timestart, constants['steps'], sim_data, couriers, metrics, delivered_order_ids, order_queue,
                strategy, constants, rejection_model, assignment_log, decision_agent_ppo, state_handler
            )
            timestart += constants['steps']  # advance by one step

            # Log every 10 minutes of simulated time
            if (timestart - constants['initial_timestart']) % 600 == 0 and timestart > constants['initial_timestart']:
                print(f"  > Time: {(timestart - constants['initial_timestart'] )/60:.0f} min | Delivered: {len(delivered_order_ids)}/{num_initial_orders} | Queue: {len(order_queue)}")

        # Store the outcome of this scenario
        total_simulation_time = timestart - constants['initial_timestart']
        end_time_real = time.time()
        assignment_log_dict[(scale, strategy)] = assignment_log

        final_courier_states[(scale, strategy)] = couriers

        # The distance is computed once (the original computed it twice).
        total_distance = abm.calculate_total_distance_in_hexes(couriers)
        results[(scale, strategy)] = (total_simulation_time, metrics, total_distance)

        print(f"Simulation (Skala {int(scale*100)}%) finished in {end_time_real - start_time_real:.2f} real seconds.")
        print(f"All {num_initial_orders} orders delivered. Total simulation time: {total_simulation_time/60:.0f} minutes.")

# Final evaluation
evaluate_effectiveness(results, total_active_couriers)

Simulation initialisiert. Metriken werden für 5240 'tracked' Aufträge gesammelt (nach t=30min).
Starting Simulation:'Split' with 1006 couriers (80%)...
  [SPLIT SUCCESS] Order 22321 planned and assigned to C873 and C952
  [SPLIT SUCCESS] Order 38241 planned and assigned to C936 and C169
  [SPLIT SUCCESS] Order 82686 planned and assigned to C16 and C176
  [SPLIT SUCCESS] Order 101324 planned and assigned to C345 and C545
  [SPLIT SUCCESS] Order 170916 planned and assigned to C37 and C8
  [SPLIT SUCCESS] Order 261811 planned and assigned to C540 and C105
  [SPLIT SUCCESS] Order 292985 planned and assigned to C272 and C224
  [SPLIT SUCCESS] Order 365355 planned and assigned to C189 and C88
  [SPLIT SUCCESS] Order 406099 planned and assigned to C877 and C671
  [SPLIT SUCCESS] Order 416731 planned and assigned to C11 and C402
  [SPLIT SUCCESS] Order 422019 planned and assigned to C825 and C34
  [SPLIT SUCCESS] Order 482854 planned and assigned to C290 and C992
  [SPLIT SUCCESS] Order 540216

KeyboardInterrupt: 