In [1]:
# ## 1. Library Imports and Initial Setup
import pandas as pd
import numpy as np
import math
import random
import h3
import time
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import joblib
import rejection
import abm 
import repositioning
import split
import plots



In [2]:
import numpy as np
from collections import deque
import abm # Wichtig, da wir die H3-Distanzfunktion daraus nutzen

class EnhancedStateHandler:
    """
    State handler for the Q-learning agent on an H3 hexagon grid.

    Turns an (order, couriers) pair into two normalized features
    (delivery distance, courier availability) and discretizes them into
    a ``(distance_bin, courier_bin)`` tuple used as the agent's state.
    """

    # Operating range of the raw fleet utilization that is stretched onto [0, 1].
    MIN_REAL_UTILIZATION = 0.15
    MAX_REAL_UTILIZATION = 0.40
    # Number of simultaneous deliveries per courier used as the 100 % reference.
    MAX_DELIVERIES_PER_COURIER = 3

    def __init__(self, n_distance_bins=10, n_courier_bins=10, max_h3_distance=1346):
        """
        Initialise the state handler.

        Args:
            n_distance_bins (int): Number of categories for the distance feature.
            n_courier_bins (int): Number of categories for the courier availability.
            max_h3_distance (int): Largest expected distance in hexagons; used to
                normalize distances onto [0, 1]. Defaults to 1346.

        Raises:
            ValueError: If ``max_h3_distance`` is not positive (the normalization
                would divide by zero).
        """
        if max_h3_distance <= 0:
            raise ValueError("max_h3_distance must be positive")

        self.n_distance_bins = n_distance_bins
        self.n_courier_bins = n_courier_bins

        self.max_distance = max_h3_distance
        self.max_sqrt_distance = np.sqrt(self.max_distance)

        # Equal-width bin edges on [0, 1] for both features.
        self.courier_bins = np.linspace(0, 1, n_courier_bins + 1)
        self.distance_bins = np.linspace(0, 1, n_distance_bins + 1)

        # Rolling windows for metrics (last 100 entries each).
        self.delivery_times = deque(maxlen=100)
        self.system_load = deque(maxlen=100)

        self.raw_utilization_log = []

    def normalize_distance(self, h3_distance):
        """
        Map a hexagon distance onto [0, 1] using a square-root scale.

        An infinite distance maps to 1.0; anything beyond ``max_distance``
        is capped at 1.0.
        """
        if h3_distance == float('inf'):
            return 1.0
        sqrt_distance = np.sqrt(h3_distance)
        norm_sqrt_distance = sqrt_distance / self.max_sqrt_distance
        return min(norm_sqrt_distance, 1.0)

    def _normalized_availability(self, couriers):
        """
        Return the fleet availability in [0, 1] (1 = idle fleet, 0 = saturated).

        The raw utilization is the sum of ``active_deliveries`` over all couriers
        divided by ``len(couriers) * MAX_DELIVERIES_PER_COURIER``. The band
        [MIN_REAL_UTILIZATION, MAX_REAL_UTILIZATION] is mapped linearly onto
        [0, 1]; values outside the band are clamped. An empty fleet counts as
        fully utilized.
        """
        total_couriers = len(couriers)
        if total_couriers == 0:
            scaled_utilization = 1.0
        else:
            total_active_deliveries = sum(c.active_deliveries for c in couriers)
            max_possible_deliveries = total_couriers * self.MAX_DELIVERIES_PER_COURIER
            raw_utilization = (
                (total_active_deliveries / max_possible_deliveries)
                if max_possible_deliveries > 0 else 0
            )

            if raw_utilization <= self.MIN_REAL_UTILIZATION:
                scaled_utilization = 0.0
            elif raw_utilization >= self.MAX_REAL_UTILIZATION:
                scaled_utilization = 1.0
            else:
                scaled_utilization = (
                    (raw_utilization - self.MIN_REAL_UTILIZATION)
                    / (self.MAX_REAL_UTILIZATION - self.MIN_REAL_UTILIZATION)
                )

        return 1 - scaled_utilization

    def get_state_features(self, order, couriers):
        """
        Extract the normalized state features for one order.

        Args:
            order: Mapping with the keys 'sender_h3' and 'recipient_h3'.
            couriers: Sequence of courier objects exposing ``active_deliveries``.

        Returns:
            tuple: ``(norm_distance, norm_availability)``, both in [0, 1].
        """
        # 1. Distance feature: hexagon distance between sender and recipient.
        distance_in_hex = abm.get_hex_distance(
            order['sender_h3'], order['recipient_h3']
        )
        norm_distance = self.normalize_distance(distance_in_hex)

        # 2. Availability feature: calibrated, rescaled fleet utilization.
        norm_availability = self._normalized_availability(couriers)

        return norm_distance, norm_availability

    def discretize_state(self, state_features):
        """
        Convert the normalized features into discrete bin indices.

        Returns:
            tuple: ``(distance_bin, courier_bin)``.
        """
        norm_distance, norm_availability = state_features

        # `np.digitize` returns the 1-based index of the bin a value falls into.
        # NOTE(review): a feature value of exactly 1.0 lands in bin index
        # n_bins (one past the last regular bin). Left unchanged because the
        # trained Q-table keys may rely on it -- confirm before clipping.
        distance_bin = np.digitize(norm_distance, self.distance_bins) - 1
        courier_bin = np.digitize(norm_availability, self.courier_bins) - 1

        return (distance_bin, courier_bin)

    def get_state(self, order, couriers):
        """
        Return the discretized state for an order: feature extraction
        followed by discretization.
        """
        features = self.get_state_features(order, couriers)
        discretized_state = self.discretize_state(features)
        return discretized_state

    def update_delivery_time(self, delivery_time):
        """Append a delivery time to the rolling window."""
        self.delivery_times.append(delivery_time)
    

In [21]:
import joblib
# Importieren Sie die Klassen aus Ihrer Agenten-Datei
from RL_agent_v2 import QLearningAgent

# 1. Initialise the state handler (10 distance bins x 10 courier-availability bins).
state_handler = EnhancedStateHandler(n_distance_bins=10, n_courier_bins=10)

# 2. Create the agent used for decisions only.
#    epsilon = 0 disables exploration, so the agent always picks the best known action.
decision_agent = QLearningAgent(epsilon=0.0)

# 3. Load the trained Q-table.
#    NOTE: joblib.load unpickles the file; only load model files from a trusted source.
Q_TABLE_PATH = 'q_learning_agent_nbins_00_500.joblib'
try:
    trained_q_table = joblib.load(Q_TABLE_PATH)
    decision_agent.q_table = trained_q_table
    print("✅ Trainierter RL-Agent erfolgreich geladen und einsatzbereit.")
except FileNotFoundError:
    # Report the file that was actually requested (the old message named a different file).
    print(f"❗️ Fehler: '{Q_TABLE_PATH}' nicht gefunden. Agent startet ohne Wissen.")

✅ Trainierter RL-Agent erfolgreich geladen und einsatzbereit.


In [4]:
# Load the order data for the simulation from CSV. Later cells read the columns
# 'platform_order_time', 'order_id' and 'courier_id' from this frame.
sim_data_filtered = pd.read_csv("Data.csv")

In [5]:
# Predicted values for our timeframe between 14-17 with h3 indices.
forecastData = pd.read_parquet("predicted_values_2022-10-24_14-17.parquet")

# Build a dictionary with all predicted values per hexagon (res = 8) that the
# ABM can use for repositioning: {time_bin -> {hex_id -> predicted_order_count}}.
wide_df = forecastData.pivot(
    index='time_bin', columns='hex_id', values='predicted_order_count'
).fillna(0)  # hexagons without a forecast in a bin count as zero demand
wide_df.index = pd.to_datetime(wide_df.index)

predictions_dict = wide_df.to_dict(orient='index')
first_key = list(predictions_dict)[0]

# pre_binned_demand is the 15-minute-bin-per-hexagon dictionary used to test the strategy.
pre_binned_demand = predictions_dict

In [6]:
def run_abm(timestart, steps, data, couriers, metrics, delivered_order_ids, order_queue, strategy, constants, rejection_model, assignment_log, decision_agent, state_handler):
    """
    ABM dispatcher: advances the simulation by one time step.

    Per step it moves the couriers, adapts the acceptable-delay threshold to the
    queue length, optionally triggers repositioning, and then tries to assign
    every queued and newly arrived order (directly or as a split delivery).

    Args:
        timestart: Start of the current step as Unix timestamp in seconds.
        steps: Length of the step in seconds; selects the orders arriving in it.
        data: DataFrame with the orders (reads 'platform_order_time').
        couriers: All couriers of the simulation.
        metrics: Tuple of values used to calculate simulation metrics (6 values).
        delivered_order_ids: Orders delivered so far.
        order_queue: List of ``(order, attempts)`` pairs still waiting.
        strategy: Name of the strategy used.
        constants: Dict of simulation constants. Its
            'MAX_ACCEPTABLE_DELAY_SECONDS' entry is overwritten on every call.
        rejection_model: Imported probabilistic rejection model.
        assignment_log: Log passed through the standard assignment handler.
        decision_agent: Agent whose ``get_action(state)`` chooses between
            direct (0) and split (anything else).
        state_handler: Provides ``get_state_features`` / ``discretize_state``.

    Returns:
        tuple: ``(couriers, data, metrics, delivered_order_ids,
        next_order_queue, assignment_log)``.
    """
    # --- 1. Move the couriers --------------------------------------------
    couriers, metrics, delivered_order_ids = abm.move_couriers_new(
        couriers, timestart, metrics, delivered_order_ids,
        constants['SPEED_HEX_PER_STEP'], constants['steps'],
    )

    # --- 2. The longer the queue, the more delay is tolerated --------------
    queue_length = len(order_queue)
    if queue_length > 50:
        delay_minutes = 15
    elif queue_length > 20:
        delay_minutes = 10
    else:
        delay_minutes = 5
    constants["MAX_ACCEPTABLE_DELAY_SECONDS"] = delay_minutes * 60

    # --- 3. Strategy-specific behaviour ----------------------------------
    repositioning_enabled_strategies = ['Repositioning', 'Combined_Split']
    splitting_enabled_strategies = ['Split', 'Combined_Split']

    if (
        strategy in repositioning_enabled_strategies
        and (timestart - constants['initial_timestart']) % constants['repositioning_interval'] == 0
    ):
        # NOTE(review): the +8 h shift suggests the forecast bins are keyed in
        # local time (UTC+8) -- confirm against the forecast data.
        bin_start = pd.to_datetime(timestart, unit='s').floor('15min')
        current_bin_key = bin_start + pd.Timedelta(hours=8)
        dynamic_demand = constants['pre_binned_demand'].get(current_bin_key, {})
        if dynamic_demand:
            repositioning.run_repositioning_strategy(
                couriers, dynamic_demand, timestart, order_queue,
                constants['SPEED_HEX_PER_STEP'], constants['steps'],
                constants['MACRO_RESOLUTION'], constants['WORK_RESOLUTION'],
            )

    # --- 4. Collect the orders arriving within this step -------------------
    arrives_now = (
        (data['platform_order_time'] >= timestart)
        & (data['platform_order_time'] < timestart + steps)
    )
    new_orders_this_step = []
    for _, new_order in data[arrives_now].iterrows():
        new_order['assignment_status'] = 'pending_full'
        new_orders_this_step.append(new_order)

    all_pending_orders = order_queue + [(new_order, 0) for new_order in new_orders_this_step]
    next_order_queue = []
    processed_order_ids_this_step = set()

    # --- 5. Process all pending orders -----------------------------------
    for order, attempts in all_pending_orders:
        if order['order_id'] in processed_order_ids_this_step:
            continue

        # Splitting is only considered while the order has not exceeded its
        # queue attempts, the strategy allows it, and the order is not the
        # second leg of an earlier split ('pending_part2' is always direct).
        try_split = False
        if not attempts > constants['MAX_QUEUE_ATTEMPTS'] and strategy in splitting_enabled_strategies:
            if order['assignment_status'] != 'pending_part2':
                state_features = state_handler.get_state_features(order, couriers)
                current_state = state_handler.discretize_state(state_features)
                # Ask the agent: 0 = direct, anything else = split.
                try_split = decision_agent.get_action(current_state) != 0

        was_processed = False
        if try_split:
            idle_couriers = [
                c for c in couriers
                if c.state == 'IDLE' and order['order_id'] not in c.rejected_orders
            ]
            c1, c2, r1, r2 = split.process_split_delivery(order, idle_couriers, timestart, constants)
            if c1 and c2:
                was_processed, metrics = split.execute_split_assignment(
                    order, c1, c2, r1, r2, timestart, constants,
                    rejection_model, processed_order_ids_this_step, next_order_queue, metrics
                )
        else:
            was_processed, metrics, assignment_log = abm.handle_standard_assignment(
                order, attempts, couriers, timestart, constants, rejection_model,
                processed_order_ids_this_step, metrics, assignment_log
            )

        # Unassigned orders go back into the queue with one more attempt.
        if not was_processed:
            next_order_queue.append((order, attempts + 1))

    return couriers, data, metrics, delivered_order_ids, next_order_queue, assignment_log

In [7]:
def evaluate_effectiveness(results, total_active_couriers):
    """
    Print a summary of the simulation results, one report per scenario.

    Args:
        results: Dict of simulation outcomes in the format
            {(scale, strategy): (total_time, metrics, total_distance)}, where
            metrics is the 6-tuple (delay_inc, _, success, success_delay,
            stacked_count, rejected_count).
        total_active_couriers: Fleet size at 100 %; scaled per scenario for display.

    Scenarios without any delivered order are skipped.
    """
    header_rule = "=" * 40
    print(header_rule + "Simulation results" + header_rule)

    for (scale, strategy), (total_time, metrics, total_distance) in results.items():
        delay_inc, _, success, success_delay, stacked_count, rejected_count = metrics

        # Nothing delivered -> nothing to report for this scenario.
        if success + success_delay == 0:
            continue

        # Average delay is taken over late deliveries only.
        if success_delay > 0:
            avg_delay = delay_inc / success_delay
        else:
            avg_delay = 0

        report_lines = [
            f"Scenario: '{strategy}' @ {int(scale*100)}% Flotte ({int(total_active_couriers * scale)} Fahrer)",
            f"Total Time to clear all orders: {total_time/3600:.2f} Stunden ({total_time/60:.0f} Minuten)",
            f"On-time or Early Deliveries: {success}",
            f"Late Deliveries: {success_delay}",
            f"Avg. Delay (for late deliveries): {avg_delay/60:.1f} Minuten",
            f"Total Distance Traveled (Hexagons): {total_distance} Zellen",
            f"Total Stacked Assignments: {stacked_count}",
            f"Total Rejected Offers: {rejected_count}",
            "=" * 100,
        ]
        print("\n".join(report_lines))

In [27]:
# Simulation constants shared by all scenarios. run_abm overwrites
# 'MAX_ACCEPTABLE_DELAY_SECONDS' on every step depending on the queue length.
constants = {
    'initial_timestart': 1666591200,  # 2022-10-24 @14:00 (UTC+8), i.e. 06:00 UTC
    'SPEED_HEX_PER_STEP': 8,  # count of hexagon jumps per step in work resolution
    'simulation_duration_hours': 3,  # we simulate e.g. 14:00-17:00
    'steps': 30,  # duration of a single simulation timestep in seconds
    'repositioning_interval': 15 * 60,  # interval in seconds at which repositioning is triggered
    'MAX_ACCEPTABLE_DELAY_SECONDS': 5 * 60,  # for assignments, to wait for a better courier
    'MAX_QUEUE_ATTEMPTS': 20,  # 20 attempts at 30 s steps (~10 minutes) -> best free courier is chosen
    'pre_binned_demand': pre_binned_demand,  # our prediction for the three hours
    'MACRO_RESOLUTION': 8,  # hexagon resolution of demand prediction and zones
    'WORK_RESOLUTION': 13  # actors' position and movement
}
final_courier_states = {}  # for evaluation
results = {}  # for evaluation

rejection_model = joblib.load('rejection_model.joblib')

warmup_duration_seconds = 30 * 60  # 30 minutes

# Point in time from which metrics are meant to be collected.
metrics_start_time = constants['initial_timestart'] + warmup_duration_seconds

# Add a 'phase' column that categorises each order as 'warmup' or 'tracked'.
sim_data_filtered['phase'] = np.where(
    sim_data_filtered['platform_order_time'] < metrics_start_time,
    'warmup',
    'tracked'
)

# Tell the user how many orders fall into the tracked phase.
tracked_orders_count = (sim_data_filtered['phase'] == 'tracked').sum()
print(f"Simulation initialisiert. Metriken werden für {tracked_orders_count} 'tracked' Aufträge gesammelt (nach t=30min).")


# Determine the total number of couriers who were active in the time window,
# then use 80 % of that number as the 100 % fleet.
total_active_couriers = sim_data_filtered['courier_id'].unique().shape[0]
total_active_couriers = total_active_couriers*0.8

# Define the scenarios to run.
strategies = ['Split', 'Combined_Split']
repositioning_enabled_strategies = ['Repositioning', 'Combined_Split']
courier_scales = [0.5]
assignment_log_dict = {}

# Loop through each fleet scale.
for scale in courier_scales:
    for strategy in strategies:  # loop through each strategy

        # Reset the simulation state.
        start_time_real = time.time()
        timestart = constants['initial_timestart']
        sim_data = sim_data_filtered.copy()
        initial_order_ids = set(sim_data['order_id'])
        num_initial_orders = len(initial_order_ids)
        delivered_order_ids = set()
        # metrics = (delay_inc, 0, success, success_delay, stacked_orders, rejected_orders)
        metrics = (0, 0, 0, 0, 0, 0)
        order_queue = []
        assignment_log = []

        # Initiate couriers at their position based on scale.
        couriers = abm.initiate_couriers(int(total_active_couriers * scale), sim_data_filtered)

        print(f"Starting Simulation:'{strategy}' with {len(couriers)} couriers ({int(scale*100)}%)...")

        if strategy in repositioning_enabled_strategies:
            # We assume the fleet would not start at point zero, so it is
            # pre-positioned during a 15-minute warm-up before the first order.
            print("Running warm up phase for fleet deployment")
            warmup_seconds = 15 * 60
            warmup_start_time = constants['initial_timestart'] - warmup_seconds

            # Use the pre-binned forecast for the first time slot as the warm-up
            # target. The lookup does not depend on the warm-up step, so it is
            # done once before the loop.
            first_bin_key = pd.to_datetime(constants['initial_timestart'], unit='s').floor('15min') + pd.Timedelta(hours=8)
            dynamic_demand = pre_binned_demand.get(first_bin_key, {})

            for t in range(warmup_start_time, constants['initial_timestart'], constants['steps']):
                # Move couriers that are on a repositioning task; metrics are discarded.
                couriers, _, delivered_order_ids = abm.move_couriers_new(couriers, t, (0, 0, 0, 0, 0, 0), delivered_order_ids, constants['SPEED_HEX_PER_STEP'], constants['steps'])

                # Assign reposition tasks.
                if dynamic_demand:
                    repositioning.run_repositioning_strategy(couriers, dynamic_demand, t, [],
                        constants['SPEED_HEX_PER_STEP'], constants['steps'],
                        constants['MACRO_RESOLUTION'], constants['WORK_RESOLUTION']
                    )

        # Main simulation loop: runs until every order has been delivered.
        while len(delivered_order_ids) < num_initial_orders:
            couriers, sim_data, metrics, delivered_order_ids, order_queue, assignment_log = run_abm(
                timestart, constants['steps'], sim_data, couriers, metrics, delivered_order_ids, order_queue,
                strategy, constants, rejection_model, assignment_log,
                decision_agent=decision_agent, state_handler=state_handler
            )
            timestart += constants['steps']  # advance the clock by one step

            # Log every 10 minutes of simulated time.
            if (timestart - constants['initial_timestart']) % 600 == 0 and timestart > constants['initial_timestart']:
                print(f"  > Time: {(timestart - constants['initial_timestart'] )/60:.0f} min | Delivered: {len(delivered_order_ids)}/{num_initial_orders} | Queue: {len(order_queue)}")

        # Store the metrics of this scenario.
        total_simulation_time = timestart - constants['initial_timestart']
        end_time_real = time.time()
        assignment_log_dict[(scale, strategy)] = assignment_log

        final_courier_states[(scale, strategy)] = couriers

        # Computed once (the original cell recomputed it a second time without using it).
        total_distance = abm.calculate_total_distance_in_hexes(couriers)
        results[(scale, strategy)] = (total_simulation_time, metrics, total_distance)

        print(f"Simulation (Skala {int(scale*100)}%) finished in {end_time_real - start_time_real:.2f} real seconds.")
        print(f"All {num_initial_orders} orders delivered. Total simulation time: {total_simulation_time/60:.0f} minutes.")

# Final evaluation.
evaluate_effectiveness(results, total_active_couriers)

Simulation initialisiert. Metriken werden für 5240 'tracked' Aufträge gesammelt (nach t=30min).
Starting Simulation:'Split' with 629 couriers (50%)...
  [SPLIT SUCCESS] Order 430005 planned and assigned to C576 and C453
  [SPLIT SUCCESS] Order 508820 planned and assigned to C258 and C115
  [SPLIT SUCCESS] Order 558211 planned and assigned to C164 and C464
  [SPLIT SUCCESS] Order 206450 planned and assigned to C202 and C36
  [SPLIT SUCCESS] Order 337456 planned and assigned to C489 and C293
  [SPLIT SUCCESS] Order 359737 planned and assigned to C418 and C616
  [SPLIT SUCCESS] Order 532345 planned and assigned to C511 and C148
  [SPLIT SUCCESS] Order 70016 planned and assigned to C479 and C294
  [SPLIT SUCCESS] Order 73147 planned and assigned to C452 and C431
  [SPLIT SUCCESS] Order 343127 planned and assigned to C395 and C145
  [SPLIT SUCCESS] Order 400703 planned and assigned to C239 and C396
  [SPLIT SUCCESS] Order 88852 planned and assigned to C6 and C67
  [SPLIT SUCCESS] Order 2985