# In [None]:  (Jupyter cell marker left over from the notebook export)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Crowdshipping Pricing Model

This module implements an optimization model for crowdshipping platforms that:
- Match drivers with customer orders
- Build driver routes
- Determine optimal pricing strategies

The model uses mixed-integer programming (MIP) with OR-Tools to maximize
platform profit while satisfying constraints for time windows, capacity,
willingness-to-pay (WTP), and expected compensation (ETP).

Author: [Your Name]
Date: [Date]
Version: 1.0
"""

import os
import numpy as np
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.patches as mpatches
from datetime import datetime, timedelta
from openpyxl import load_workbook

# Geospatial libraries
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import geopandas as gpd
import fiona

# Optimization libraries
from ortools.linear_solver import pywraplp
from haversine import haversine

# ============================================================================
# CONFIGURATION AND CONSTANTS
# ============================================================================

# Big-M constant for the indicator-style constraints of the MIP
# (pricing_model takes a parameter of the same name and uses it in the
# "(1 - x) * M" and "M * x" terms).
M = 10000

# strftime pattern used when time-window bounds are rendered as text
TIME_FORMAT_OUTPUT = '%H:%M'

# Repository configuration: clone URL and the directory name git creates for it
# (setup_repository() clones REPO_URL and then changes into REPO_NAME).
REPO_URL = "https://github.com/rimchmielowitz/Pricing.git"
REPO_NAME = "Pricing"

# Data paths for Berlin instances.
# NOTE(review): these are resolved against the working directory at import
# time; setup_repository() calls os.chdir(REPO_NAME) afterwards, which does
# not update them -- confirm the directory the module is expected to be
# loaded from.
DATA_FOLDER = os.path.join(os.getcwd(), "data_Berlin")
TWD_DATA_PATH = os.path.join(DATA_FOLDER, "TWD.csv")
DISTRICTS_DATA_PATH = os.path.join(DATA_FOLDER, "districts.csv")

# ============================================================================
# SETUP AND INSTALLATION
# ============================================================================

def install_requirements():
    """Install the third-party packages used by the pricing model (Colab compatible).

    Each package is installed with ``pip`` through the interpreter that is
    currently running, then ``fiona`` and ``geopandas`` are upgraded.

    Raises:
        subprocess.CalledProcessError: If a pip invocation exits with a
            non-zero status.
    """
    import subprocess
    import sys

    # 'datetime' is intentionally not listed: it is part of the standard
    # library, and the PyPI distribution of that name is the unrelated Zope
    # DateTime package. 'pandas' is listed because this module imports it.
    packages = [
        'matplotlib', 'networkx', 'numpy', 'pandas', 'haversine', 'cartopy',
        'geopandas', 'openpyxl', 'ortools',
        'fiona', 'shapely', 'pyproj', 'rtree'
    ]

    pip_install = [sys.executable, "-m", "pip", "install"]

    for package in packages:
        subprocess.check_call(pip_install + [package])

    # Upgrade specific packages
    subprocess.check_call(pip_install + ["--upgrade", "fiona", "geopandas"])


def setup_repository():
    """Clone the project repository if it is missing and change into it.

    Side effects: may run ``git clone``, changes the process working
    directory to ``REPO_NAME`` and sets the ``SHAPE_RESTORE_SHX`` environment
    variable to ``"YES"``.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a non-zero
            status (previously the failure was ignored and the success
            message was printed anyway).
        FileNotFoundError: If the ``git`` executable cannot be found.
    """
    import subprocess

    if not os.path.exists(REPO_NAME):
        # Argument list instead of a shell string: no shell parsing of the
        # URL, and a failed clone raises instead of being silently ignored.
        subprocess.check_call(["git", "clone", "--recurse-submodules", REPO_URL])
        print("✅ Repository wurde geklont.")
    else:
        print("✅ Repository existiert bereits.")

    # Change to repository folder
    os.chdir(REPO_NAME)

    # Enable SHAPE_RESTORE_SHX (read by the shapefile driver when a .shx
    # index file is missing -- presumably needed for the bundled shapefiles)
    os.environ["SHAPE_RESTORE_SHX"] = "YES"


# ============================================================================
# DATA PROCESSING AND SET CREATION
# ============================================================================

def read_data_create_sets(twd_data_path, districts_data_path):
    """
    Read the instance CSV files and build all sets and parameters of the model.

    Row layout of TWD.csv as implied by the row arithmetic below: rows
    0..n-1 are pickup nodes, the next n rows are delivery nodes, followed by
    m driver-origin rows and m driver-destination rows. Node ids are 1-based
    (row r is node r + 1), which is why the coordinate and capacity lists get
    a dummy entry at index 0.

    Args:
        twd_data_path (str): Path to TWD.csv file (';'-separated)
        districts_data_path (str): Path to districts.csv file (';'-separated)

    Returns:
        tuple: In this order: wtp_factor, etp_factor, H, R, tau, tau_, V,
            pickup_delivery, ETP, WTP, tw_lb, tw_ub, destination, origin,
            pickup_destination, delivery_origin, delivery_destination, Vh,
            origin_pickup_delivery, pickup_delivery_destination, E, E1, d,
            d2, t, m, n, A, A_, a, b, l, s, R2, all_x, all_y, speed,
            district_names, district_coord_x, district_coord_y
    """
    delimiter = ';'

    def _decimal_floats(frame, first_row, last_row, column):
        """Return rows first_row..last_row of `column` as floats.

        Accepts both comma-decimal text ("13,4") and columns pandas already
        parsed as numbers; the plain `.str` accessor fails on the latter.
        """
        return (
            frame.loc[first_row:last_row, column]
            .astype(str)
            .str.replace(',', '.', regex=False)
            .astype(float)
            .tolist()
        )

    # Read main data file
    df = pd.read_csv(
        twd_data_path,
        delimiter=delimiter,
        header=0,
        parse_dates=['Time a', 'Time b'],
        date_format="%H:%M"
    )

    # Read districts data
    df_districts = pd.read_csv(districts_data_path, delimiter=delimiter, header=0)

    # Scalar parameters are stored in the first data row
    m = int(df.at[0, "m"])  # Number of drivers
    n = int(df.at[0, "n"])  # Number of requests

    # Process WTP and ETP factors (handle both float and string formats)
    wtp_factor = _process_factor(df.at[0, "WTP_Faktor"])
    etp_factor = _process_factor(df.at[0, "ETP_Faktor"])
    speed = int(df.at[0, "Speed"])

    # Define row ranges for different data sections
    start_row_pickup = 0
    start_row_delivery = start_row_pickup + n
    start_row_driver = start_row_delivery + n
    last_node_row = start_row_driver + 2 * m - 1

    # Create sets
    A = df.loc[start_row_pickup:start_row_delivery-1, 'i'].astype(int).tolist()
    A_ = df.loc[start_row_delivery:start_row_driver-1, 'i'].astype(int).tolist()

    # Process coordinates: all_x holds longitudes, all_y latitudes
    all_x = [0.0] + _decimal_floats(df, start_row_pickup, last_node_row, 'Long')
    all_y = [0.0] + _decimal_floats(df, start_row_pickup, last_node_row, 'Lat')

    # Process driver capacities (read from the driver-origin rows)
    R2 = df.loc[start_row_driver:start_row_driver+m-1, 'R'].astype(int).tolist()
    R2.insert(0, 0)

    # Process district information (rows 0..14 of the districts file;
    # note that here the *_x list holds 'Lat' and the *_y list holds 'Long')
    district_names = dict(enumerate(df_districts.loc[0:14, 'Name'].astype(str), start=0))
    district_coord_x = _decimal_floats(df_districts, start_row_pickup, 14, 'Lat')
    district_coord_y = _decimal_floats(df_districts, start_row_pickup, 14, 'Long')

    # Create parameter dictionaries (keyed by 1-based node id)
    l = _create_indexed_dict(df, start_row_pickup, last_node_row, 'l', int)
    a = _create_indexed_dict(df, start_row_pickup, last_node_row, 'a', int)
    b = _create_indexed_dict(df, start_row_pickup, last_node_row, 'b', int)
    s = _create_indexed_dict(df, start_row_pickup, last_node_row, 's', int)

    # Create sets and derived parameters
    H = list(range(1, m + 1))  # Set of drivers

    # Process time windows as text (keyed by 1-based node id)
    tw_lb = {
        idx: pd.to_datetime(value, format="%H:%M:%S").strftime(TIME_FORMAT_OUTPUT)
        for idx, value in enumerate(
            df.loc[start_row_pickup:last_node_row, 'Time a'], start=1
        )
    }

    tw_ub = {
        idx: pd.to_datetime(value, format="%H:%M:%S").strftime(TIME_FORMAT_OUTPUT)
        for idx, value in enumerate(
            df.loc[start_row_pickup:last_node_row, 'Time b'], start=1
        )
    }

    # Create driver-specific sets (tau / tau_ carry a dummy 0 at index 0 so
    # that tau[h] is the origin node of driver h)
    R = {h: R2[h] for h in H}
    tau = [2*n + h for h in H]  # Origin nodes of drivers
    tau.insert(0, 0)
    tau_ = [2*n + m + h for h in H]  # Destination nodes of drivers
    tau_.insert(0, 0)

    # Create comprehensive node sets
    V = A + A_ + tau[1:] + tau_[1:]  # All nodes
    pickup_delivery = A + A_  # All pickup and delivery nodes

    # Driver-specific node sets
    destination = {h: tau_[h] for h in H}
    origin = {h: tau[h] for h in H}
    pickup_destination = {h: A + [destination[h]] for h in H}
    delivery_origin = {h: [origin[h]] + A_ for h in H}
    delivery_destination = {h: A_ + [destination[h]] for h in H}
    Vh = {h: [origin[h]] + A + A_ + [destination[h]] for h in H}
    origin_pickup_delivery = {h: [origin[h]] + A + A_ for h in H}
    pickup_delivery_destination = {h: pickup_delivery + [destination[h]] for h in H}
    origin_delivery_destination = A_ + tau[1:] + tau_[1:]

    # Create edge sets
    E = [(i, j) for i in V for j in V if i != j]
    E1 = {h: [(i, j) for i in Vh[h] for j in Vh[h] if i != j] for h in H}

    def _distance(i, j):
        """Great-circle distance between nodes i and j.

        haversine() expects each point as (latitude, longitude); the points
        were previously passed as (longitude, latitude), which distorts every
        distance.
        """
        return haversine((all_y[i], all_x[i]), (all_y[j], all_x[j]))

    # Calculate distances and travel times
    d = {(i, j): _distance(i, j) for i, j in E}

    # Direct pickup -> delivery distance per request; 0 for every other node
    d2 = {i: _distance(i, i + n) for i in A}
    d21 = {i: 0 for i in origin_delivery_destination}
    d2.update(d21)

    # Travel time is speed * distance, i.e. 'Speed' acts as a time-per-distance
    # factor here. NOTE(review): confirm the unit of the 'Speed' column.
    t = {(i, j): speed * d[i, j] for i, j in E}

    # Generate random WTP and ETP values (with fixed seed for reproducibility).
    # The seed is reset before each draw, so both draws start from the same
    # point of the global NumPy random stream.
    np.random.seed(10)
    etp_a, etp_b = 0.5, 1
    ETP = {
        h: round(np.random.uniform(etp_a, etp_b) * etp_factor, 2)
        for h in H
    }

    np.random.seed(10)
    wtp_a, wtp_b = 5, 10
    WTP = {
        i: round(np.random.uniform(wtp_a, wtp_b) * wtp_factor, 2)
        for i in A
    }

    return (
        wtp_factor, etp_factor, H, R, tau, tau_, V, pickup_delivery, ETP, WTP,
        tw_lb, tw_ub, destination, origin, pickup_destination, delivery_origin,
        delivery_destination, Vh, origin_pickup_delivery, pickup_delivery_destination,
        E, E1, d, d2, t, m, n, A, A_, a, b, l, s, R2, all_x, all_y, speed,
        district_names, district_coord_x, district_coord_y
    )


def _process_factor(factor_value):
    """Normalise a WTP/ETP factor read from the CSV.

    Text values may use a decimal comma and are converted to ``float``;
    any non-string value (e.g. ``np.float64``) is handed back unchanged.
    """
    if not isinstance(factor_value, str):
        return factor_value
    return float(factor_value.replace(',', '.'))


def _create_indexed_dict(df, start_row, end_row, column, dtype):
    """Map 1-based positions to the values of a DataFrame column slice.

    The rows ``start_row..end_row`` (label-based, both inclusive) of
    ``column`` are cast to ``dtype``; the first selected row gets key 1.
    """
    selected = df.loc[start_row:end_row, column].astype(dtype)
    positions = range(1, len(selected) + 1)
    return dict(zip(positions, map(dtype, selected)))


# ============================================================================
# OPTIMIZATION MODEL
# ============================================================================

def pricing_model(n, m, A, A_, H, WTP, ETP, R, d2, V, d, t, s, l, a, b, M,
                 pickup_delivery, origin_pickup_delivery, tau, tau_):
    """
    Main pricing optimization model for crowdshipping platform.

    Builds and solves a mixed-integer program (OR-Tools, SCIP backend) that
    chooses driver routes, per-arc prices and per-arc compensations so that
    platform profit is maximal.

    The arc set and the driver-specific node sets are derived inside the
    function from ``V``, ``A``, ``A_``, ``tau`` and ``tau_`` (using the same
    definitions as ``read_data_create_sets``), so the function no longer
    depends on module-level variables.

    Args:
        n (int): Number of requests; delivery node of pickup i is n + i
        m (int): Number of drivers (not used inside the model itself)
        A (list): Pickup nodes
        A_ (list): Delivery nodes
        H (list): Set of drivers
        WTP (dict): Willingness-to-pay per request
        ETP (dict): Expected compensation per driver
        R (dict): Vehicle capacity per driver
        d2 (dict): Direct distance pickup to delivery (must be non-zero for
            every pickup node, it is used as a divisor)
        V (list): All nodes
        d (dict): Distance matrix between all nodes
        t (dict): Travel time matrix
        s (dict): Service times at nodes
        l (dict): Load changes at nodes
        a (dict): Time window lower bounds
        b (dict): Time window upper bounds
        M (int): Big-M parameter for linearization
        pickup_delivery (list): Combined pickup and delivery nodes
        origin_pickup_delivery (dict): Origin, pickup and delivery nodes per driver
        tau (list): Origin nodes, with a dummy entry at index 0 (tau[h] is
            the origin of driver h)
        tau_ (list): Destination nodes, same indexing as tau

    Returns:
        tuple: (objective_value, optimal_L, optimal_S, optimal_c, optimal_p,
            optimal_x, optimal_z, runtime) when the solver reports OPTIMAL;
            runtime is the value of ``solver.WallTime()``.
            None if the solver cannot be created or any other status is
            returned.
    """
    # Initialize OR-Tools MIP solver
    solver = pywraplp.Solver.CreateSolver('SCIP')

    if not solver:
        print("Solver konnte nicht erstellt werden!")
        return None

    # ========================================================================
    # DERIVED SETS
    # ========================================================================

    origin = {h: tau[h] for h in H}
    destination = {h: tau_[h] for h in H}
    E = [(i, j) for i in V for j in V if i != j]  # all arcs, no self-loops
    Vh = {h: [origin[h]] + A + A_ + [destination[h]] for h in H}
    pickup_destination = {h: A + [destination[h]] for h in H}
    delivery_origin = {h: [origin[h]] + A_ for h in H}

    # ========================================================================
    # DECISION VARIABLES
    # ========================================================================

    # Binary variable: 1 if driver h travels from node i to j, 0 otherwise
    x = {}
    for (i, j) in E:
        for h in H:
            x[i, j, h] = solver.IntVar(0, 1, f'x[{i},{j},{h}]')

    # Service start time at node i for driver h
    S = {}
    for i in V:
        for h in H:
            S[i, h] = solver.IntVar(0, solver.infinity(), f'S[{i},{h}]')

    # Load of vehicle h at node i
    L = {}
    for i in V:
        for h in H:
            L[i, h] = solver.IntVar(0, solver.infinity(), f'L[{i},{h}]')

    # Binary variable: 1 if request i is not served, 0 otherwise
    z = {}
    for i in A:
        z[i] = solver.IntVar(0, 1, f'z[{i}]')

    # Price per unit distance for arc (i,j) and driver h (lower bound 0)
    p = {}
    for (i, j) in E:
        for h in H:
            p[i, j, h] = solver.NumVar(0, solver.infinity(), f'p[{i},{j},{h}]')

    # Compensation per unit distance for arc (i,j) and driver h (lower bound 0)
    c = {}
    for (i, j) in E:
        for h in H:
            c[i, j, h] = solver.NumVar(0, solver.infinity(), f'c[{i},{j},{h}]')

    # ========================================================================
    # OBJECTIVE FUNCTION
    # ========================================================================

    # Maximize platform profit: total revenue minus total costs
    solver.Maximize(
        solver.Sum(
            p[i, j, h] * d2[i] - c[i, j, h] * d[i, j]
            for (i, j) in E
            for h in H
        )
    )

    # ========================================================================
    # CONSTRAINTS
    # ========================================================================

    # Price constraints: Price cannot exceed customer's willingness-to-pay
    # (WTP is converted to a per-distance price, hence the division by d2[i])
    for h in H:
        for i in A:
            for j in pickup_delivery:
                if i != j:
                    solver.Add(
                        p[i, j, h] <= WTP[i] / d2[i] + (1 - x[i, j, h]) * M
                    )  # Constraint (02)

    # Compensation constraints: Driver compensation must meet minimum expectations
    for h in H:
        for i in origin_pickup_delivery[h]:
            for j in pickup_delivery:
                if i != j:
                    solver.Add(
                        c[i, j, h] >= ETP[h] * x[i, j, h]
                    )  # Constraint (03)

    # Revenue sharing: Price must be at least as high as compensation
    for h in H:
        for i in A:
            for j in pickup_delivery:
                if i != j:
                    solver.Add(
                        p[i, j, h] >= c[i, j, h]
                    )  # Constraint (04)

    # Request assignment: Each request is either served or goes to request bank
    for i in A:
        solver.Add(
            solver.Sum(x[i, j, h] for j in pickup_delivery for h in H if i != j) + z[i] == 1
        )  # Constraint (05)

    # Flow conservation: Pickup and delivery must be matched for same driver
    # (arcs leaving pickup i equal arcs entering its delivery n + i)
    for h in H:
        for i in A:
            solver.Add(
                solver.Sum(x[i, j, h] for j in Vh[h] if (i, j, h) in x and i != j) -
                solver.Sum(x[j, n+i, h] for j in Vh[h] if (j, n+i, h) in x and i != n+i) == 0
            )  # Constraint (06)

    # Driver origin constraint: Each driver starts from their origin
    for h in H:
        solver.Add(
            solver.Sum(x[origin[h], j, h] for j in pickup_destination[h]) == 1
        )  # Constraint (07)

    # Driver destination constraint: Each driver ends at their destination
    for h in H:
        solver.Add(
            solver.Sum(x[i, tau_[h], h] for i in delivery_origin[h] if i != tau_[h]) == 1
        )  # Constraint (08)

    # Flow conservation at pickup/delivery nodes
    for j in pickup_delivery:
        for h in H:
            solver.Add(
                solver.Sum(x[i, j, h] for i in V if i != j) -
                solver.Sum(x[j, i, h] for i in V if i != j) == 0
            )  # Constraint (09)

    # Time consistency: Ensure proper sequencing along routes
    for (i, j) in E:
        for h in H:
            solver.Add(
                S[i, h] + s[i] + t[i, j] <= S[j, h] + (1 - x[i, j, h]) * M
            )  # Constraint (10)

    # Time window constraints
    for i in V:
        for h in H:
            solver.Add(S[i, h] >= a[i])
            solver.Add(S[i, h] <= b[i])  # Constraint (11)

    # Precedence constraint: Pickup must occur before delivery
    for i in A:
        for h in H:
            solver.Add(S[i, h] <= S[n + i, h])  # Constraint (12)

    # Capacity flow constraints
    for (i, j) in E:
        for h in H:
            solver.Add(
                L[i, h] + l[j] <= L[j, h] + (1 - x[i, j, h]) * M
            )  # Constraint (13)

    # Vehicle capacity limits
    for i in V:
        for h in H:
            solver.Add(L[i, h] <= R[h])  # Constraint (14)

    # Initial and final load constraints
    for h in H:
        solver.Add(L[origin[h], h] == 0)  # Constraint (15.1)
        solver.Add(L[destination[h], h] == 0)  # Constraint (15.2)

    # ========================================================================
    # LINEARIZATION CONSTRAINTS (Big-M)
    # ========================================================================

    # Price and compensation can only be positive on arcs that are used.
    # Non-negativity is already part of the variable bounds, so no separate
    # ">= 0" rows are added.
    for (i, j) in E:
        for h in H:
            solver.Add(p[i, j, h] <= M * x[i, j, h])  # Constraint (23)

    for (i, j) in E:
        for h in H:
            solver.Add(c[i, j, h] <= M * x[i, j, h])  # Constraint (24)

    # ========================================================================
    # ADDITIONAL CONSTRAINTS
    # ========================================================================

    # Every origin node is left exactly once (summed over all drivers)
    for i in tau[1:]:
        solver.Add(
            solver.Sum(x[i, j, h] for h in H for j in V if i != j) == 1
        )  # Constraint (25)

    # Prevent movement from destination nodes. The sum already ranges over
    # all drivers, so the constraint is added once instead of once per driver.
    solver.Add(
        solver.Sum(x[i, j, h] for i in tau_[1:] for h in H for j in V if i != j) == 0
    )  # Constraint (26)

    # ========================================================================
    # SOLVE MODEL
    # ========================================================================

    status = solver.Solve()

    # Only a proven optimum is reported; every other status yields None
    if status != pywraplp.Solver.OPTIMAL:
        return None

    # Extract optimal solutions
    optimal_x = {(i, j, h): x[i, j, h].solution_value() for (i, j) in E for h in H}
    optimal_S = {(i, h): S[i, h].solution_value() for i in V for h in H}
    optimal_L = {(i, h): L[i, h].solution_value() for i in V for h in H}
    optimal_z = {i: z[i].solution_value() for i in A}
    optimal_p = {(i, j, h): p[i, j, h].solution_value() for (i, j) in E for h in H}
    optimal_c = {(i, j, h): c[i, j, h].solution_value() for (i, j) in E for h in H}

    objective_value = solver.Objective().Value()
    runtime = solver.WallTime()

    return (
        objective_value, optimal_L, optimal_S, optimal_c,
        optimal_p, optimal_x, optimal_z, runtime
    )


# ============================================================================
# RESULTS PROCESSING
# ============================================================================

def extract_results(optimal_x, optimal_S, optimal_L, optimal_z, optimal_p, optimal_c,
                   objective_value, E, H, V, A, A_, pickup_delivery, origin_pickup_delivery,
                   tau_, d, d2, WTP, ETP):
    """
    Extract and process optimization results for analysis and visualization.

    Besides computing the returned values, the function prints a summary of
    the solution to stdout.

    The driver origin nodes are derived from the arguments (nodes listed in
    ``origin_pickup_delivery`` that are not pickup/delivery nodes), so the
    function does not depend on a module-level ``tau`` variable.

    Args:
        optimal_x, optimal_S, optimal_L, optimal_z, optimal_p, optimal_c: Optimal
            solution values keyed like the model variables
        objective_value (float): Optimal objective function value
        E, H, V, A, A_, pickup_delivery, origin_pickup_delivery: Sets from model
        tau_: Destination nodes (dummy entry at index 0)
        d, d2: Distance matrices
        WTP, ETP: Willingness-to-pay and expected compensation

    Returns:
        tuple: (active_arcs, active_prices, active_costs, active_load,
            active_z, active_start, tours, active_drivers, active_arcs_2d,
            active, chosen_c, chosen_cost, chosen_p, optimal_request_prices,
            optimal_driver_costs, total_revenue, total_costs, used_drivers,
            possible_matches, successful_matches, edge_costs,
            margin_per_request, margin_per_tour, active_requests)
    """
    arc_driver_keys = [(i, j, h) for i, j in E for h in H]

    # Extract active decision variables (values > threshold)
    active_arcs = [key for key in arc_driver_keys if optimal_x[key] > 0.99]
    active_prices = [key for key in arc_driver_keys if optimal_p[key] > 0.99]
    active_costs = [key for key in arc_driver_keys if optimal_c[key] > 0.099]
    active_load = [(i, h) for i in V for h in H if optimal_L[i, h] > 0.99]
    active_z = [i for i in A if optimal_z[i] > 0.99]
    active_start = [(i, h) for i in V for h in H if optimal_S[i, h] > 0.99]

    # Calculate total distance (arcs ending at a pickup or delivery node)
    total_distance = sum(
        optimal_x[i, j, h] * d[i, j]
        for h in H
        for i in origin_pickup_delivery[h]
        for j in pickup_delivery
        if i != j
    )

    # Calculate driver statistics. The number of arcs per driver is taken
    # from the thresholded tours so that solver round-off (e.g. 0.9999999)
    # cannot push a count below an integer threshold.
    tours = {h: [(i, j) for i, j in E if optimal_x[i, j, h] > 0.99] for h in H}
    driver_stops = {h: len(tours[h]) for h in H}

    # Keep only drivers with at least two arcs; (arcs - 1) / 2 is reported
    # as the number of requests on the tour
    multistop_tours = {}
    for h in H:
        if driver_stops[h] >= 2:
            multistop_tours[h] = (driver_stops[h] - 1) / 2
        else:
            tours.pop(h, None)

    # Calculate key metrics
    active_drivers = [h for h in tours if tours[h]]
    possible_matches = len(A)
    successful_matches = len(A) - sum(optimal_z[i] for i in A)
    used_drivers = sum(optimal_x[i, j, h] for i in A_ for j in tau_[1:] for h in H)

    # Process active arcs (drop direct origin -> destination arcs)
    request_nodes = set(pickup_delivery)
    origin_nodes = {
        i for h in H for i in origin_pickup_delivery[h] if i not in request_nodes
    }
    destination_nodes = set(tau_[1:])
    active_arcs_2d = [arc[:2] for arc in active_arcs]
    active = [
        (i, j) for (i, j) in active_arcs_2d
        if not (i in origin_nodes and j in destination_nodes)
    ]
    active_lookup = set(active)

    # Extract pricing and cost information
    chosen_c = {
        key: optimal_c[key]
        for key in arc_driver_keys
        if optimal_c[key] > 0.099
    }
    # One value per driver: the last arc in iteration order wins
    chosen_cost = {h: chosen_c[i, j, h] for i, j, h in chosen_c}
    # One value per start node of a priced arc
    chosen_p = {
        i: round(optimal_p[i, j, h], 2)
        for i, j, h in arc_driver_keys
        if optimal_p[i, j, h] > 0.099
    }

    # Calculate financial metrics
    optimal_request_prices = {i: round(chosen_p[i] * d2[i], 2) for i in chosen_p}
    optimal_driver_costs = {h: round(chosen_c[i, j, h], 2) for i, j, h in chosen_c}

    total_revenue = round(sum(optimal_p[i, j, h] * d2[i] for i, j in E for h in H), 2)
    revenue_per_tour = {
        h: round(sum(optimal_p[i, j, h] * d2[i] for i, j in E), 2)
        for h in H
    }

    total_costs = round(sum(optimal_c[i, j, h] * d[i, j] for i, j in E for h in H), 2)
    costs_per_tour = {
        h: round(sum(optimal_c[i, j, h] * d[i, j] for i, j in E), 2)
        for h in H
    }

    margin_per_tour = {
        h: revenue_per_tour[h] - costs_per_tour[h]
        for h in active_drivers
    }

    # Calculate request-level metrics
    active_requests = list(chosen_p.keys())
    revenue_per_request = {i: d2[i] * chosen_p[i] for i in active_requests}

    # Distance of the active arc into request node i plus the active arc
    # leaving it
    driver_distance_per_request = {
        i: d[i, j] + d[prev, i]
        for i in active_requests
        for j in pickup_delivery
        if (i, j) in active_lookup
        for h in H
        for prev in origin_pickup_delivery[h]
        if (prev, i) in active_lookup
    }

    costs_per_request = {
        i: driver_distance_per_request[i] * chosen_c[i, j, h]
        for i, j, h in chosen_c
        if i in active_requests
    }

    margin_per_request = {
        i: (revenue_per_request[i] - costs_per_request[i])
        for i in chosen_p
    }

    edge_costs = {
        (i, j): chosen_c[i, j, h]
        for h in active_drivers
        for (i, j) in tours[h]
        if j not in destination_nodes
    }

    # Print summary results
    print(f"Total distance from origin to dropoff: {total_distance}")
    print(f"Active arcs (x): {active_arcs}")
    print(f"Unserved requests (z): {active_z}")
    print(f"Number of requests per driver: {multistop_tours}")
    print(f"Possible matches: {possible_matches}")
    print(f"Successful matches: {successful_matches}")
    print(f"Number of used drivers: {used_drivers}")
    print(f"Active drivers: {active_drivers}")
    print(f"Platform profit (€): {round(objective_value, 2)}")
    print(f"Margin per request: {margin_per_request}")

    # Print detailed pricing information
    for i in chosen_p:
        print(f"WTP for request {i}: {WTP[i]}, "
              f"chosen price: {optimal_request_prices[i]}, "
              f"price per km: {round(chosen_p[i], 2)}")

    for h in active_drivers:
        print(f"ETP for driver {h}: {ETP[h]}, "
              f"chosen cost per km: {optimal_driver_costs[h]}")

    print(f"Total revenue: {total_revenue}, Total costs: {total_costs}")

    return (
        active_arcs, active_prices, active_costs, active_load, active_z, active_start,
        tours, active_drivers, active_arcs_2d, active, chosen_c, chosen_cost,
        chosen_p, optimal_request_prices, optimal_driver_costs, total_revenue,
        total_costs, used_drivers, possible_matches, successful_matches, edge_costs,
        margin_per_request, margin_per_tour, active_requests
    )


def process_additional_variables(tours, H, V, E, active, tau_, active_nodes, s, t,
                               Starting_times, active_drivers, origin, destination):
    """
    Derive ordered tours, time information and surplus/detour metrics.

    Besides its parameters the function reads the following names from module
    scope, which therefore have to exist before it is called: optimal_S,
    optimal_x, tw_lb, tw_ub, ETP, WTP, A, n, d, active_requests,
    total_revenue, total_costs, origin_pickup_delivery and pickup_delivery.

    Args:
        tours (dict): Tours per driver (unordered arc lists)
        H (list): Set of drivers
        V (list): All nodes (not used in the body)
        E (list): All edges
        active (list): Active edges
        tau_ (list): Destination nodes (not used in the body)
        active_nodes (list): Active nodes
        s (dict): Service times (not used in the body)
        t (dict): Travel times
        Starting_times (dict): Starting times at nodes, in minutes
        active_drivers (list): List of active drivers
        origin (dict): Origin nodes per driver
        destination (dict): Destination nodes per driver

    Returns:
        tuple: (tours_neu, tours_sorted, used_origins, tw_lb_driver,
            tw_ub_driver, used_destinations, activities, opt_s_driver, tours,
            active_nodes, Starting_times, starting_times_formatted,
            active_travel_times, used_etp, used_wtp, request_driver_mapping,
            sender_surplus, courier_surplus, total_detours, detours,
            detour_total_km_ratio, total_km_per_tour, origin_dest_km,
            detour_tour_ratio, km_per_request, detour_per_tour,
            avg_detour_per_tour, origin_dest_km2, driven_km, externality)
    """
    # Order each driver's arcs by following the longest path through them
    tours_neu = {}
    for driver, arcs in tours.items():
        route_graph = nx.DiGraph()
        route_graph.add_edges_from((u, v) for (u, v) in arcs)
        path = nx.dag_longest_path(route_graph)
        tours_neu[driver] = list(zip(path[:-1], path[1:]))

    # Node sequence per driver: tail of every arc plus the head of the last one
    tours_sorted = {}
    for driver, ordered_arcs in tours_neu.items():
        stops = [tail for tail, _head in ordered_arcs]
        if ordered_arcs:
            stops.append(ordered_arcs[-1][1])
        tours_sorted[driver] = stops

    # Service start times at the visited nodes of each driver
    opt_s_driver = {}
    for driver, stops in tours_sorted.items():
        for node in stops:
            opt_s_driver[node, driver] = optimal_S[node, driver]

    # Minutes since midnight rendered as HH:MM text
    starting_times_formatted = {
        key: (datetime.min + timedelta(minutes=minutes)).strftime('%H:%M')
        for key, minutes in Starting_times.items()
    }

    # Leading 0, then one entry per driver: h if active, -h otherwise
    activities = [0] + [h if h in active_drivers else h * -1 for h in H]

    # Time-window text at the origin / destination of every active driver
    used_origins = {h: node for h, node in origin.items() if h in active_drivers}
    tw_lb_driver = {h: tw_lb[node] for h, node in used_origins.items()}

    used_destinations = {
        h: node for h, node in destination.items() if h in active_drivers
    }
    tw_ub_driver = {h: tw_ub[node] for h, node in used_destinations.items()}

    # Additional metrics
    active_travel_times = {tail: t[tail, head] for tail, head in active}
    used_etp = {h: ETP[h] for h in active_drivers}
    used_wtp = {node: WTP[node] for node in active_nodes if node in A}

    # Map every visited node to the driver visiting it
    request_driver_mapping = {}
    for driver, stops in tours_sorted.items():
        for node in stops:
            request_driver_mapping[node] = driver

    # Surplus metrics
    paid_wtp = sum(WTP[i] for i in active_requests)
    sender_surplus = round(paid_wtp - total_revenue, 2)

    expected_compensation = sum(
        ETP[h] * d[i, j] * optimal_x[i, j, h]
        for i, j in E for h in H
        if i in origin_pickup_delivery[h] and j in pickup_delivery
    )
    courier_surplus = round(total_costs - expected_compensation, 2)

    # Detour metrics: distance from the second-to-last stop to the destination
    detours = {
        h: d[stops[-2], destination[h]]
        for h, stops in tours_sorted.items()
    }
    total_detours = round(sum(detours.values()), 2)

    total_km_per_tour = {
        h: round(sum(d[arc] for arc in ordered_arcs), 2)
        for h, ordered_arcs in tours_neu.items()
    }

    origin_dest_km = {h: round(d[origin[h], destination[h]], 2) for h in H}

    toured_km = sum(total_km_per_tour[h] for h in tours_sorted)
    detour_total_km_ratio = round(total_detours / toured_km, 2)

    detour_tour_ratio = {
        h: round(detours[h] / total_km_per_tour[h], 2)
        for h in tours_sorted
    }

    km_per_request = {i: d[i, i+n] for i in A}
    detour_per_tour = {h: detours[h] / origin_dest_km[h] for h in tours_sorted}
    avg_detour_per_tour = np.mean(list(detour_per_tour.values()))

    # Direct origin -> destination distance of the drivers without a tour
    origin_dest_km2 = {
        h: round(d[origin[h], destination[h]], 2)
        for h in H if h not in tours_neu
    }

    driven_km = sum(total_km_per_tour.values()) + sum(origin_dest_km2.values())

    externality = driven_km / sum(origin_dest_km[h] for h in H)

    print(f"Tours per driver: {tours_sorted}")

    return (
        tours_neu, tours_sorted, used_origins, tw_lb_driver, tw_ub_driver,
        used_destinations, activities, opt_s_driver, tours, active_nodes,
        Starting_times, starting_times_formatted, active_travel_times,
        used_etp, used_wtp, request_driver_mapping, sender_surplus,
        courier_surplus, total_detours, detours, detour_total_km_ratio,
        total_km_per_tour, origin_dest_km, detour_tour_ratio, km_per_request,
        detour_per_tour, avg_detour_per_tour, origin_dest_km2, driven_km,
        externality
    )


# ============================================================================
# VISUALIZATION FUNCTIONS
# ============================================================================

def create_route_map(V, active, A, A_, tau, tau_, all_x, all_y, tours,
                    active_drivers, district_names, district_coord_x,
                    district_coord_y):
    """
    Create a geographical map visualization of the optimized routes.

    Reads the district and environmental-zone shapefiles from
    ``<cwd>/data_Berlin``, draws them on a PlateCarree map, overlays the
    directed graph built from ``V`` and ``active``, colours every edge by
    the driver whose tour contains it, and displays the figure with
    ``plt.show()``.

    Args:
        V (list): All nodes
        active (list): Active edges (added to the graph via
            ``add_edges_from``)
        A, A_ (list): Pickup and delivery nodes
        tau, tau_ (list): Origin and destination nodes; the first entry of
            each list is not drawn
        all_x, all_y: Node coordinates, indexed by node; ``all_x`` is used
            as the horizontal and ``all_y`` as the vertical position
        tours (dict): Maps a driver index to the edges of that driver's
            tour; keys are used as indices into the colour list
        active_drivers (list): Active drivers (indices into the colour list)
        district_names (dict): District names keyed by district index
        district_coord_x, district_coord_y: Label positions per district;
            ``district_coord_y`` is used as the horizontal and
            ``district_coord_x`` as the vertical text position

    Returns:
        tuple: ``(edge_colors, colors, num_edges, driver_colors, fig)`` where
            ``edge_colors`` maps edge -> RGBA colour, ``colors`` is the list
            of RGBA colours indexed by driver, ``num_edges`` is the number
            of graph edges plus one, ``driver_colors`` maps each active
            driver -> RGBA colour, and ``fig`` is the matplotlib figure.

    Raises:
        KeyError: If an edge in ``active`` is not part of any tour in
            ``tours``.
    """
    data_dir = os.path.join(os.getcwd(), "data_Berlin")

    # Load Berlin district shapefiles
    districts = gpd.read_file(os.path.join(data_dir, "berlin_ortsteile.shp"))

    # Create network graph
    G = nx.DiGraph()
    G.add_nodes_from(V)
    G.add_edges_from(active)
    pos = {i: (all_x[i], all_y[i]) for i in V}

    # Set up the map on exactly one axes spanning the whole figure; a second
    # axes on the same figure would stay behind as an empty, unused map.
    fig = plt.figure(figsize=(16, 10))
    ax = fig.add_axes([0, 0, 1, 1], projection=ccrs.PlateCarree())

    # Add geographical features
    ax.add_feature(cfeature.LAKES, edgecolor='black', alpha=0.5)
    ax.add_feature(cfeature.RIVERS, edgecolor='black', alpha=0.9)

    # Add district labels
    for i in district_names:
        ax.text(
            district_coord_y[i], district_coord_x[i], district_names[i],
            fontsize=8, color='grey', ha='center', va='top'
        )

    # Plot district boundaries
    districts.boundary.plot(ax=ax, color='gray', linewidth=1, alpha=0.5)
    ax.add_feature(
        cfeature.NaturalEarthFeature(
            category='cultural', name='admin_0_boundary_lines_land',
            scale='10m', facecolor=None
        )
    )
    ax.set_extent([13.27, 13.51, 52.450, 52.56], crs=ccrs.PlateCarree())

    # Add environmental zone: the shapefile is tagged as EPSG:25833 and then
    # reprojected to the map projection before drawing.
    umweltzone = gpd.read_file(os.path.join(data_dir, "Umweltzone_Berlin.shp"))
    umweltzone = umweltzone.set_crs("EPSG:25833")
    target_crs = ccrs.PlateCarree()
    umweltzone = umweltzone.to_crs(target_crs.proj4_init)
    ax.add_geometries(
        list(umweltzone['geometry']), crs=ccrs.PlateCarree(),
        facecolor='green', edgecolor='black', alpha=0.1
    )

    # Define node styling options; the axes is passed explicitly so the
    # drawing does not depend on pyplot's implicit "current axes".
    options = {"edgecolors": "k", "node_size": 200, "alpha": 0.5, "ax": ax}

    # Draw different node types with different colors and shapes
    nx.draw_networkx_nodes(G, pos, nodelist=tau[1:], node_color="yellow",
                          node_shape='H', **options)  # Origins
    nx.draw_networkx_nodes(G, pos, nodelist=A, node_color="tab:orange",
                          node_shape='o', **options)  # Pickups
    nx.draw_networkx_nodes(G, pos, nodelist=A_, node_color="tab:red",
                          node_shape='o', **options)  # Deliveries
    nx.draw_networkx_nodes(G, pos, nodelist=tau_[1:], node_color="tab:brown",
                          node_shape='d', **options)  # Destinations

    # Add node labels
    nx.draw_networkx_labels(G, pos, labels={n: n for n in G},
                           font_size=8, font_color='k', ax=ax)

    # Color edges by driver
    edges = list(G.edges())
    num_edges = len(edges) + 1  # kept because it is part of the return value
    colormap = plt.colormaps['tab20']
    # Index the palette by integer so consecutive driver indices receive
    # distinct entries (wrapping after colormap.N drivers). Scaling the index
    # by the edge count would map several drivers onto the same entry.
    # default=-1 yields an empty colour list when there are no tours.
    colors = [
        colormap(i % colormap.N)
        for i in range(max(tours, default=-1) + 1)
    ]
    edge_colors = {a: colors[h] for h in tours for a in tours[h]}
    driver_colors = {h: colors[h] for h in active_drivers}
    edge_color_list = [edge_colors[a] for a in edges]

    # Draw edges
    nx.draw_networkx_edges(
        G, pos, edge_color=edge_color_list, alpha=1.0, width=1.5,
        arrows=True, arrowsize=12, ax=ax, connectionstyle="arc3,rad=0.1"
    )

    # Configure map display
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim(13.27, 13.51)
    ax.set_ylim(52.450, 52.562)
    ax.gridlines(draw_labels=True)
    ax.add_feature(cfeature.LAND)
    ax.set_title("Route Map: IPIC_SP_p")
    plt.show()

    return edge_colors, colors, num_edges, driver_colors, fig


def create_gantt_chart(tours_sorted, H, Starting_times, s, active_t, tau_,
                      active_nodes, tw_lb_driver, tw_ub_driver, driver_colors):
    """
    Create a Gantt chart visualization of driver schedules.

    For every node on every tour three bars are drawn on the driver's row:
    a solid bar from the node's start time to its end time, a faint bar
    spanning the driver's time window, and a hatched bar from
    ``start + service`` to the end time. The figure is displayed with
    ``plt.show()``.

    Args:
        tours_sorted (dict): Maps driver -> ordered nodes of the tour; the
            driver key is used as the numeric row position
        H (list): Set of drivers (not used by this function)
        Starting_times (dict): Start time per node, in minutes (converted
            to 'HH:MM' as an offset from midnight)
        s (dict): Service time per node, same unit as ``Starting_times``
        active_t (dict): Travel time per node, same unit as
            ``Starting_times``
        tau_ (list): Destination nodes; the first entry is ignored
        active_nodes (list): Active nodes
        tw_lb_driver, tw_ub_driver (dict): Driver time-window bounds as
            'HH:MM' strings
        driver_colors (dict): Bar colour per driver

    Returns:
        tuple: ``(ending_times, ending_times_formatted, df, start3, fig)``
            where ``ending_times`` maps node -> end time in minutes,
            ``ending_times_formatted`` holds the same values as 'HH:MM'
            strings, ``df`` is the list of row dicts that were plotted,
            ``start3`` maps node -> 'HH:MM' string of ``start + service``,
            and ``fig`` is the matplotlib figure.
    """
    def to_clock(minutes):
        """Format a minute offset from midnight as an 'HH:MM' string."""
        return (datetime.min + timedelta(minutes=minutes)).strftime('%H:%M')

    destination_nodes = tau_[1:]
    # Destinations have no service or onward travel: their start, travel
    # start and end times all coincide with Starting_times.
    visited_destinations = [i for i in destination_nodes if i in active_nodes]
    other_nodes = [i for i in Starting_times if i not in destination_nodes]

    # Calculate ending times (start + service + travel)
    ending_times = {
        i: Starting_times[i] + s[i] + active_t[i] for i in other_nodes
    }
    ending_times.update({i: Starting_times[i] for i in visited_destinations})
    ending_times_formatted = {
        node: to_clock(value) for node, value in ending_times.items()
    }

    # Calculate the moment service is finished and travel begins
    travel_start = {i: Starting_times[i] + s[i] for i in other_nodes}
    travel_start.update({i: Starting_times[i] for i in visited_destinations})
    start3 = {node: to_clock(value) for node, value in travel_start.items()}

    # The bar start labels are derived here from Starting_times so the
    # function does not depend on a name defined outside its scope.
    starting_times_formatted = {
        node: to_clock(value) for node, value in Starting_times.items()
    }

    # Create row records for the Gantt chart
    df = [
        {
            'task': node,
            'Start': starting_times_formatted[node],
            'Finish': ending_times_formatted[node],
            'Resource': driver,
            'color': driver_colors[driver],
            'start2': tw_lb_driver[driver],
            'end2': tw_ub_driver[driver],
            'start3': start3[node],
            # Travel ends exactly when the node's bar ends.
            'end3': ending_times_formatted[node],
        }
        for driver, nodes in tours_sorted.items()
        for node in nodes
    ]

    # Create Gantt chart
    fig, ax = plt.subplots(figsize=(16, 8))

    for item in df:
        row = (item['Resource'] - 0.25, 0.5)

        start = datetime.strptime(item['Start'], '%H:%M')
        finish = datetime.strptime(item['Finish'], '%H:%M')
        duration = finish - start

        # Draw service time bars
        ax.broken_barh(
            [(start, duration)], row,
            facecolors=item['color'], zorder=3, alpha=0.5
        )

        # Add task labels
        ax.annotate(
            item['task'], (start + duration / 5, item['Resource']),
            xytext=(0, 0), textcoords='offset points', ha='center',
            va='bottom', fontsize=9, color='black', zorder=5
        )

        # Draw time windows and travel times. Distinct local names keep the
        # start3 mapping intact for the return value.
        window_start = datetime.strptime(item['start2'], '%H:%M')
        window_end = datetime.strptime(item['end2'], '%H:%M')
        travel_from = datetime.strptime(item['start3'], '%H:%M')
        travel_to = datetime.strptime(item['end3'], '%H:%M')

        ax.broken_barh(
            [(window_start, window_end - window_start)], row,
            facecolors=item['color'], zorder=2, alpha=0.1
        )
        ax.broken_barh(
            [(travel_from, travel_to - travel_from)], row,
            facecolor='none', edgecolor='lightgrey', hatch='//', zorder=4
        )

    # Create legend
    tour_duration = mpatches.Patch(color='tab:red', label='Servicezeit am Knoten')
    driver_tw = mpatches.Patch(color='pink', label='Zeitfenster der Fahrer in hell')
    travel_time = mpatches.Patch(
        facecolor='none', hatch='//', edgecolor='grey', label='Fahrtzeit'
    )
    node_number = mpatches.Patch(color='white', label='1,2,.: Nr besuchter Knoten')

    ax.legend(
        handles=[tour_duration, driver_tw, travel_time, node_number],
        loc='lower right'
    )

    # Configure chart
    plt.xlabel("Zeit")
    plt.ylabel("Fahrer")
    plt.title("Gantt Chart: IPIC_SP_p")

    y_ticks = list(tours_sorted.keys())
    ax.set_yticks(y_ticks)
    ax.set_yticklabels(y_ticks)

    # strptime('%H:%M') places every bar on 1900-01-01, so the hourly ticks
    # from 10:00 to 20:00 are generated on that same date.
    x_ticks = pd.date_range(
        start=datetime(1900, 1, 1, 10, 0),
        end=datetime(1900, 1, 1, 20, 0),
        freq='h'
    )
    x_labels = [tick.strftime('%H:%M') for tick in x_ticks]
    ax.set_xticks(x_ticks)
    ax.set_xticklabels(x_labels, rotation=45, ha='right')
    ax.grid(zorder=0)
    plt.show()

    return ending_times, ending_times_formatted, df, start3, fig


# ============================================================================
# MAIN EXECUTION WORKFLOW
# ============================================================================

def main():
    """
    Main function to execute the complete pricing model workflow.

    This function orchestrates the entire process from data loading to
    optimization and visualization: it installs requirements, sets up the
    repository, reads the instance data, runs ``pricing_model``, and, if a
    result is returned, post-processes it and creates the route map and
    the Gantt chart.

    Returns:
        dict | None: ``None`` when ``pricing_model`` returns ``None``;
            otherwise a dict with the keys ``'optimization_result'``,
            ``'processed_results'``, ``'additional_variables'``,
            ``'map_visualization'`` and ``'gantt_visualization'`` holding
            the raw return values of the respective steps.
    """
    # Setup environment
    print("Setting up environment...")
    install_requirements()
    setup_repository()

    # Load and process data
    print("Loading and processing data...")
    all_data = read_data_create_sets(TWD_DATA_PATH, DISTRICTS_DATA_PATH)
    (
        wtp_factor, etp_factor, H, R, tau, tau_, V, pickup_delivery, ETP, WTP,
        tw_lb, tw_ub, destination, origin, pickup_destination, delivery_origin,
        delivery_destination, Vh, origin_pickup_delivery, pickup_delivery_destination,
        E, E1, d, d2, t, m, n, A, A_, a, b, l, s, R2, all_x, all_y, speed,
        district_names, district_coord_x, district_coord_y
    ) = all_data

    # Run optimization model
    print("Running optimization model...")
    result = pricing_model(
        n, m, A, A_, H, WTP, ETP, R, d2, V, d, t, s, l, a, b, M,
        pickup_delivery, origin_pickup_delivery, tau, tau_
    )

    # Nothing to post-process or plot without a solution.
    if result is None:
        print("Optimization failed.")
        return None

    (
        objective_value, optimal_L, optimal_S, optimal_c,
        optimal_p, optimal_x, optimal_z, runtime
    ) = result

    print(f"Optimization successful! Runtime: {runtime}ms")

    # Extract and process results
    print("Processing results...")
    results_data = extract_results(
        optimal_x, optimal_S, optimal_L, optimal_z, optimal_p, optimal_c,
        objective_value, E, H, V, A, A_, pickup_delivery, origin_pickup_delivery,
        tau_, d, d2, WTP, ETP
    )

    (
        active_arcs, active_prices, active_costs, active_load, active_z,
        active_start, tours, active_drivers, active_arcs_2d, active,
        chosen_c, chosen_cost, chosen_p, optimal_request_prices,
        optimal_driver_costs, total_revenue, total_costs, used_drivers,
        possible_matches, successful_matches, edge_costs, margin_per_request,
        margin_per_tour, active_requests
    ) = results_data

    # Process additional variables
    # NOTE(review): `active_nodes` and `Starting_times` are never assigned in
    # this function before being passed here, so this call raises NameError
    # unless module-level globals of those names exist. The argument list
    # must be checked against process_additional_variables' signature.
    print("Processing additional variables...")
    additional_vars = process_additional_variables(
        tours, H, V, E, active, tau_, active_nodes, s, t,
        Starting_times, active_drivers, origin, destination
    )

    # Create visualizations
    print("Creating route map...")
    map_result = create_route_map(
        V, active, A, A_, tau, tau_, all_x, all_y, tours,
        active_drivers, district_names, district_coord_x, district_coord_y
    )
    # create_route_map returns
    # (edge_colors, colors, num_edges, driver_colors, fig); the Gantt chart
    # reuses the per-driver colours so both plots match.
    driver_colors = map_result[3]

    # NOTE(review): `tours_sorted`, `active_t`, `tw_lb_driver` and
    # `tw_ub_driver` are not assigned anywhere in this function either.
    # Presumably they are meant to be taken from `additional_vars` -- TODO
    # confirm against process_additional_variables' return value.
    print("Creating Gantt chart...")
    gantt_result = create_gantt_chart(
        tours_sorted, H, Starting_times, s, active_t, tau_,
        active_nodes, tw_lb_driver, tw_ub_driver, driver_colors
    )

    print("Analysis complete!")

    return {
        'optimization_result': result,
        'processed_results': results_data,
        'additional_variables': additional_vars,
        'map_visualization': map_result,
        'gantt_visualization': gantt_result
    }


# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def print_solution_summary(results_dict):
    """
    Write a short, framed summary of the optimization outcome to stdout.

    Args:
        results_dict (dict): Result dictionary as returned by main(); its
            'optimization_result' entry must be a sequence whose first
            element is the objective value and whose last element is the
            runtime. ``None`` is accepted and reported as a failed run.
    """
    if results_dict is None:
        print("No results to summarize - optimization failed.")
        return

    optimization = results_dict['optimization_result']
    profit = optimization[0]
    elapsed = optimization[-1]

    rule = "=" * 60  # horizontal separator framing the report

    print(f"\n{rule}")
    print("CROWDSHIPPING PRICING MODEL - SOLUTION SUMMARY")
    print(rule)
    print(f"Objective Value (Platform Profit): €{profit:.2f}")
    print(f"Optimization Runtime: {elapsed}ms")
    print(rule)


# ============================================================================
# EXECUTION
# ============================================================================

if __name__ == "__main__":
    # Run the full workflow only when this file is executed as a script.
    # main() returns None when the optimization yields no result;
    # print_solution_summary() handles that case by printing a notice.
    results = main()
    print_solution_summary(results)