# § 5. UNIT ECONOMICS ANALYSIS

---

## Research Question 2.3: Revenue Optimization by Trip Type

**Hypothesis:** Short Manhattan trips and airport runs generate highest revenue per kilometer due to premium pricing, while long-distance commuter trips show economies of scale with lower per-km rates.

**Methodology:**
- Calculate cost_per_km segmented by trip_archetype
- Horizontal bar chart ranking trip types by unit economics
- Contribution analysis (volume × unit price) for revenue prioritization

**Expected Insights:**
- Most profitable trip segments for driver/platform
- Volume vs margin trade-offs
- Strategic focus areas for network optimization

In [2]:
# =============================================================================
# PROJECT ATLAS: 02c. UNIT ECONOMICS ANALYSIS
# =============================================================================
#
# OBJECTIVE: Analyze revenue optimization by trip archetype
# DATA SOURCE: tlc_sample_*_processed.parquet
# =============================================================================

# -----------------------------------------------------------------------------
# § 1. ENVIRONMENT SETUP
# -----------------------------------------------------------------------------

import polars as pl
import pandas as pd
import numpy as np
import os
import glob
import warnings
import plotly.graph_objects as go
import plotly.io as pio
from pathlib import Path
from typing import Dict

# Suppress every warning category so the notebook output stays clean.
# NOTE(review): this also hides deprecation warnings raised by the libraries in use.
warnings.filterwarnings('ignore')

# Configuration
# Directory containing the sampled trip files (relative to the working directory).
SAMPLE_DIR = './HVFHV subsets 2019-2025 - Samples/'

# Named glob patterns for the input data; 'sample_pattern' matches the
# processed sample parquet files inside SAMPLE_DIR.
DATA_PATHS = {
    'sample_pattern': os.path.join(SAMPLE_DIR, 'tlc_sample_*_processed.parquet')
}

# =============================================================================
# PLOTLY + UBER STYLE BOOTSTRAP
# =============================================================================
# Path and pio are re-imported here so this section can be pasted on its own;
# repeating the imports is harmless.
from pathlib import Path
import plotly.io as pio

import uber_style as ub 

# Register the project template under the name "uber" and make it the default
# for every figure created afterwards.
pio.templates["uber"] = ub.uber_style_template
pio.templates.default = "uber"

# Star import: the figure cells reference style constants (e.g. UBER_BLACK,
# GRAY_300) without a module prefix — presumably they all come from here; verify.
from uber_style import *

# Folder where figures are cached as JSON + HTML; created if it does not exist.
PLOT_DIR = Path("plots")
PLOT_DIR.mkdir(exist_ok=True)


def _plot_paths(fig_name: str):
    """Build the cache file locations for one figure.

    Args:
        fig_name: Base file name of the figure (without extension).

    Returns:
        Tuple ``(json_path, html_path)``, both located inside ``PLOT_DIR``.
    """
    return tuple(PLOT_DIR / f"{fig_name}{suffix}" for suffix in (".json", ".html"))


def load_plot_if_exists(fig_name: str):
    """
    Fetch a previously saved figure from its JSON cache file.

    Args:
        fig_name: Base file name of the figure (without extension).

    Returns:
        ``(fig, True)`` when the JSON file exists and was parsed,
        ``(None, False)`` when there is no JSON file for this figure.
    """
    cached_json = _plot_paths(fig_name)[0]

    # Guard clause: nothing cached yet for this figure.
    if not cached_json.exists():
        return None, False

    return pio.from_json(cached_json.read_text(encoding="utf-8")), True


def save_plot(fig, fig_name: str):
    """
    Persist a figure to the plot folder as JSON and as HTML, without displaying it.

    Args:
        fig: Plotly figure to serialise.
        fig_name: Base file name of the figure (without extension).
    """
    json_target, html_target = _plot_paths(fig_name)

    # JSON copy (this is what load_plot_if_exists reads back)
    json_target.write_text(pio.to_json(fig), encoding="utf-8")

    # HTML copy; plotly.js is referenced from the CDN instead of being embedded
    pio.write_html(fig, file=str(html_target), include_plotlyjs="cdn", auto_open=False)

# Confirmation banner for the setup cell (two output lines).
print(
    "‚úÖ Environment configured successfully",
    "   - Notebook: 02c_Unit_Economics",
    sep="\n",
)

‚úÖ Environment configured successfully
   - Notebook: 02c_Unit_Economics


In [3]:
# -----------------------------------------------------------------------------
# § 2. DATA LOADING & PREPARATION
# -----------------------------------------------------------------------------

def load_sample_data_pricing(pattern: str) -> pl.DataFrame:
    """
    Load the sample files and keep only the columns used for pricing analysis.

    Missing derived features are created on the fly:
      * ``pickup_hour``       - hour of ``pickup_datetime``
      * ``time_of_day_bin``   - bucket derived from ``pickup_hour``
      * ``cultural_day_type`` - bucket derived from weekday and ``pickup_hour``

    After column selection, rows are dropped unless distance, duration, total
    rider cost and base fare are all positive and the total rider cost does not
    exceed the 99.9th percentile of the loaded data.

    Args:
        pattern: Glob pattern for sample files

    Returns:
        Polars DataFrame with pricing columns and engineered features

    Raises:
        FileNotFoundError: If no file matches ``pattern``.
    """
    sample_files = sorted(glob.glob(pattern))

    if not sample_files:
        raise FileNotFoundError(f"No files found matching pattern: {pattern}")

    print(f"   üìÇ Loading {len(sample_files)} sample files for pricing analysis...")

    # Load all columns first - we'll create derived features if needed
    df = pl.read_parquet(sample_files)

    # pickup_hour feeds both derived features below AND is part of the final
    # column selection, so derive it whenever it is absent - independent of
    # whether time_of_day_bin already exists in the input.
    if 'pickup_hour' not in df.columns:
        df = df.with_columns([
            pl.col('pickup_datetime').dt.hour().alias('pickup_hour')
        ])

    # Feature Engineering: Create time_of_day_bin if not present
    if 'time_of_day_bin' not in df.columns:
        print("   üîß Creating time_of_day_bin from pickup_hour...")
        # Hour ranges are checked in order; any hour not matched by a range
        # (23 and 0-5) falls through to 'late_night'.
        df = df.with_columns([
            pl.when(pl.col('pickup_hour').is_between(6, 9))
            .then(pl.lit('morning_rush'))
            .when(pl.col('pickup_hour').is_between(10, 15))
            .then(pl.lit('midday'))
            .when(pl.col('pickup_hour').is_between(16, 19))
            .then(pl.lit('evening_rush'))
            .when(pl.col('pickup_hour').is_between(20, 22))
            .then(pl.lit('evening'))
            .otherwise(pl.lit('late_night'))
            .alias('time_of_day_bin')
        ])

    # Feature Engineering: Create cultural_day_type if not present
    if 'cultural_day_type' not in df.columns:
        print("   üîß Creating cultural_day_type from day of week...")
        # NOTE(review): the branches below assume ISO weekday numbering
        # (1 = Monday ... 7 = Sunday) - confirm against the installed polars.
        df = df.with_columns([
            pl.col('pickup_datetime').dt.weekday().alias('pickup_dow')
        ])
        # Branch order matters: days 5/6 from 18:00 win first, then day 7,
        # so the 'weekend_day' branch effectively only receives day 6 before 18:00.
        df = df.with_columns([
            pl.when((pl.col('pickup_dow').is_in([5, 6])) & (pl.col('pickup_hour') >= 18))
            .then(pl.lit('weekend_night'))
            .when(pl.col('pickup_dow') == 7)
            .then(pl.lit('sunday_rest'))
            .when(pl.col('pickup_dow').is_in([6, 7]))
            .then(pl.lit('weekend_day'))
            .otherwise(pl.lit('workday'))
            .alias('cultural_day_type')
        ])

    # Columns needed for pricing analysis. The three engineered columns are
    # guaranteed by the steps above; all others must be present in the files.
    pricing_columns = [
        'pickup_datetime', 'trip_km', 'duration_min', 'speed_kmh',
        'total_rider_cost', 'base_passenger_fare', 'driver_pay',
        'tips', 'tolls', 'congestion_surcharge', 'airport_fee', 'cbd_congestion_fee',
        'pickup_borough', 'dropoff_borough', 'trip_archetype',
        'time_of_day_bin', 'cultural_day_type', 'pickup_hour',
        'cost_per_km', 'tipping_pct', 'driver_revenue_share'
    ]

    # Select only needed columns to reduce memory
    df = df.select(pricing_columns)

    # Data quality filtering (use quantile-based threshold instead of hard cutoff)
    price_cap = df.select(pl.col('total_rider_cost').quantile(0.999)).item()
    print(f"   üìä Price threshold (99.9th percentile): ${price_cap:.2f}")

    df_clean = df.filter(
        (pl.col('trip_km') > 0) &
        (pl.col('duration_min') > 0) &
        (pl.col('total_rider_cost') > 0) &
        (pl.col('total_rider_cost') <= price_cap) &  # Quantile-based filtering
        (pl.col('base_passenger_fare') > 0)
    )

    print(f"   ‚úÖ Loaded: {df_clean.height:,} trips ({df.height - df_clean.height:,} filtered)")

    return df_clean

# Run the loading pipeline; on failure report the error, then let it propagate
print("‚è≥ Loading data for pricing analysis...")
print("-" * 60)

try:
    # Read the processed sample files and report the resulting size
    print("üìä Loading Sample Data (tlc_sample_*_processed)...")
    df_sample = load_sample_data_pricing(DATA_PATHS['sample_pattern'])
    print(f"   üíæ Memory footprint: {df_sample.estimated_size('mb'):.1f} MB")

    print(f"\n{'=' * 60}")
    print("‚úÖ DATA LOADING COMPLETE - Ready for pricing analysis")
    print("=" * 60)

except Exception as exc:
    print("\n‚ùå ERROR: Data loading failed")
    print(f"   Details: {exc}")
    raise

‚è≥ Loading data for pricing analysis...
------------------------------------------------------------
üìä Loading Sample Data (tlc_sample_*_processed)...
   üìÇ Loading 7 sample files for pricing analysis...
   üìä Price threshold (99.9th percentile): $175.95
   üìä Price threshold (99.9th percentile): $175.95
   ‚úÖ Loaded: 9,820,414 trips (9,827 filtered)
   üíæ Memory footprint: 997.2 MB

‚úÖ DATA LOADING COMPLETE - Ready for pricing analysis
   ‚úÖ Loaded: 9,820,414 trips (9,827 filtered)
   üíæ Memory footprint: 997.2 MB

‚úÖ DATA LOADING COMPLETE - Ready for pricing analysis


In [4]:
# =============================================================================
# § 5. UNIT ECONOMICS ANALYSIS BY TRIP ARCHETYPE — UBER STYLE FORMAT
# =============================================================================

def analyze_unit_economics(df: pl.DataFrame) -> Dict:
    """
    Summarise unit economics for every trip archetype.

    Only rows with a non-null ``trip_archetype``, a ``cost_per_km`` in (0, 15]
    and a positive ``total_rider_cost`` take part in the aggregation.

    Args:
        df: Trip-level frame containing ``trip_archetype``, ``cost_per_km``,
            ``total_rider_cost`` and ``trip_km``.

    Returns:
        Dict with two entries:
          * ``'archetype_metrics'`` - pandas DataFrame, one row per archetype,
            sorted by ``median_cost_per_km`` (highest first), including
            ``volume_share`` and ``revenue_share``.
          * ``'sample_size'`` - number of rows that passed the filter.
    """
    per_km = pl.col('cost_per_km')
    rider_cost = pl.col('total_rider_cost')

    # Row-level eligibility mask
    keep = (
        pl.col('trip_archetype').is_not_null()
        & (per_km > 0)
        & (per_km <= 15)
        & (rider_cost > 0)
    )
    eligible = df.filter(keep)

    # Per-archetype aggregates
    aggregations = [
        per_km.median().alias('median_cost_per_km'),
        per_km.mean().alias('mean_cost_per_km'),
        rider_cost.median().alias('median_total_cost'),
        pl.col('trip_km').median().alias('median_distance'),
        pl.count().alias('trip_count'),
        rider_cost.sum().alias('total_revenue'),
    ]

    # Ratios derived from the aggregates above
    ratios = [
        (pl.col('total_revenue') / pl.col('trip_count')).alias('avg_revenue_per_trip'),
        (pl.col('trip_count') / pl.col('trip_count').sum()).alias('volume_share'),
    ]

    summary = (
        eligible
        .group_by('trip_archetype')
        .agg(aggregations)
        .with_columns(ratios)
        .sort('median_cost_per_km', descending=True)
        .to_pandas()
    )

    # Share of total revenue, computed on the pandas side
    revenue = summary['total_revenue']
    summary['revenue_share'] = revenue / revenue.sum()

    return {'archetype_metrics': summary, 'sample_size': eligible.height}

In [5]:
# =============================================================================
# 5.1 Cost per KM by Trip Archetype — VISUALIZATION
# =============================================================================

# Section banner
print(f"\n{'=' * 80}")
print("ANALYSIS 2.4: UNIT ECONOMICS BY TRIP ARCHETYPE")
print("=" * 80)

# Run the aggregation once; the per-archetype table is reused by later cells
economics_analysis = analyze_unit_economics(df_sample)
archetype_metrics = economics_analysis["archetype_metrics"]

print("\nüìä Unit Economics Analysis:")
print(f"   Sample size: {economics_analysis['sample_size']:,} trips")

# One line per archetype, in the table's order (highest median cost/km first)
print("\n   Revenue per KM by Trip Type (Ranked):")
for row in archetype_metrics.itertuples(index=False):
    print(
        f"   {row.trip_archetype:30s}: "
        f"${row.median_cost_per_km:.2f}/km | "
        f"Vol {row.volume_share * 100:5.1f}% | "
        f"Rev {row.revenue_share * 100:5.1f}%"
    )


ANALYSIS 2.4: UNIT ECONOMICS BY TRIP ARCHETYPE

üìä Unit Economics Analysis:
   Sample size: 9,595,303 trips

   Revenue per KM by Trip Type (Ranked):
   commute                       : $4.50/km | Vol  24.3% | Rev  21.6%
   leisure                       : $4.13/km | Vol  56.8% | Rev  49.6%
   nightlife                     : $4.12/km | Vol  11.5% | Rev  10.5%
   airport                       : $3.21/km | Vol   7.4% | Rev  18.3%

üìä Unit Economics Analysis:
   Sample size: 9,595,303 trips

   Revenue per KM by Trip Type (Ranked):
   commute                       : $4.50/km | Vol  24.3% | Rev  21.6%
   leisure                       : $4.13/km | Vol  56.8% | Rev  49.6%
   nightlife                     : $4.12/km | Vol  11.5% | Rev  10.5%
   airport                       : $3.21/km | Vol   7.4% | Rev  18.3%


In [None]:
# =============================================================================
# FIGURE 2.4 — UNIT ECONOMICS BY ARCHETYPE (Clean & Polished)
# =============================================================================
# Horizontal bar chart: one bar per trip archetype, bar length = median cost
# per km, bar colour = revenue share. The figure is rebuilt only when no cached
# JSON exists for FIG_NAME; delete the cached file to force a rebuild.

FIG_NAME = "fig_2_4_unit_economics_archetype"

# Custom Uber Green Scale
UBER_GREEN_SCALE = [
    [0.0, "#F6F6F6"],   # Low revenue
    [0.3, "#D3EFDE"],
    [0.6, "#47B275"],
    [1.0, "#0E3F25"]    # High revenue
]

# ------------------------------------------------------------
# TRY LOAD FROM CACHE
# ------------------------------------------------------------
fig, loaded = load_plot_if_exists(FIG_NAME)

if not loaded:

    # 1. PREPARE DATA
    # Ascending sort so the highest value ends up at the top of the horizontal chart
    df_plot = archetype_metrics.copy()
    df_plot = df_plot.sort_values("median_cost_per_km", ascending=True)

    # Calculate Overall Median (same 0 < cost_per_km <= 15 window as the
    # per-archetype metrics); works for a pandas or a polars df_sample.
    try:
        if isinstance(df_sample, pd.DataFrame):
            overall_median_ppk = df_sample[
                (df_sample['cost_per_km'] > 0) & (df_sample['cost_per_km'] <= 15)
            ]['cost_per_km'].median()
        else:
            overall_median_ppk = df_sample.filter(
                (pl.col('cost_per_km') > 0) & (pl.col('cost_per_km') <= 15)
            )['cost_per_km'].median()
    except Exception:
        # Best-effort fallback so the chart can still be drawn. Catching
        # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
        # NOTE(review): 3.99 is a hard-coded value - presumably a previously
        # observed overall median; confirm.
        overall_median_ppk = 3.99

    # ------------------------------------------------------------
    # 2. BUILD FIGURE
    # ------------------------------------------------------------
    fig = go.Figure()

    # --- Horizontal Bar Trace ---
    fig.add_trace(
        go.Bar(
            y=df_plot["trip_archetype"],
            x=df_plot["median_cost_per_km"],
            orientation='h',

            marker=dict(
                color=df_plot["revenue_share"],
                colorscale=UBER_GREEN_SCALE,
                showscale=True,
                line=dict(color=UBER_BLACK, width=1),

                # Compact colorbar
                colorbar=dict(
                    title=dict(text="Rev Share", side="top"),
                    tickformat=".0%",
                    ticks="",
                    ticklen=0,
                    thickness=12,
                    len=0.5,
                    x=1.02,
                    outlinecolor=GRAY_300,
                    outlinewidth=0
                )
            ),
            hoverinfo="none",
            name="Metrics"
        )
    )

    # --- Reference Line (Overall Median) ---
    # IMPORTANT: layer="below" puts the line behind the bars
    fig.add_vline(
        x=overall_median_ppk,
        line_width=1.5,
        line_dash="dot",
        line_color=UBER_PURPLE,
        opacity=0.5,
        layer="below"
    )

    # Reference Line Annotation (with white background)
    fig.add_annotation(
        x=overall_median_ppk,
        y=1,
        xref="x", yref="paper",
        text=f"Median: ${overall_median_ppk:.2f}",
        showarrow=False,
        xanchor="left",
        yanchor="bottom",
        font=dict(color=UBER_PURPLE, size=10),
        bgcolor="white",  # Hide the reference line behind the text
        yshift=5,
        xshift=5
    )

    # ------------------------------------------------------------
    # 3. ANNOTATIONS (Labels logic - DECLUTTERED)
    # ------------------------------------------------------------
    for _, row in df_plot.iterrows():
        val = row["median_cost_per_km"]
        vol_share = row["volume_share"]
        rev_share = row["revenue_share"]

        # Contrast text color logic: white text on the darker (high revenue share) bars
        inner_text_color = UBER_WHITE if rev_share > 0.3 else GRAY_600

        # Label 1: Volume Share -> Simplified to "%" only, placed inside the bar end
        fig.add_annotation(
            x=val,
            y=row["trip_archetype"],
            text=f"<i>{vol_share:.1%}</i>",  # Just number + %
            font=dict(color=inner_text_color, size=10),
            showarrow=False,
            xanchor="right",
            xshift=-8
        )

        # Label 2: Price -> Bold format, placed just outside the bar end
        fig.add_annotation(
            x=val,
            y=row["trip_archetype"],
            text=f"<b>${val:.2f}</b>",
            font=dict(color=GRAY_900, size=12),
            showarrow=False,
            xanchor="left",
            xshift=8
        )

    # ------------------------------------------------------------
    # 4. LAYOUT
    # ------------------------------------------------------------
    fig.update_layout(
        template="plotly_white",
        width=1200,
        height=600,
        margin=dict(l=150, r=100, t=100, b=100),

        title=dict(
            text=(
                f"<b style='font-size:20px; color:{GRAY_900}'>Unit Economics by Trip Archetype</b><br>"
                f"<span style='font-size:14px; color:{GRAY_600}'>Median Cost per KM ($) ranked by profitability "
                f"(Color = Revenue Share)</span>"
            ),
            x=0.0,
            y=0.95,
            xanchor="left"
        ),

        xaxis=dict(
            visible=False,  # Hide X-axis for cleaner look
            # 25% headroom so the price label right of the longest bar fits
            range=[0, df_plot["median_cost_per_km"].max() * 1.25]
        ),

        yaxis=dict(
            showgrid=False,
            showline=False,
            tickfont=dict(size=13, color=GRAY_900, weight="bold")
        ),

        showlegend=False,
        hovermode=False
    )

    # Caption
    caption_text = (
        "<b>Note:</b> Inner label shows Volume Share (%).<br>"  # Explain % here instead of repeating on chart
        "Long-distance trips (Airport) yield lower unit margin but contribute heavily to total revenue."
    )

    fig.add_annotation(
        x=0, y=-0.1,
        xref="paper", yref="paper",
        text=caption_text,
        showarrow=False,
        font=dict(size=11, color=GRAY_600),
        align="left",
        xanchor="left"
    )

    # Save
    save_plot(fig, FIG_NAME)

# Show
#fig.show()

if loaded:
    print(f"   ‚úÖ {FIG_NAME} loaded from cache")
else:
    print(f"   ‚úÖ {FIG_NAME} generated and saved")

   ‚úÖ fig_2_4_unit_economics_archetype generated and saved


In [12]:
# -------------------------------------------------------------------
# 1. Data prep
# -------------------------------------------------------------------
# Classify every archetype into a volume/margin quadrant, using the median
# across archetypes as the split point on each axis.
df = archetype_metrics.copy()

volume_threshold = df["volume_share"].median()
margin_threshold = df["median_cost_per_km"].median()

def classify(row):
    """Return the quadrant label ("<High|Low> Volume / <High|Low> Margin") for one row."""
    v = "High Volume" if row["volume_share"] > volume_threshold else "Low Volume"
    m = "High Margin" if row["median_cost_per_km"] > margin_threshold else "Low Margin"
    return f"{v} / {m}"

df["segment"] = df.apply(classify, axis=1)

# Color palette (pastel consulting palette)
color_map = {
    "High Volume / High Margin": "#2ECC71",  # green (stars)
    "High Volume / Low Margin": "#3498DB",   # blue (volume plays)
    "Low Volume / High Margin": "#F1C40F",   # yellow (premium niche)
    "Low Volume / Low Margin": "#95A5A6"     # gray (deprioritize)
}

# -------------------------------------------------------------------
# 2. Normalize revenue share → bubble AREA (correct visual scaling)
# -------------------------------------------------------------------
rev = df["revenue_share"]
min_area = 250
max_area = 3200

# Min-max scale revenue share into [min_area, max_area]. When every archetype
# has the same revenue share (or there is only one archetype) the span is 0 and
# the division would produce NaN sizes, so fall back to a uniform mid-size area.
rev_span = rev.max() - rev.min()
if rev_span > 0:
    bubble_area = min_area + (rev - rev.min()) / rev_span * (max_area - min_area)
else:
    bubble_area = pd.Series((min_area + max_area) / 2, index=rev.index)

# Plotly's marker size is a linear dimension, not an area, so take the square
# root to keep the drawn AREA proportional to the scaled revenue share.
df["bubble_radius"] = np.sqrt(bubble_area)

In [None]:
# =============================================================================
# FIGURE 2.5 — REVENUE CONTRIBUTION MATRIX (Plotly + Uber Style + SWD)
# =============================================================================
# Bubble chart: x = volume share, y = median cost per km, bubble size = revenue
# share, colour = quadrant. The figure is rebuilt only when no cached JSON
# exists for FIG_NAME; delete the cached file to force a rebuild.

FIG_NAME = "fig_2_5_revenue_contribution_matrix"

# ------------------------------------------------------------
# TRY LOAD FROM CACHE
# ------------------------------------------------------------
fig, loaded = load_plot_if_exists(FIG_NAME)

if not loaded:

    # ------------------------------------------------------------
    # 1. PREPARE DATA (Segment Classification Logic)
    # ------------------------------------------------------------
    # Assumes archetype_metrics exists from previous step
    df = archetype_metrics.copy()

    # Define Thresholds
    # Volume threshold: fixed cut-off at 20% of all trips
    # Margin threshold: overall median cost per km, computed over the same
    # 0 < cost_per_km <= 15 window used for the per-archetype metrics and for
    # the Figure 2.4 reference line. Handles a pandas or a polars df_sample
    # (previously a polars frame always fell through to the hard-coded value).
    volume_threshold = 0.20
    try:
        if isinstance(df_sample, pd.DataFrame):
            margin_threshold = df_sample[
                (df_sample['cost_per_km'] > 0) & (df_sample['cost_per_km'] <= 15)
            ]['cost_per_km'].median()
        else:
            margin_threshold = df_sample.filter(
                (pl.col('cost_per_km') > 0) & (pl.col('cost_per_km') <= 15)
            )['cost_per_km'].median()
    except Exception:
        # Best-effort fallback so the chart can still be drawn. Catching
        # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
        # NOTE(review): 3.99 is a hard-coded value - presumably a previously
        # observed overall median; confirm.
        margin_threshold = 3.99

    # Segment Classification Function
    def get_segment(row):
        """Return the quadrant label for one archetype row (thresholds inclusive on the high side)."""
        is_high_vol = row['volume_share'] >= volume_threshold
        is_high_margin = row['median_cost_per_km'] >= margin_threshold

        if is_high_vol and is_high_margin:
            return "High Volume / High Margin" # Stars
        elif not is_high_vol and is_high_margin:
            return "Low Volume / High Margin"  # Premium Niche
        elif is_high_vol and not is_high_margin:
            return "High Volume / Low Margin"  # Volume Plays
        else:
            return "Low Volume / Low Margin"   # Deprioritize

    df['segment'] = df.apply(get_segment, axis=1)

    # Calculate Bubble Radius based on Revenue Share
    # sqrt keeps bubble AREA proportional to revenue share; x50 scales it up
    # for clear visualization
    df['bubble_radius'] = np.sqrt(df['revenue_share']) * 50

    # ------------------------------------------------------------
    # 2. SETUP COLORS (Uber Palette Mapped)
    # ------------------------------------------------------------
    color_map = {
        "High Volume / High Margin": UBER_GREEN,   # Stars -> Green (Focus)
        "Low Volume / High Margin":  UBER_PURPLE,  # Premium -> Purple
        "High Volume / Low Margin":  UBER_ORANGE,  # Volume Plays -> Orange (Warning)
        "Low Volume / Low Margin":   GRAY_500      # Deprioritize -> Gray
    }

    # ------------------------------------------------------------
    # 3. BUILD PLOT
    # ------------------------------------------------------------
    fig = go.Figure()

    # One trace per archetype so each bubble carries its own colour and hover text
    for _, row in df.iterrows():
        segment_color = color_map.get(row["segment"], GRAY_500)

        fig.add_trace(go.Scatter(
            x=[row["volume_share"] * 100],
            y=[row["median_cost_per_km"]],
            mode="markers+text",
            marker=dict(
                size=row["bubble_radius"],
                color=segment_color,
                opacity=0.85,
                line=dict(color="white", width=1.5) # Thicker white border for prominence
            ),
            # Only show name if bubble is large enough (rev share > 5%)
            text=[row["trip_archetype"] if row["revenue_share"] > 0.05 else ""],
            textposition="top center",
            textfont=dict(size=11, color=GRAY_900, family="Arial", weight="bold"),
            hovertemplate=(
                f"<b>{row['trip_archetype']}</b><br>"
                + f"Segment: {row['segment']}<br>"
                + "Volume: %{x:.1f}%<br>"
                + f"Median Cost/km: ${row['median_cost_per_km']:.2f}<br>"
                + f"Revenue Share: {row['revenue_share']*100:.1f}%<br>"
                + "<extra></extra>"
            ),
            showlegend=False
        ))

    # ------------------------------------------------------------
    # 4. REFERENCE LINES (Quadrants)
    # ------------------------------------------------------------
    # Vertical Line (Volume Threshold)
    fig.add_vline(
        x=volume_threshold * 100,
        line_width=1, line_dash="dash", line_color=GRAY_500
    )

    # Horizontal Line (Margin Threshold)
    fig.add_hline(
        y=margin_threshold,
        line_width=1, line_dash="dash", line_color=GRAY_500
    )

    # ------------------------------------------------------------
    # 5. QUADRANT LABELS (Consulting Style)
    # ------------------------------------------------------------
    # Axis ranges: 10% padding around the data, x always starting at 0
    x_min, x_max = 0, df["volume_share"].max() * 100 * 1.1
    y_min, y_max = df["median_cost_per_km"].min() * 0.9, df["median_cost_per_km"].max() * 1.1

    # Label positions are fixed multiples of the thresholds:
    # x at 0.5x / 1.5x the volume threshold, y at 0.9x / 1.1x the margin threshold.

    quad_configs = [
        # (Text, x_pos, y_pos, color)
        ("<b>PREMIUM NICHE</b><br>(Low Vol / High Margin)",
         volume_threshold*100*0.5, margin_threshold*1.1, UBER_PURPLE),

        ("<b>STARS ‚≠ê</b><br>(High Vol / High Margin)",
         volume_threshold*100*1.5, margin_threshold*1.1, UBER_GREEN),

        ("<b>DEPRIORITIZE</b><br>(Low Vol / Low Margin)",
         volume_threshold*100*0.5, margin_threshold*0.9, GRAY_500),

        ("<b>VOLUME PLAYS</b><br>(High Vol / Low Margin)",
         volume_threshold*100*1.5, margin_threshold*0.9, UBER_ORANGE)
    ]

    for text, x, y, color in quad_configs:
        # NOTE: labels are always added; with strongly skewed data a label may
        # land outside the axis ranges set in the layout below.
        fig.add_annotation(
            x=x, y=y,
            text=text,
            showarrow=False,
            font=dict(size=10, color=color),
            align="center",
            bgcolor="rgba(255,255,255,0.8)", # Semi-transparent white for better text visibility
            bordercolor=color,
            borderwidth=1,
            borderpad=4
        )

    # ------------------------------------------------------------
    # 6. LAYOUT & STYLING
    # ------------------------------------------------------------
    fig.update_layout(
        template="plotly_white",
        width=950,
        height=650,
        margin=dict(l=80, r=40, t=100, b=100),

        title=dict(
            text=(
                f"<b style='color:{GRAY_900}; font-size:20px'>Figure 2.5 ‚Äî Revenue Contribution Matrix</b><br>"
                f"<span style='color:{GRAY_600}; font-size:14px'>Strategic segmentation: Volume √ó Margin (Bubble Size = Revenue Share)</span>"
            ),
            x=0.0,
            y=0.95,
            xanchor="left"
        ),

        xaxis=dict(
            title="Volume Share (% of total trips)",
            title_font=dict(size=12, color=GRAY_600),
            gridcolor=GRAY_300,
            ticksuffix="%",
            zeroline=False,
            range=[x_min, x_max] # Start from 0
        ),

        yaxis=dict(
            title="Median Cost per KM ($)",
            title_font=dict(size=12, color=GRAY_600),
            gridcolor=GRAY_300,
            tickprefix="$",
            zeroline=False,
            range=[y_min, y_max]
        ),

        plot_bgcolor=UBER_WHITE,
        paper_bgcolor=UBER_WHITE,
        showlegend=False
    )

    # Footer / Insight
    caption = (
        "<b>Strategy:</b><br>"
        f"‚Ä¢ <span style='color:{UBER_GREEN}'><b>Stars</b></span>: Top priority (Commute?). Protect & grow.<br>"
        f"‚Ä¢ <span style='color:{UBER_PURPLE}'><b>Premium</b></span>: Maintain high pricing (Airport?).<br>"
        f"‚Ä¢ <span style='color:{UBER_ORANGE}'><b>Volume Plays</b></span>: Focus on efficiency/cost-cutting to improve margin."
    )

    fig.add_annotation(
        x=0, y=-0.2,
        xref="paper", yref="paper",
        text=caption,
        showarrow=False,
        font=dict(size=11, color=GRAY_600),
        align="left",
        xanchor="left"
    )

    # Save
    save_plot(fig, FIG_NAME)

# Show
#fig.show()

if loaded:
    print(f"   ‚úÖ {FIG_NAME} loaded from cache")
else:
    print(f"   ‚úÖ {FIG_NAME} generated and saved")

   ‚úÖ fig_2_5_revenue_contribution_matrix generated and saved
