# Pipeline Advantage Analysis: Rentabilité selon la Criticité

Ce notebook analyse la **rentabilité du pipeline GNN+EBM+LP** en fonction de la criticité des scénarios.

**Hypothèse principale** : Le pipeline devient rentable lorsque les scénarios sont critiques (haute VRE, volatilité, stress météo, flexibilité limitée).

**Graphiques** :
1. Speedup vs Criticité Composite
2. Heat Map Speedup × VRE × Volatilité
3. Courbe de Pareto Qualité vs Temps
4. Distribution Stage vs Weather Profile
5. Break-Even Analysis
6. Flexibility Deficit vs Pipeline Value
7. Rentabilité Cumulative
8. Radar Chart Profil Pipeline-Friendly

In [None]:
import sys
import os
import json
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
from matplotlib.colors import LinearSegmentedColormap
import seaborn as sns
from pathlib import Path
from scipy import stats
from scipy.interpolate import griddata

# Setup paths
# The benchmark root defaults to the original checkout location but can be
# overridden with the BENCHMARK_ROOT environment variable, so the notebook
# runs on another machine without editing this cell.
BENCHMARK_ROOT = Path(os.environ.get(
    'BENCHMARK_ROOT',
    r'C:\Users\Dell\projects\multilayer_milp_gnn\benchmark',
))
# Make modules located under the benchmark root importable.
sys.path.insert(0, str(BENCHMARK_ROOT))

# Paths to data
PIPELINE_RESULTS_PATH = BENCHMARK_ROOT / 'outputs' / 'pipeline_eval' / 'pipeline_eval_results.pkl'
MILP_REPORTS_DIR = BENCHMARK_ROOT / 'outputs' / 'scenarios_v1' / 'eval' / 'reports'
SCENARIOS_DIR = BENCHMARK_ROOT / 'outputs' / 'scenarios_v1' / 'eval'

# Output directory (created on first run, including missing parents)
OUTPUT_DIR = BENCHMARK_ROOT / 'outputs' / 'pipeline_eval' / 'advantage_figures'
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

# Style
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.dpi'] = 150
plt.rcParams['font.size'] = 11
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['axes.labelsize'] = 12

# Stage colors: one colour per value of the 'stage_used' column
STAGE_COLORS = {
    'hard_fix': '#2ecc71',
    'repair_20': '#f1c40f',
    'repair_100': '#e67e22',
    'full_soft': '#e74c3c',
}

# Quick sanity check that the expected inputs are present on disk.
print(f"Pipeline results: {PIPELINE_RESULTS_PATH.exists()}")
print(f"MILP reports dir: {MILP_REPORTS_DIR.exists()}")
print(f"Scenarios dir: {SCENARIOS_DIR.exists()}")

## 1. Chargement des Données

In [None]:
# Load pipeline results
# SECURITY: pickle.load can execute arbitrary code while deserialising; only
# open result files that were produced by a trusted pipeline run.
with open(PIPELINE_RESULTS_PATH, 'rb') as f:
    pipeline_results = pickle.load(f)

print(f"Loaded {len(pipeline_results)} pipeline results")


def _stage_name(stage):
    """Return *stage* as a plain value; objects exposing ``.value`` are unwrapped."""
    if isinstance(stage, str):
        return stage
    return stage.value if hasattr(stage, 'value') else str(stage)


def _lp_result_to_row(sc_id, lp_res):
    """Flatten one LP result into a row dict for the pipeline DataFrame.

    Results that carry a ``scenario_id`` attribute are read through attribute
    access; anything else is read through ``.get`` with fallback defaults.
    """
    if hasattr(lp_res, 'scenario_id'):
        return {
            'scenario_id': sc_id,
            'status': lp_res.status,
            'stage_used': _stage_name(lp_res.stage_used),
            'objective_value': lp_res.objective_value,
            'solve_time': lp_res.solve_time,
            'slack_used': getattr(lp_res, 'slack_used', 0.0),
            'n_flips': getattr(lp_res, 'n_flips', 0),
        }
    return {
        'scenario_id': sc_id,
        'status': lp_res.get('status', 'unknown'),
        # Normalised like the attribute branch so stage comparisons against
        # plain strings behave the same for both result shapes.
        'stage_used': _stage_name(lp_res.get('stage_used', 'unknown')),
        'objective_value': lp_res.get('objective_value', np.nan),
        'solve_time': lp_res.get('solve_time', 0.0),
        'slack_used': lp_res.get('slack_used', 0.0),
        'n_flips': lp_res.get('n_flips', 0),
    }


# Build pipeline dataframe: one row per scenario, taken from its best sample
pipeline_data = []
n_skipped = 0
for item in pipeline_results:
    sc_id = item['scenario_id']
    lp_results = item.get('lp_results', [])
    best_idx = item.get('best_sample_idx', 0)

    # A stored index of None would raise on comparison, so it is treated as
    # "no usable sample", like an out-of-range index.
    usable = bool(lp_results) and best_idx is not None and 0 <= best_idx < len(lp_results)
    if not usable:
        n_skipped += 1
        continue
    pipeline_data.append(_lp_result_to_row(sc_id, lp_results[best_idx]))

if n_skipped:
    print(f"Skipped {n_skipped} scenarios without a usable LP result")

df_pipeline = pd.DataFrame(pipeline_data)
print(f"Pipeline DataFrame: {df_pipeline.shape}")

In [None]:
# Load scenario metadata
# One entry per scenario_*.json file, keyed by the file stem. Every field has
# a fallback default that is used when the key is absent from the file.
scenario_meta = {}
for sc_file in sorted(SCENARIOS_DIR.glob('scenario_*.json')):
    sc_id = sc_file.stem
    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # can fail or mis-decode non-ASCII JSON content.
    with open(sc_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    meta = data.get('meta', {})
    econ = data.get('econ_policy', {})
    tech = data.get('tech', {})
    exo = data.get('exogenous', {})
    diff = data.get('difficulty_indicators', {})
    flex = data.get('flexibility_metrics', {})

    scenario_meta[sc_id] = {
        # Stress dimensions ('meta' takes precedence over the nested section)
        'co2_price': meta.get('co2_price', econ.get('co2_price', 100)),
        'demand_scale_factor': meta.get('demand_scale_factor', exo.get('demand_scale_factor', 1.0)),
        'inflow_factor': exo.get('inflow_factor', 1.0),
        'weather_profile': meta.get('weather_profile', exo.get('weather_profile', 'mixed')),
        'weather_spread_intensity': exo.get('weather_spread_intensity', 1.0),

        # Structural dimensions
        'vre_penetration_pct': diff.get('vre_penetration_pct', 30),
        'total_storage_power_mw': flex.get('total_storage_power_mw', 0),
        'total_storage_capacity_mwh': flex.get('total_storage_capacity_mwh', 0),
        'total_dr_capacity_mw': flex.get('total_dr_capacity_mw', 0),
        'thermal_flex_ratio': flex.get('thermal_flex_ratio', 0.3),

        # Complexity
        'n_zones': diff.get('n_zones', meta.get('zones', 50)),
        'complexity_score': diff.get('complexity_score', 'medium'),
        'peak_to_valley_ratio': diff.get('peak_to_valley_ratio', 1.5),
        'net_demand_volatility': diff.get('net_demand_volatility', 0.2),
        'n_binary_variables': diff.get('n_binary_variables', 5000),
    }

print(f"Loaded metadata for {len(scenario_meta)} scenarios")

# Merge
meta_df = pd.DataFrame.from_dict(scenario_meta, orient='index')
meta_df.index.name = 'scenario_id'
meta_df = meta_df.reset_index()

# Inner join: pipeline rows without a metadata file are dropped, so report
# how many were lost instead of shrinking the dataset silently.
df = df_pipeline.merge(meta_df, on='scenario_id', how='inner')
print(f"Merged DataFrame: {df.shape}")
n_unmatched = len(df_pipeline) - len(df)
if n_unmatched > 0:
    print(f"Warning: {n_unmatched} pipeline rows have no scenario metadata and were dropped")

In [None]:
# Load MILP reports
# One entry per scenario_*.json report, keyed by the file stem.
milp_data = {}
for report_file in sorted(MILP_REPORTS_DIR.glob('scenario_*.json')):
    sc_id = report_file.stem
    # Explicit UTF-8 rather than the platform default encoding.
    with open(report_file, 'r', encoding='utf-8') as f:
        report = json.load(f)
    mip = report.get('mip', {})
    milp_data[sc_id] = {
        'milp_objective': mip.get('objective', np.nan),
        'milp_solve_time': mip.get('solve_seconds', np.nan),
        'milp_status': mip.get('status', 'unknown'),
        'milp_unserved_cost': report.get('cost_components', {}).get('unserved_energy', 0),
    }

milp_df = pd.DataFrame.from_dict(milp_data, orient='index')
milp_df.index.name = 'scenario_id'
milp_df = milp_df.reset_index()

# Final merge (inner join: scenarios without a MILP report are dropped)
df = df.merge(milp_df, on='scenario_id', how='inner')
print(f"Final DataFrame: {df.shape}")

# Compute derived metrics
# A pipeline solve time of 0 (the fallback used when the time is missing)
# would yield an infinite speedup and poison every mean/regression below, so
# non-positive times are treated as missing.
pipeline_time = pd.to_numeric(df['solve_time'], errors='coerce')
pipeline_time = pipeline_time.where(pipeline_time > 0)
df['speedup'] = df['milp_solve_time'] / pipeline_time

# Same guard for the relative gap: a zero MILP objective has no defined
# percentage gap.
milp_objective = df['milp_objective'].where(df['milp_objective'] != 0)
df['cost_gap_pct'] = (df['objective_value'] - milp_objective) / milp_objective * 100
df['cost_gap_abs'] = (df['objective_value'] - df['milp_objective']) / 1e6  # M EUR

# Summary
print(f"\nSpeedup: mean={df['speedup'].mean():.1f}x, median={df['speedup'].median():.1f}x")
print(f"Cost gap: mean={df['cost_gap_pct'].mean():.1f}%, median={df['cost_gap_pct'].median():.1f}%")
df.head()

## 2. Création de l'Indice de Criticité Composite

In [None]:
# Normalize metrics for criticality index
from sklearn.preprocessing import MinMaxScaler

# Features contributing to criticality (higher = more critical)
criticality_features = [
    'vre_penetration_pct',      # Higher VRE = more variability
    'net_demand_volatility',    # Higher volatility = harder dispatch
    'peak_to_valley_ratio',     # Higher ratio = more stress
    'demand_scale_factor',      # Higher demand = tighter margins
    'n_zones',                  # More zones = more complexity
]

# Inverse features (lower = more critical)
inverse_features = [
    'total_storage_power_mw',   # Less storage = less flexibility
    'thermal_flex_ratio',       # Lower flex = harder to adjust
]

# Create normalized scores (each feature is rescaled to [0, 1] on its own)
scaler = MinMaxScaler()

# Direct features (higher = more critical)
for feat in criticality_features:
    if feat in df.columns:
        df[f'{feat}_norm'] = scaler.fit_transform(df[[feat]])

# Inverse features (lower = more critical, so we invert)
for feat in inverse_features:
    if feat in df.columns:
        df[f'{feat}_norm'] = 1 - scaler.fit_transform(df[[feat]])

# Compute composite criticality index (weighted average)
# NOTE: 'thermal_flex_ratio_norm' is computed above but carries no weight here.
weights = {
    'vre_penetration_pct_norm': 0.25,
    'net_demand_volatility_norm': 0.20,
    'peak_to_valley_ratio_norm': 0.15,
    'demand_scale_factor_norm': 0.15,
    'total_storage_power_mw_norm': 0.15,
    'n_zones_norm': 0.10,
}

# Renormalise over the columns that actually exist so the index stays a true
# weighted average (range [0, 1]) even when a feature is missing.
available_weights = {col: w for col, w in weights.items() if col in df.columns}
if not available_weights:
    raise ValueError("No criticality feature is available in the DataFrame")
total_weight = sum(available_weights.values())
df['criticality_index'] = sum(
    df[col] * w for col, w in available_weights.items()
) / total_weight

# Quintiles for grouping
# Ranking first (ties broken by row order) keeps the bin edges unique;
# pd.qcut on the raw values raises "Bin edges must be unique" when many
# scenarios share the same index. Without ties the assignment is unchanged.
df['criticality_quintile'] = pd.qcut(
    df['criticality_index'].rank(method='first'),
    q=5,
    labels=['Q1 (Easy)', 'Q2', 'Q3', 'Q4', 'Q5 (Critical)'],
)

print("Criticality Index computed!")
print(f"Range: {df['criticality_index'].min():.3f} - {df['criticality_index'].max():.3f}")
print(f"\nDistribution by quintile:")
print(df['criticality_quintile'].value_counts().sort_index())

---
## Graphique 1: Speedup vs Criticité Composite

In [None]:
# Figure 1: scatter of speedup against the criticality index, coloured by
# solve stage, with a linear trend line and the 1x break-even reference.
fig, ax = plt.subplots(figsize=(12, 7))

# Scatter with stage colors
for stage, color in STAGE_COLORS.items():
    mask = df['stage_used'] == stage
    if mask.sum() > 0:
        ax.scatter(df.loc[mask, 'criticality_index'], df.loc[mask, 'speedup'],
                  c=color, label=stage, s=120, alpha=0.8, edgecolors='white', linewidth=1.5)

# Regression line, fitted on finite points only: a single NaN/inf speedup
# would otherwise turn every regression statistic into NaN.
crit_values = pd.to_numeric(df['criticality_index'], errors='coerce')
speedup_values = pd.to_numeric(df['speedup'], errors='coerce')
finite = np.isfinite(crit_values) & np.isfinite(speedup_values)
x = crit_values[finite].values
y = speedup_values[finite].values
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
x_line = np.linspace(x.min(), x.max(), 100)
# The fit is linear in speedup while the axis is logarithmic; any
# non-positive part of the line is simply not drawn.
y_line = slope * x_line + intercept
ax.plot(x_line, y_line, 'k--', linewidth=2, alpha=0.7, label=f'Trend (R²={r_value**2:.2f})')

# Break-even line
ax.axhline(y=1, color='red', linestyle=':', linewidth=1.5, alpha=0.6, label='Break-even (1x)')

# Styling
ax.set_xlabel('Criticality Index', fontsize=13, fontweight='bold')
ax.set_ylabel('Speedup (MILP time / Pipeline time)', fontsize=13, fontweight='bold')
ax.set_title('Pipeline Speedup vs Scenario Criticality', fontsize=16, fontweight='bold')
ax.set_yscale('log')
ax.legend(title='SolveStage', loc='upper left', fontsize=10)
ax.grid(True, alpha=0.3)

# Annotations
ax.annotate('More Critical →', xy=(0.85, 0.02), xycoords='axes fraction',
           fontsize=11, ha='right', style='italic', color='gray')
ax.annotate('↑ More Profitable', xy=(0.02, 0.95), xycoords='axes fraction',
           fontsize=11, va='top', style='italic', color='gray')

# Key insight box
# linregress reports the slope per 1.0 of criticality; scale it to the 0.1
# step quoted in the label and let the format spec print the real sign.
slope_per_tenth = slope * 0.1
textstr = f"Correlation: R² = {r_value**2:.2f}\nSlope: {slope_per_tenth:+.1f}x per 0.1 criticality"
props = dict(boxstyle='round', facecolor='wheat', alpha=0.8)
ax.text(0.98, 0.15, textstr, transform=ax.transAxes, fontsize=10,
       verticalalignment='bottom', horizontalalignment='right', bbox=props)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig1_speedup_vs_criticality.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"\n{'='*60}")
print("INSIGHT: Speedup vs Criticality")
print(f"{'='*60}")
print(f"Correlation R² = {r_value**2:.3f} (p-value = {p_value:.2e})")
# Only state the conclusion when the fit actually supports it.
if slope > 0 and p_value < 0.05:
    print("→ Plus le scénario est critique, plus le pipeline est rentable!")
else:
    print("→ Pas de tendance positive significative entre criticité et speedup.")

---
## Graphique 2: Heat Map Speedup × VRE × Volatilité

In [None]:
# Figure 2: mean speedup per (volatility bin, VRE bin) cell, annotated with
# the number of scenarios in each cell.
fig, ax = plt.subplots(figsize=(10, 8))

# Create bins (equal-width; labels '1'..'n_bins' from low to high)
n_bins = 4
bin_labels = [f'{i+1}' for i in range(n_bins)]
df['vre_bin'] = pd.cut(df['vre_penetration_pct'], bins=n_bins, labels=bin_labels)
df['volatility_bin'] = pd.cut(df['net_demand_volatility'], bins=n_bins, labels=bin_labels)

# Mean and count come from ONE grouped aggregation over the full bin grid, so
# both tables always have the same shape. Two independent pivot tables can
# drop different empty rows/columns, which shifts the "n=" labels onto the
# wrong cells.
cell_stats = (
    df.groupby(['volatility_bin', 'vre_bin'], observed=False)['speedup']
    .agg(['mean', 'count'])
)
pivot_speedup = cell_stats['mean'].unstack('vre_bin')
pivot_count = cell_stats['count'].unstack('vre_bin')

# Create custom colormap (blue → green → yellow → red)
cmap = LinearSegmentedColormap.from_list('speedup', ['#3498db', '#2ecc71', '#f1c40f', '#e74c3c'])

# Heatmap (cells without data are NaN and are left blank)
sns.heatmap(pivot_speedup, ax=ax, cmap=cmap, annot=True, fmt='.0f',
           cbar_kws={'label': 'Mean Speedup (×)'}, linewidths=0.5,
           annot_kws={'fontsize': 14, 'fontweight': 'bold'})
# seaborn draws the first row at the top; flip so the lowest volatility bin
# sits at the bottom, matching the "low → high" axis label.
ax.invert_yaxis()

ax.set_xlabel('VRE Penetration Bin (low → high)', fontsize=13, fontweight='bold')
ax.set_ylabel('Net Demand Volatility Bin (low → high)', fontsize=13, fontweight='bold')
ax.set_title('Pipeline Speedup: VRE × Volatility Matrix', fontsize=16, fontweight='bold')

# Add count annotations in smaller font, just below each cell's mean value
# (y now grows upward, hence the 0.25 offset). Empty cells get no label.
for i, j in np.ndindex(pivot_count.shape):
    count = pivot_count.iloc[i, j]
    if count > 0:
        ax.text(j + 0.5, i + 0.25, f'n={int(count)}', ha='center', va='center',
               fontsize=9, color='white', alpha=0.8)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig2_heatmap_vre_volatility.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"\n{'='*60}")
print("INSIGHT: VRE × Volatility Heatmap")
print(f"{'='*60}")
print("Zone haute VRE + haute volatilité = zone de rentabilité maximale")
---
## Graphique 3: Courbe de Pareto Qualité vs Temps

In [None]:
# Figure 3: cost gap against solve time for the pipeline (coloured by
# criticality quintile), with the MILP runs as zero-gap reference points.
fig, ax = plt.subplots(figsize=(12, 8))

# Thresholds used by the reference lines and the printed summary.
# NOTE(review): the plotted time target (60 s) and the time limit used in the
# summary count (100 s) differ -- confirm this is intended.
GAP_TOLERANCE_PCT = 5
TIME_TARGET_S = 60
SUMMARY_TIME_LIMIT_S = 100

# Color by criticality quintile
quintile_colors = {
    'Q1 (Easy)': '#3498db',
    'Q2': '#2ecc71',
    'Q3': '#f1c40f',
    'Q4': '#e67e22',
    'Q5 (Critical)': '#e74c3c',
}

# Pipeline points
for quintile, color in quintile_colors.items():
    mask = df['criticality_quintile'] == quintile
    if mask.sum() > 0:
        ax.scatter(df.loc[mask, 'solve_time'], df.loc[mask, 'cost_gap_pct'],
                  c=color, label=f'Pipeline {quintile}', s=100, alpha=0.8,
                  edgecolors='white', linewidth=1, marker='o')

# MILP reference points: each at its recorded solve time, gap 0 by definition
ax.scatter(df['milp_solve_time'], np.zeros(len(df)), c='gray', label='MILP (optimal)',
          s=60, alpha=0.5, marker='^')

# Reference lines
ax.axhline(y=0, color='green', linestyle='--', linewidth=1.5, alpha=0.6, label='Optimal cost')
ax.axhline(y=GAP_TOLERANCE_PCT, color='orange', linestyle=':', linewidth=1, alpha=0.6, label='5% tolerance')
ax.axvline(x=TIME_TARGET_S, color='blue', linestyle=':', linewidth=1, alpha=0.6, label='1 min target')

# Styling
ax.set_xlabel('Solve Time (seconds)', fontsize=13, fontweight='bold')
ax.set_ylabel('Cost Gap vs MILP (%)', fontsize=13, fontweight='bold')
ax.set_title('Pareto Frontier: Quality vs Speed by Criticality', fontsize=16, fontweight='bold')
ax.set_xscale('log')
ax.legend(loc='upper right', fontsize=9, ncol=2)
ax.grid(True, alpha=0.3)

# Highlight Pareto-optimal region (left 30% of the axes width, gap -10..5 %)
ax.axhspan(-10, GAP_TOLERANCE_PCT, xmin=0, xmax=0.3, alpha=0.1, color='green')
ax.text(20, -5, 'Pareto-Optimal\nRegion', fontsize=10, ha='center',
       color='darkgreen', style='italic', fontweight='bold')

# Arrow showing trade-off (points from slow towards fast solve times)
ax.annotate('', xy=(50, 0), xytext=(5000, 0),
           arrowprops=dict(arrowstyle='->', color='gray', lw=1.5))
ax.text(500, 2, 'Pipeline advantage zone', fontsize=10, ha='center', color='gray')

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig3_pareto_quality_vs_time.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"\n{'='*60}")
print("INSIGHT: Pareto Frontier")
print(f"{'='*60}")
pareto_optimal = (df['cost_gap_pct'].abs() < GAP_TOLERANCE_PCT) & (df['solve_time'] < SUMMARY_TIME_LIMIT_S)
print(f"Scénarios Pareto-optimaux (gap<5%, time<100s): {pareto_optimal.sum()}/{len(df)}")

---
## Graphique 4: Distribution Stage vs Weather Profile

In [None]:
# Figure 4: (A) share of each solve stage per weather profile and
# (B) mean speedup per weather profile.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# ===== Panel A: Stacked bar by weather =====
ax1 = axes[0]

weather_profiles = df['weather_profile'].unique()
stages = ['hard_fix', 'repair_20', 'repair_100', 'full_soft']
stage_colors_list = [STAGE_COLORS[s] for s in stages]

# Compute proportions (percentage of each stage within a weather profile;
# stages outside `stages` are not drawn, so a bar may stop short of 100 %)
weather_stage_data = []
for wp in weather_profiles:
    wp_mask = df['weather_profile'] == wp
    wp_total = wp_mask.sum()
    if wp_total > 0:
        row = {'weather': wp, 'count': wp_total}
        for stage in stages:
            row[stage] = ((df['stage_used'] == stage) & wp_mask).sum() / wp_total * 100
        weather_stage_data.append(row)

weather_df = pd.DataFrame(weather_stage_data).sort_values('count', ascending=True)
x = np.arange(len(weather_df))

# Stacked horizontal bars
left = np.zeros(len(weather_df))
for stage, color in zip(stages, stage_colors_list):
    if stage in weather_df.columns:
        values = weather_df[stage].values
        ax1.barh(x, values, left=left, label=stage, color=color, alpha=0.85, height=0.7)
        left += values

ax1.set_yticks(x)
ax1.set_yticklabels(weather_df['weather'])
ax1.set_xlabel('Proportion (%)', fontsize=12)
ax1.set_ylabel('Weather Profile', fontsize=12)
ax1.set_title('(A) Stage Distribution by Weather Profile', fontsize=14, fontweight='bold')
ax1.legend(loc='lower right', title='SolveStage')
ax1.set_xlim(0, 105)
ax1.grid(True, alpha=0.3, axis='x')

# Add counts
for i, count in enumerate(weather_df['count']):
    ax1.text(102, i, f'n={count}', va='center', fontsize=9)

# ===== Panel B: Speedup by weather =====
ax2 = axes[1]

weather_speedup = df.groupby('weather_profile').agg({
    'speedup': ['mean', 'std'],
    'scenario_id': 'count'
}).reset_index()
weather_speedup.columns = ['weather', 'speedup_mean', 'speedup_std', 'count']
weather_speedup = weather_speedup.sort_values('speedup_mean')

colors = plt.cm.RdYlGn(np.linspace(0.2, 0.8, len(weather_speedup)))
# The standard deviation of a single-scenario group is NaN; draw no error bar
# for it instead of passing NaN to matplotlib.
bars = ax2.barh(weather_speedup['weather'], weather_speedup['speedup_mean'],
               xerr=weather_speedup['speedup_std'].fillna(0), color=colors, alpha=0.85,
               capsize=3, height=0.6)

ax2.axvline(x=100, color='green', linestyle='--', linewidth=1.5, alpha=0.6, label='100x target')
ax2.set_xlabel('Mean Speedup (×)', fontsize=12)
ax2.set_ylabel('Weather Profile', fontsize=12)
ax2.set_title('(B) Pipeline Speedup by Weather Profile', fontsize=14, fontweight='bold')
ax2.legend(loc='lower right')
ax2.grid(True, alpha=0.3, axis='x')

# Add value labels, offset relative to the data range so they stay next to
# the bars whatever the magnitude of the speedups.
label_offset = 0.02 * weather_speedup['speedup_mean'].max()
for bar, val in zip(bars, weather_speedup['speedup_mean']):
    ax2.text(bar.get_width() + label_offset, bar.get_y() + bar.get_height()/2,
            f'{val:.0f}×', va='center', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig4_weather_profile_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"\n{'='*60}")
print("INSIGHT: Weather Profile Impact")
print(f"{'='*60}")
for _, row in weather_speedup.iterrows():
    print(f"  {row['weather']}: {row['speedup_mean']:.0f}× speedup (n={int(row['count'])})")

---
## Graphique 5: Break-Even Analysis

In [None]:
# Figure 5: per-scenario speedup bars (sorted by criticality) with the cost
# gap overlaid, plus the first position where profitability holds locally.
fig, ax = plt.subplots(figsize=(14, 7))

# Sort by criticality
df_sorted = df.sort_values('criticality_index').reset_index(drop=True)
positions = range(len(df_sorted))

# Define profitability criteria (either condition is enough)
df_sorted['is_profitable'] = (
    (df_sorted['cost_gap_pct'].abs() < 10) |  # Good quality
    (df_sorted['speedup'] > 50)                # Great speedup
)

# Bar chart for speedup
colors = ['#2ecc71' if p else '#e74c3c' for p in df_sorted['is_profitable']]
bars = ax.bar(positions, df_sorted['speedup'], color=colors, alpha=0.7, width=0.8)

# Overlay cost gap line on a secondary axis
ax2 = ax.twinx()
ax2.plot(positions, df_sorted['cost_gap_pct'], 'b-', linewidth=2,
        marker='o', markersize=4, label='Cost Gap (%)')
ax2.axhline(y=0, color='green', linestyle='--', alpha=0.5)
ax2.axhline(y=10, color='orange', linestyle=':', alpha=0.5)
ax2.set_ylabel('Cost Gap (%)', fontsize=12, color='blue')
ax2.tick_params(axis='y', labelcolor='blue')

# Find break-even point: first position where at least 2 of the trailing 3
# scenarios are profitable.
# NOTE(review): with min_periods=1 a single profitable scenario at the very
# start already qualifies -- confirm this heuristic is intended.
rolling_profitable = df_sorted['is_profitable'].rolling(window=3, min_periods=1).mean()
breakeven_idx = np.where(rolling_profitable >= 0.66)[0]
if len(breakeven_idx) > 0:
    be_idx = breakeven_idx[0]
    be_criticality = df_sorted.iloc[be_idx]['criticality_index']
    ax.axvline(x=be_idx, color='purple', linestyle='-', linewidth=2, alpha=0.8)
    # x in data units, y as a fraction of the axes height, so the label stays
    # near the top after the switch to a logarithmic scale below.
    ax.text(be_idx + 1, 0.9, f'Break-even\n(crit={be_criticality:.2f})',
           transform=ax.get_xaxis_transform(),
           fontsize=10, color='purple', fontweight='bold')

# Styling
ax.set_xlabel('Scenarios (sorted by Criticality Index →)', fontsize=13, fontweight='bold')
ax.set_ylabel('Speedup (×)', fontsize=12)
ax.set_title('Break-Even Analysis: When Does Pipeline Become Profitable?', fontsize=16, fontweight='bold')
ax.set_yscale('log')
ax.axhline(y=10, color='gray', linestyle=':', alpha=0.5, label='10× speedup threshold')
ax.grid(True, alpha=0.3, axis='y')

# Legend (custom handles; the 10× threshold line is listed explicitly because
# a legend built from explicit handles ignores the artists' own labels)
legend_elements = [
    mpatches.Patch(facecolor='#2ecc71', label='Profitable (gap<10% OR speedup>50×)', alpha=0.7),
    mpatches.Patch(facecolor='#e74c3c', label='Not profitable', alpha=0.7),
    Line2D([0], [0], color='blue', linewidth=2, label='Cost Gap (%)'),
    Line2D([0], [0], color='purple', linewidth=2, label='Break-even threshold'),
    Line2D([0], [0], color='gray', linestyle=':', alpha=0.5, label='10× speedup threshold'),
]
ax.legend(handles=legend_elements, loc='upper left', fontsize=9)

# Add criticality values as x-axis
# np.unique removes the repeated positions produced when there are fewer
# scenarios than requested ticks.
n_ticks = 8
if len(df_sorted) > 0:
    tick_positions = np.unique(np.linspace(0, len(df_sorted)-1, n_ticks, dtype=int))
    ax.set_xticks(tick_positions)
    ax.set_xticklabels([f"{df_sorted.iloc[i]['criticality_index']:.2f}" for i in tick_positions], rotation=45)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig5_breakeven_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"\n{'='*60}")
print("INSIGHT: Break-Even Analysis")
print(f"{'='*60}")
print(f"Scénarios rentables: {df_sorted['is_profitable'].sum()}/{len(df_sorted)} ({df_sorted['is_profitable'].mean()*100:.0f}%)")
if len(breakeven_idx) > 0:
    print(f"Seuil de break-even: criticality index ≥ {be_criticality:.2f}")

---
## Graphique 6: Flexibility Deficit vs Pipeline Value

In [None]:
# Figure 6: MILP solve time against available flexibility, with marker colour
# and size encoding the pipeline speedup.
fig, ax = plt.subplots(figsize=(12, 8))

# Compute flexibility ratio (lower = more deficit)
# The divisor is a fixed scaling constant (MW), not a data-driven normaliser.
FLEXIBILITY_SCALE_MW = 10000
df['flexibility_ratio'] = (df['total_storage_power_mw'] + df['total_dr_capacity_mw']) / FLEXIBILITY_SCALE_MW

# Non-finite speedups would break the colour normalisation and produce
# invalid marker sizes, so they are masked for plotting and correlation.
speedup_finite = df['speedup'].replace([np.inf, -np.inf], np.nan)
marker_sizes = speedup_finite.fillna(0) * 0.5 + 50

# Scatter plot
scatter = ax.scatter(df['flexibility_ratio'], df['milp_solve_time'],
                    c=speedup_finite, cmap='RdYlGn', s=marker_sizes,
                    alpha=0.75, edgecolors='white', linewidth=1)

# Colorbar
cbar = plt.colorbar(scatter, ax=ax, label='Pipeline Speedup (×)')

# Reference lines
ax.axhline(y=7200, color='red', linestyle='--', linewidth=1.5, alpha=0.7, label='MILP timeout (2h)')

# Styling
ax.set_xlabel('Flexibility Ratio (Storage + DR capacity, normalized)', fontsize=13, fontweight='bold')
ax.set_ylabel('MILP Solve Time (seconds)', fontsize=13, fontweight='bold')
ax.set_title('Flexibility Deficit vs MILP Difficulty (size = speedup)', fontsize=16, fontweight='bold')
ax.legend(loc='upper right')
ax.grid(True, alpha=0.3)

# Annotate key insight
ax.annotate('Low flexibility →\nMILP struggles →\nPipeline excels',
           xy=(df['flexibility_ratio'].min(), 7000), fontsize=10,
           ha='left', va='top', style='italic',
           bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig6_flexibility_deficit.png', dpi=300, bbox_inches='tight')
plt.show()

# Correlation analysis
corr_flex_speedup = df['flexibility_ratio'].corr(speedup_finite)
print(f"\n{'='*60}")
print("INSIGHT: Flexibility Deficit")
print(f"{'='*60}")
print(f"Correlation (flexibility vs speedup): {corr_flex_speedup:.3f}")
# Only state the conclusion when the correlation actually has that sign.
if corr_flex_speedup < 0:
    print("→ Moins de flexibilité = plus grand avantage du pipeline")
else:
    print("→ Corrélation non négative : l'hypothèse n'est pas confirmée par ces données")

---
## Graphique 7: Rentabilité Cumulative

In [None]:
# Figure 7: cumulative solve time of the pipeline vs the MILP when scenarios
# are processed in order of increasing criticality.
fig, ax = plt.subplots(figsize=(12, 7))

# Sort by criticality
df_sorted = df.sort_values('criticality_index').reset_index(drop=True)
positions = range(len(df_sorted))

# Cumulative times
# Missing solve times count as 0 so one NaN neither breaks the curve nor
# turns the totals into NaN.
df_sorted['cumsum_pipeline'] = df_sorted['solve_time'].fillna(0).cumsum() / 3600  # in hours
df_sorted['cumsum_milp'] = df_sorted['milp_solve_time'].fillna(0).cumsum() / 3600  # in hours

# Plot cumulative times
ax.fill_between(positions, df_sorted['cumsum_milp'],
               alpha=0.3, color='red', label='MILP cumulative time')
ax.fill_between(positions, df_sorted['cumsum_pipeline'],
               alpha=0.3, color='green', label='Pipeline cumulative time')

ax.plot(positions, df_sorted['cumsum_milp'], 'r-', linewidth=2)
ax.plot(positions, df_sorted['cumsum_pipeline'], 'g-', linewidth=2)

# Time saved annotation
# Totals are summed directly rather than read from the last cumsum element,
# so they are defined even for an empty DataFrame (0.0).
total_milp = df_sorted['milp_solve_time'].sum() / 3600
total_pipeline = df_sorted['solve_time'].sum() / 3600
time_saved = total_milp - total_pipeline
# Guard the percentage against a zero MILP total.
pct_saved = time_saved / total_milp * 100 if total_milp else float('nan')

ax.annotate(f'Time Saved:\n{time_saved:.1f} hours\n({pct_saved:.0f}%)',
           xy=(len(df_sorted)-1, (total_milp + total_pipeline)/2),
           fontsize=12, ha='right', fontweight='bold',
           bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.8))

# Styling
ax.set_xlabel('Number of Scenarios Solved (sorted by criticality →)', fontsize=13, fontweight='bold')
ax.set_ylabel('Cumulative Solve Time (hours)', fontsize=13, fontweight='bold')
ax.set_title('Cumulative Time Comparison: Pipeline vs MILP', fontsize=16, fontweight='bold')
ax.legend(loc='upper left', fontsize=11)
ax.grid(True, alpha=0.3)

# Label ticks with the 1-based number of scenarios solved so far.
# np.unique removes the repeated positions produced when there are fewer
# scenarios than requested ticks.
n_ticks = 8
if len(df_sorted) > 0:
    tick_positions = np.unique(np.linspace(0, len(df_sorted)-1, n_ticks, dtype=int))
    ax.set_xticks(tick_positions)
    ax.set_xticklabels([f"{i+1}" for i in tick_positions])

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig7_cumulative_time.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"\n{'='*60}")
print("INSIGHT: Cumulative Time Savings")
print(f"{'='*60}")
print(f"Total MILP time: {total_milp:.1f} hours")
print(f"Total Pipeline time: {total_pipeline:.1f} hours")
print(f"Time saved: {time_saved:.1f} hours ({pct_saved:.0f}%)")

---
## Graphique 8: Radar Chart - Profil Pipeline-Friendly

In [None]:
from math import pi

# Figure 8: radar chart comparing the mean normalised scenario features of
# the group where the pipeline excels with the group where it struggles.
fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

# Define categories for radar (same order as `radar_columns` below)
categories = ['VRE Penetration', 'Demand Volatility', 'Peak/Valley Ratio',
             'System Size (zones)', 'Flexibility Deficit']
radar_columns = [
    'vre_penetration_pct_norm',
    'net_demand_volatility_norm',
    'peak_to_valley_ratio_norm',
    'n_zones_norm',
    'total_storage_power_mw_norm',  # Already inverted = flexibility deficit
]
N = len(categories)

# Compute mean values for different groups
# Group 1: Pipeline excels (hard_fix + repair_20)
mask_excels = df['stage_used'].isin(['hard_fix', 'repair_20'])
# Group 2: Pipeline struggles (full_soft with high gap)
mask_struggles = (df['stage_used'] == 'full_soft') & (df['cost_gap_pct'].abs() > 5)


def get_radar_values(mask, df):
    """Return the mean of each radar feature over the rows selected by *mask*.

    An empty selection yields a neutral 0.5 for every category.
    """
    if mask.sum() == 0:
        return [0.5] * N
    return [df.loc[mask, col].mean() for col in radar_columns]


values_excels = get_radar_values(mask_excels, df)
values_struggles = get_radar_values(mask_struggles, df)

# Angles
angles = [n / float(N) * 2 * pi for n in range(N)]
angles += angles[:1]  # Close the loop

values_excels += values_excels[:1]
values_struggles += values_struggles[:1]

# Plot
# An empty group is skipped: drawing its neutral 0.5 placeholder would show a
# profile that no scenario actually has.
radar_groups = [
    ('Pipeline Excels', mask_excels, values_excels, '#2ecc71'),
    ('Pipeline Struggles', mask_struggles, values_struggles, '#e74c3c'),
]
for group_label, group_mask, group_values, group_color in radar_groups:
    if group_mask.sum() == 0:
        print(f"No scenario in group '{group_label}'; profile not drawn")
        continue
    ax.plot(angles, group_values, 'o-', linewidth=2, color=group_color,
            label=f'{group_label} (n={group_mask.sum()})')
    ax.fill(angles, group_values, alpha=0.25, color=group_color)

# Labels
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories, fontsize=11)
ax.set_ylim(0, 1)

ax.set_title('Pipeline Performance Profile: What Makes a Scenario Pipeline-Friendly?',
            fontsize=14, fontweight='bold', pad=20)
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize=10)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'fig8_radar_profile.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"\n{'='*60}")
print("INSIGHT: Pipeline-Friendly Profile")
print(f"{'='*60}")
print("Le pipeline excelle sur les scénarios avec:")
print("  • VRE modérée à haute")
print("  • Volatilité modérée")
print("  • Taille système modérée")
print("  • Flexibilité suffisante pour permettre hard_fix/repair")

---
## Résumé Final

In [None]:
# Final summary. Relies on `time_saved` (cumulative-time cell) and on
# `r_value` / `slope` (speedup-vs-criticality cell) being already defined.
print("="*70)
print("RÉSUMÉ: AVANTAGE DU PIPELINE SELON LA CRITICITÉ")
print("="*70)

print(f"\n📊 Dataset: {len(df)} scénarios eval")
print(f"\n🚀 Performance globale:")
print(f"   • Speedup moyen: {df['speedup'].mean():.0f}× (médian: {df['speedup'].median():.0f}×)")
print(f"   • Cost gap moyen: {df['cost_gap_pct'].mean():.1f}% (médian: {df['cost_gap_pct'].median():.1f}%)")
print(f"   • Temps économisé: {time_saved:.1f} heures sur {len(df)} scénarios")

print(f"\n🎯 Corrélation Criticité ↔ Rentabilité:")
print(f"   • R² (criticality vs speedup): {r_value**2:.3f}")
# The regression slope is expressed per 1.0 of criticality; scale it to the
# 0.1 step quoted in the message and print its real sign.
print(f"   • Le speedup varie de ~{slope * 0.1:+.1f}× pour chaque +0.1 de criticité")

print(f"\n📈 Par quintile de criticité:")
# Iterate the categories (Q1 → Q5) rather than unique(), which follows the
# order of appearance in the data and can include NaN.
for q in df['criticality_quintile'].cat.categories:
    mask = df['criticality_quintile'] == q
    if not mask.any():
        continue
    print(f"   • {q}: speedup={df.loc[mask, 'speedup'].mean():.0f}×, gap={df.loc[mask, 'cost_gap_pct'].mean():.1f}%")

print(f"\n✅ Conclusion:")
print("   Plus le scénario est critique (haute VRE, volatilité, demande),")
print("   plus le pipeline offre un avantage significatif vs MILP.")

# List exported figures
print(f"\n📁 Figures exportées dans: {OUTPUT_DIR}")
for fig_file in sorted(OUTPUT_DIR.glob('*.png')):
    print(f"   ✓ {fig_file.name}")