# Experiment 13: Recovery Trajectory Analysis

**Phase 4 - Understanding Spatial and Temporal Recovery Dynamics**

## Background

Previous experiments established:
- Passive recovery is possible (~38.6%) with the fixed solver
- Active forcing follows linear relationship: recovery ≈ 0.74 × |f| + 0.51
- Network fragmentation increases recovery difficulty

## Key Questions

**How does recovery propagate through the network?**

1. **Which cells recover first?** Edge vs interior, high vs low connectivity
2. **Are there recovery waves?** Does recovery propagate spatially?
3. **What predicts cell-level recovery?** Connectivity, barrier height, position?
4. **Is recovery synchronized or independent?** Correlation structure

## Experimental Design

| Parameter | Value |
|-----------|-------|
| Network | 50-cell Amazon subnetwork |
| Ensemble runs | 100 per forcing level (for statistical power) |
| Forcing levels | [0.0, -0.2, -0.4] |
| **Total simulations** | 300 |

## Analysis Focus

Track for each cell:
- Time to tip during cascade
- Time to recover (if recovered)
- Recovery sequence (order of recovery)
- Correlation with neighbors' states

## 1. Setup and Imports

In [None]:
import sys
sys.path.insert(0, '/opt/research-local/src')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import time
from pathlib import Path
from netCDF4 import Dataset
from dask.distributed import as_completed
from collections import defaultdict
from scipy.stats import pearsonr, spearmanr

# Core energy-constrained module
from energy_constrained import (
    EnergyConstrainedNetwork,
    EnergyConstrainedCusp,
    GradientDrivenCoupling,
    run_two_phase_experiment,
    get_dask_client,
    compute_network_metrics
)

print("Imports successful!")
print(f"NumPy version: {np.__version__}")

In [None]:
# Connect to Dask cluster
client = get_dask_client()

# Fetch the scheduler description once so the address, worker count and
# thread total printed below all describe the same snapshot of the cluster.
scheduler_info = client.scheduler_info()
cluster_workers = scheduler_info['workers']

print(f"Connected to: {scheduler_info['address']}")
print(f"Workers: {len(cluster_workers)}")
print(f"Total threads: {sum(w['nthreads'] for w in cluster_workers.values())}")

## 2. Load Amazon Data

In [None]:
DATA_PATH = Path('/opt/research-local/data/amazon/amazon_adaptation_model/average_network/era5_new_network_data')


def load_amazon_data(year=2003, months=(7, 8, 9)):
    """Load and average Amazon moisture recycling data.

    Reads one NetCDF file per requested month from ``DATA_PATH`` (file name
    ``1deg_<year>_<MM>.nc``) and averages the ``rain``, ``evap`` and
    ``network`` variables over the months that were found. Months whose file
    is missing are skipped with a warning, because they change what the
    returned average represents.

    Args:
        year: Year used in the file name.
        months: Iterable of month numbers to average over.

    Returns:
        dict with keys ``'rain'``, ``'evap'`` and ``'network'`` (element-wise
        means over the loaded months) and ``'n_cells'`` (length of the first
        loaded ``rain`` array).

    Raises:
        FileNotFoundError: If none of the requested monthly files exist.
    """
    import warnings

    all_rain = []
    all_evap = []
    all_network = []
    missing = []

    for month in months:
        file_path = DATA_PATH / f'1deg_{year}_{month:02d}.nc'
        if not file_path.exists():
            missing.append(file_path.name)
            continue
        with Dataset(file_path, 'r') as ds:
            all_rain.append(ds.variables['rain'][:])
            all_evap.append(ds.variables['evap'][:])
            all_network.append(ds.variables['network'][:])

    # Without this check the function would go on to average empty lists and
    # then fail with an unhelpful IndexError on all_rain[0].
    if not all_rain:
        raise FileNotFoundError(
            f"No Amazon data files found in {DATA_PATH} for year={year}, "
            f"months={list(months)}"
        )
    if missing:
        warnings.warn(
            f"Averaging over {len(all_rain)} month(s) only; missing files: "
            f"{', '.join(missing)}"
        )

    return {
        'rain': np.mean(all_rain, axis=0),
        'evap': np.mean(all_evap, axis=0),
        'network': np.mean(all_network, axis=0),
        'n_cells': len(all_rain[0])
    }

amazon_data = load_amazon_data(year=2003)
print(f"Loaded Amazon data: {amazon_data['n_cells']} cells")

## 3. Experiment Configuration

In [None]:
# Experiment 13: Recovery Trajectory Analysis
#
# The configuration is assembled from three thematic groups; the merged dict
# keeps the keys in the order: ensemble, network, simulation, seeds.

# Forcing levels to compare and ensemble size per level
_ensemble_settings = {
    'forcing_values': [0.0, -0.20, -0.40],
    'n_runs_per_forcing': 100,  # Large ensemble for trajectory statistics
}

# Network parameters
_network_settings = {
    'n_cells': 50,
    'min_flow': 1.0,
    'barrier_height': 0.2,
}

# Two-phase simulation parameters (cascade phase, then recovery phase)
_simulation_settings = {
    'cascade_duration': 200,
    'recovery_duration': 800,
    'dt': 0.5,
    'cascade_sigma': 0.06,
    'cascade_alpha': 1.5,
    'recovery_sigma': 0.04,
    'recovery_alpha': 2.0,
}

SWEEP_CONFIG = {
    **_ensemble_settings,
    **_network_settings,
    **_simulation_settings,
    # Seeds
    'base_seed': 42,
}

# One simulation per (forcing level, ensemble member) pair.
n_forcing = len(SWEEP_CONFIG['forcing_values'])
total_sims = n_forcing * SWEEP_CONFIG['n_runs_per_forcing']

for _line in (
    "=" * 60,
    "EXPERIMENT 13: RECOVERY TRAJECTORY ANALYSIS",
    "=" * 60,
    f"Forcing values: {SWEEP_CONFIG['forcing_values']}",
    f"Runs per forcing: {SWEEP_CONFIG['n_runs_per_forcing']}",
    f"Total simulations: {total_sims}",
    f"\nEstimated runtime: ~{total_sims * 2 / 60:.0f} minutes on 14 workers",
):
    print(_line)

## 4. Network Creation with Cell Metrics

In [None]:
def create_amazon_network_with_cell_info(data, config, seed=42):
    """
    Create Amazon network and return cell-level information.

    The `n_cells` cells with the largest total flow (column sum + row sum of
    the network matrix) are selected. Each selected cell becomes an
    EnergyConstrainedCusp element named 'cell_<i>', and a
    GradientDrivenCoupling is added from cell i to cell j whenever
    network_matrix[idx_i, idx_j] exceeds `min_flow`.

    Args:
        data: dict providing 'network', 'rain' and 'evap' arrays.
        config: dict providing 'n_cells', 'min_flow' and 'barrier_height'.
        seed: value passed to np.random.seed before the network is built.

    Returns:
        network: EnergyConstrainedNetwork
        cell_info: DataFrame with one row per selected cell (cell_id,
            original_idx, rain, evap, rain_evap_ratio, total_flow, in/out
            degree, total_degree, in/out flow, net_flow)
        top_indices: indices of the selected cells in the original arrays,
            ordered by ascending total flow (so cell_0 has the smallest
            total flow among the selected cells)
    """
    np.random.seed(seed)

    network_matrix = data['network']
    rain = data['rain']
    evap = data['evap']
    n_cells = config['n_cells']
    min_flow = config['min_flow']
    barrier_height = config['barrier_height']

    # Per-cell total flow: column sums plus row sums of the network matrix.
    total_flow = network_matrix.sum(axis=0) + network_matrix.sum(axis=1)
    # argsort is ascending, so the last n_cells entries are the largest.
    top_indices = np.argsort(total_flow)[-n_cells:]

    net = EnergyConstrainedNetwork()
    cell_info = []

    # Add elements and track cell info
    for i, idx in enumerate(top_indices):
        # Every cell gets identical cusp parameters; only connectivity differs.
        element = EnergyConstrainedCusp(
            a=-1.0, b=1.0, c=0.0, x_0=0.0,
            barrier_height=barrier_height,
            dissipation_rate=0.1
        )
        net.add_element(f'cell_{i}', element)

        cell_info.append({
            'cell_id': i,
            'original_idx': idx,
            'rain': rain[idx],
            'evap': evap[idx],
            # Denominator is floored at 1 so small evap values cannot blow up the ratio.
            'rain_evap_ratio': rain[idx] / max(evap[idx], 1),
            'total_flow': total_flow[idx],
        })

    # Add couplings and calculate connectivity
    in_degree = np.zeros(n_cells)
    out_degree = np.zeros(n_cells)
    in_flow = np.zeros(n_cells)
    out_flow = np.zeros(n_cells)

    for i, idx_i in enumerate(top_indices):
        for j, idx_j in enumerate(top_indices):
            if i != j:
                flow = network_matrix[idx_i, idx_j]
                # Only flows strictly above the threshold become edges.
                if flow > min_flow:
                    coupling = GradientDrivenCoupling(
                        conductivity=flow / 100.0,
                        state_coupling=0.1
                    )
                    net.add_coupling(f'cell_{i}', f'cell_{j}', coupling)
                    # The edge i -> j counts as outgoing for i and incoming for j.
                    out_degree[i] += 1
                    in_degree[j] += 1
                    out_flow[i] += flow
                    in_flow[j] += flow

    # Add connectivity to cell info
    for i, info in enumerate(cell_info):
        info['in_degree'] = in_degree[i]
        info['out_degree'] = out_degree[i]
        info['total_degree'] = in_degree[i] + out_degree[i]
        info['in_flow'] = in_flow[i]
        info['out_flow'] = out_flow[i]
        info['net_flow'] = in_flow[i] - out_flow[i]  # Positive = sink, Negative = source

    cell_df = pd.DataFrame(cell_info)

    return net, cell_df, top_indices


# Build the network once and keep the per-cell property table alongside it.
network, cell_info, selected_cells = create_amazon_network_with_cell_info(amazon_data, SWEEP_CONFIG)

# Assemble the report first, then emit it in one call (one line per entry).
_network_report = [
    f"Network: {network.n_elements} nodes, {network.number_of_edges()} edges",
    f"\nCell property ranges:",
    f"  Total degree: {cell_info['total_degree'].min():.0f} - {cell_info['total_degree'].max():.0f}",
    f"  In-flow: {cell_info['in_flow'].min():.1f} - {cell_info['in_flow'].max():.1f} mm",
    f"  Net flow: {cell_info['net_flow'].min():.1f} - {cell_info['net_flow'].max():.1f} mm",
]
print(*_network_report, sep="\n")

# Last expression of the cell: shown by the notebook as a preview table.
cell_info.head()

## 5. Worker Function with Detailed Trajectory Data

In [None]:
def run_trajectory_experiment(args):
    """
    Worker function that returns detailed trajectory information.

    Runs one two-phase (cascade, then recovery) simulation and summarises
    every cell's trajectory. All imports happen inside the function so that it
    is self-contained when executed on a remote worker.

    Args:
        args: Tuple ``(network_bytes, forcing, config, seed)``.
            ``network_bytes`` is the pickled network, ``forcing`` is handed to
            the solver as ``recovery_forcing``, ``seed`` seeds both NumPy and
            the solver, and ``config`` supplies ``cascade_duration``,
            ``recovery_duration``, ``dt``, ``cascade_sigma``,
            ``cascade_alpha``, ``recovery_sigma`` and ``recovery_alpha``.
            ``config`` may optionally contain ``persistence_steps`` (default
            10): how many consecutive samples a cell must stay below zero for
            a crossing to count as a recovery.

    Returns:
        dict with ``forcing``, ``seed``, the run-level solver metrics
        (``run_recovery_fraction``, ``n_tipped_cascade``, ``n_recovered``) and
        ``cell_results``: one dict per cell with ``cell_id``,
        ``first_tip_time``, ``tipped_at_cascade_end``, ``recovered``,
        ``recovery_time`` (measured from the start of the recovery phase),
        ``mean_state_cascade``, ``mean_state_recovery``, ``pct_time_tipped``
        and ``final_state``. Times are NaN when the event never happened.
    """
    network_bytes, forcing, config, seed = args

    import sys
    import numpy as np
    import pickle

    if '/opt/research-local/src' not in sys.path:
        sys.path.insert(0, '/opt/research-local/src')

    from energy_constrained.solvers import run_two_phase_experiment

    def first_persistent_recovery(states, window):
        """Return the first index starting `window` consecutive negative samples, or None.

        Close to the end of the trajectory the slice is simply shorter, so a
        crossing there counts when every remaining sample is negative.
        """
        below = states < 0
        for start in np.flatnonzero(below):
            if below[start:start + window].all():
                return int(start)
        return None

    # Reconstruct network.
    # NOTE: pickle.loads must only ever see bytes produced by this notebook.
    network = pickle.loads(network_bytes)
    np.random.seed(seed)

    # Run two-phase experiment
    result = run_two_phase_experiment(
        network=network,
        cascade_duration=config['cascade_duration'],
        recovery_duration=config['recovery_duration'],
        dt=config['dt'],
        cascade_sigma=config['cascade_sigma'],
        cascade_alpha=config['cascade_alpha'],
        recovery_sigma=config['recovery_sigma'],
        recovery_alpha=config['recovery_alpha'],
        recovery_forcing=forcing,
        seed=seed
    )

    # Extract detailed cell-level information
    dt = config['dt']
    persistence_steps = max(1, int(config.get('persistence_steps', 10)))
    n_cells = result.x_full.shape[1]
    # Rows before this index are treated as cascade phase, the rest as recovery.
    # NOTE(review): assumes x_full holds one row per dt step starting at t=0 -
    # confirm against the solver.
    cascade_end_idx = int(config['cascade_duration'] / dt)

    cell_results = []
    for j in range(n_cells):
        x_traj = result.x_full[:, j]
        cascade_traj = x_traj[:cascade_end_idx]
        recovery_traj = x_traj[cascade_end_idx:]

        # Find first tip time during cascade (state > 0 counts as tipped)
        tip_crossings = np.flatnonzero(cascade_traj > 0)
        first_tip_time = tip_crossings[0] * dt if tip_crossings.size else np.nan

        # Was cell tipped at cascade end? An empty cascade slice means "no".
        tipped_at_cascade_end = bool(cascade_traj.size and cascade_traj[-1] > 0)

        # Find recovery time (first persistent crossing back); only cells that
        # ended the cascade tipped can recover.
        recovery_time = np.nan
        recovered = False
        if tipped_at_cascade_end:
            recovery_idx = first_persistent_recovery(recovery_traj, persistence_steps)
            if recovery_idx is not None:
                recovery_time = recovery_idx * dt
                recovered = True

        cell_results.append({
            'cell_id': j,
            'first_tip_time': first_tip_time,
            'tipped_at_cascade_end': tipped_at_cascade_end,
            'recovered': recovered,
            'recovery_time': recovery_time,
            'mean_state_cascade': np.mean(cascade_traj),
            'mean_state_recovery': np.mean(recovery_traj),
            'pct_time_tipped': np.mean(x_traj > 0) * 100,
            'final_state': x_traj[-1],
        })

    return {
        'forcing': forcing,
        'seed': seed,
        'run_recovery_fraction': result.metrics['recovery_fraction'],
        'n_tipped_cascade': result.metrics['n_tipped_at_cascade_end'],
        'n_recovered': result.metrics['n_recovered'],
        'cell_results': cell_results,
    }

print("Worker function defined.")

## 6. Run Trajectory Analysis

In [None]:
# Serialize and scatter network
network_bytes = pickle.dumps(network)
print(f"Network serialized: {len(network_bytes) / 1024:.1f} KB")

network_future = client.scatter(network_bytes, broadcast=True)
print("Network broadcast to all workers")

# Build task arguments
print("\n" + "=" * 60)
print("EXPERIMENT 13: Starting Trajectory Analysis")
print("=" * 60)
start_time = time.time()

task_args = []
for f_idx, forcing in enumerate(SWEEP_CONFIG['forcing_values']):
    for run_idx in range(SWEEP_CONFIG['n_runs_per_forcing']):
        # Offsetting by 10000 per forcing level keeps seeds unique across levels.
        seed = SWEEP_CONFIG['base_seed'] + f_idx * 10000 + run_idx
        # Pass the scattered future, not the raw bytes: otherwise every task
        # carries its own copy of the pickled network and the broadcast above
        # is never used. Dask substitutes futures found inside tuple arguments
        # with their data before calling the worker function, so the worker
        # still receives the bytes it expects.
        task_args.append((network_future, float(forcing), SWEEP_CONFIG, seed))

print(f"Generated {len(task_args)} task arguments")

# Submit tasks
futures = client.map(run_trajectory_experiment, task_args)
print(f"Submitted {len(futures)} tasks")

# Collect results in completion order (not submission order)
all_results = []
print("\nProgress:")
for i, future in enumerate(as_completed(futures)):
    result = future.result()
    all_results.append(result)

    if (i + 1) % 30 == 0:
        elapsed = time.time() - start_time
        rate = (i + 1) / elapsed
        remaining = (len(futures) - i - 1) / rate
        print(f"  Completed {i+1}/{len(futures)} ({100*(i+1)/len(futures):.1f}%) "
              f"- {elapsed:.0f}s elapsed, ~{remaining:.0f}s remaining")

elapsed = time.time() - start_time
print(f"\n" + "=" * 60)
print(f"COMPLETE: {len(all_results)} simulations in {elapsed:.1f}s ({elapsed/60:.1f} min)")
print("=" * 60)

## 7. Results Aggregation

In [None]:
# Run-level table: one row per simulation.
_run_records = []
for _res in all_results:
    _run_records.append({
        'forcing': _res['forcing'],
        'seed': _res['seed'],
        'recovery_fraction': _res['run_recovery_fraction'],
        'n_tipped_cascade': _res['n_tipped_cascade'],
        'n_recovered': _res['n_recovered'],
    })
run_df = pd.DataFrame(_run_records)

print(f"Run-level results: {run_df.shape}")

# Cell-level table: one row per (simulation, cell), tagged with the
# simulation's forcing and seed.
cell_rows = [
    {'forcing': _res['forcing'], 'seed': _res['seed'], **_cell}
    for _res in all_results
    for _cell in _res['cell_results']
]

cell_df = pd.DataFrame(cell_rows)
print(f"Cell-level results: {cell_df.shape}")

cell_df.head()

In [None]:
# Merge cell properties with results.
# cell_df is rebound in place, so running this cell twice would merge the
# properties a second time and turn every property column into a suffixed
# pair (rain_x / rain_y, ...), breaking later lookups such as
# cell_df['total_degree']. The guard makes the cell safe to re-run.
if 'total_degree' not in cell_df.columns:
    # many_to_one: each cell_id must appear exactly once in cell_info.
    cell_df = cell_df.merge(cell_info, on='cell_id', validate='many_to_one')
print(f"Merged cell results: {cell_df.shape}")
cell_df.head()

## 8. Cell-Level Recovery Analysis

In [None]:
# Per-cell, per-forcing summary statistics across the ensemble.
# NOTE: the worker only marks a cell as recovered when it was tipped at the
# end of the cascade, so 'recovered_mean' is the share of ALL runs in which
# the cell tipped and then recovered, not a rate conditional on tipping.
_aggregations = {
    'tipped_at_cascade_end': 'mean',  # Proportion of runs where tipped
    'recovered': ['mean', 'sum'],  # Recovery rate and count
    'recovery_time': 'mean',  # Mean recovery time
    'first_tip_time': 'mean',  # Mean time to first tip
}
_per_cell = cell_df.groupby(['cell_id', 'forcing']).agg(_aggregations).round(4)

# Flatten the (column, statistic) pairs to names like 'recovered_mean'.
_per_cell.columns = ['_'.join(_pair) for _pair in _per_cell.columns]

# Turn the group keys back into columns and attach the static cell properties.
cell_recovery_summary = _per_cell.reset_index().merge(cell_info, on='cell_id')

print("Cell recovery rates by forcing:")
cell_recovery_summary.head(10)

In [None]:
# Rank correlation between each candidate predictor and the per-cell
# recovery rate, reported separately for every forcing level.
for _line in ("=" * 70, "PREDICTORS OF CELL-LEVEL RECOVERY", "=" * 70):
    print(_line)

for forcing in SWEEP_CONFIG['forcing_values']:
    _at_forcing = cell_recovery_summary['forcing'] == forcing
    subset = cell_recovery_summary[_at_forcing]

    print(f"\nForcing = {forcing}:")

    # Correlations with recovery rate
    predictors = ['total_degree', 'in_flow', 'net_flow', 'rain_evap_ratio']
    for pred in predictors:
        valid = subset.dropna(subset=['recovered_mean', pred])
        # Too few cells for a correlation worth reporting.
        if len(valid) <= 5:
            continue
        r, p = spearmanr(valid[pred], valid['recovered_mean'])
        # Mark results below the 5% level with an asterisk.
        if p < 0.05:
            sig = "*"
        else:
            sig = ""
        print(f"  {pred:<20}: ρ = {r:+.3f} (p = {p:.4f}){sig}")

## 9. Visualization: Recovery Patterns

In [None]:
# 2x3 overview figure of cell-level recovery patterns.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# One colour per forcing level; `colors` is also used by the later
# recovery-sequence figure.
colors = plt.cm.viridis(np.linspace(0.2, 0.8, len(SWEEP_CONFIG['forcing_values'])))

# Panel 1: Recovery Rate vs Total Degree
ax = axes[0, 0]
for i, forcing in enumerate(SWEEP_CONFIG['forcing_values']):
    subset = cell_recovery_summary[cell_recovery_summary['forcing'] == forcing]
    ax.scatter(subset['total_degree'], subset['recovered_mean'],
               c=[colors[i]], alpha=0.7, label=f'f={forcing}')
ax.set_xlabel('Total Degree (connectivity)', fontsize=12)
ax.set_ylabel('Recovery Rate', fontsize=12)
ax.set_title('Recovery vs Connectivity', fontsize=14)
ax.legend()
ax.grid(True, alpha=0.3)

# Panel 2: Recovery Rate vs Net Flow
ax = axes[0, 1]
for i, forcing in enumerate(SWEEP_CONFIG['forcing_values']):
    subset = cell_recovery_summary[cell_recovery_summary['forcing'] == forcing]
    ax.scatter(subset['net_flow'], subset['recovered_mean'],
               c=[colors[i]], alpha=0.7, label=f'f={forcing}')
# Vertical line at zero separates sources (negative) from sinks (positive).
ax.axvline(0, color='gray', linestyle='--', alpha=0.7)
ax.set_xlabel('Net Flow (+ = sink, - = source)', fontsize=12)
ax.set_ylabel('Recovery Rate', fontsize=12)
ax.set_title('Recovery vs Net Flow Balance', fontsize=14)
ax.legend()
ax.grid(True, alpha=0.3)

# Panel 3: Recovery Time Distribution
# Uses the per-run cell table and only rows where the cell actually recovered.
ax = axes[0, 2]
for i, forcing in enumerate(SWEEP_CONFIG['forcing_values']):
    subset = cell_df[(cell_df['forcing'] == forcing) & (cell_df['recovered'] == True)]
    if len(subset) > 0:
        ax.hist(subset['recovery_time'].dropna(), bins=30, alpha=0.5,
                color=colors[i], label=f'f={forcing}')
ax.set_xlabel('Recovery Time', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.set_title('Recovery Time Distribution', fontsize=14)
ax.legend()
ax.grid(True, alpha=0.3)

# Panel 4: First Tip Time vs Recovery Time
# Pools recovered cells from all forcing levels; colour encodes connectivity.
ax = axes[1, 0]
recovered = cell_df[cell_df['recovered'] == True]
if len(recovered) > 0:
    scatter = ax.scatter(recovered['first_tip_time'], recovered['recovery_time'],
                         c=recovered['total_degree'], cmap='viridis',
                         alpha=0.5, s=20)
    plt.colorbar(scatter, ax=ax, label='Connectivity')
ax.set_xlabel('First Tip Time (cascade phase)', fontsize=12)
ax.set_ylabel('Recovery Time (recovery phase)', fontsize=12)
ax.set_title('Tip vs Recovery Timing', fontsize=14)
ax.grid(True, alpha=0.3)

# Panel 5: Cell Recovery Heatmap (by cell_id and forcing)
# Rows are cells, columns are forcing levels; colour scale fixed to [0, 1].
ax = axes[1, 1]
pivot = cell_recovery_summary.pivot(index='cell_id', columns='forcing', 
                                     values='recovered_mean')
im = ax.imshow(pivot.values, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
ax.set_xticks(range(len(pivot.columns)))
ax.set_xticklabels([f'{f:.1f}' for f in pivot.columns])
ax.set_ylabel('Cell ID', fontsize=12)
ax.set_xlabel('Forcing', fontsize=12)
ax.set_title('Recovery Rate by Cell and Forcing', fontsize=14)
plt.colorbar(im, ax=ax, label='Recovery Rate')

# Panel 6: Recovery Rate by Forcing (violin plot)
# One violin per forcing level, built from the per-cell recovery rates.
ax = axes[1, 2]
data_by_forcing = [cell_recovery_summary[cell_recovery_summary['forcing'] == f]['recovered_mean'].values
                   for f in SWEEP_CONFIG['forcing_values']]
parts = ax.violinplot(data_by_forcing, showmeans=True, showmedians=True)
# Violin positions start at 1, hence the shifted tick range.
ax.set_xticks(range(1, len(SWEEP_CONFIG['forcing_values']) + 1))
ax.set_xticklabels([f'{f:.1f}' for f in SWEEP_CONFIG['forcing_values']])
ax.set_xlabel('Forcing', fontsize=12)
ax.set_ylabel('Cell Recovery Rate', fontsize=12)
ax.set_title('Recovery Rate Distribution by Forcing', fontsize=14)
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
# NOTE(review): assumes /workspace/data already exists - confirm, savefig does not create it.
plt.savefig('/workspace/data/exp13_recovery_patterns.png', dpi=150, bbox_inches='tight')
plt.show()
print("\nPlot saved to /workspace/data/exp13_recovery_patterns.png")

## 10. Recovery Sequence Analysis

In [None]:
# Analyze the ORDER of recovery
print("=" * 70)
print("RECOVERY SEQUENCE ANALYSIS")
print("=" * 70)

# For each run, rank cells by recovery time
sequence_results = []

for r in all_results:
    # Get cells that recovered with their times
    recovered_cells = [(c['cell_id'], c['recovery_time'])
                       for c in r['cell_results']
                       if c['recovered'] and not np.isnan(c['recovery_time'])]

    # A recovery order needs at least two recovered cells in the run.
    if len(recovered_cells) > 1:
        # Sort by recovery time (stable: ties keep cell_id order)
        recovered_cells.sort(key=lambda x: x[1])

        # Assign ranks (1 = first cell to recover in this run)
        for rank, (cell_id, rec_time) in enumerate(recovered_cells):
            sequence_results.append({
                'forcing': r['forcing'],
                'seed': r['seed'],
                'cell_id': cell_id,
                'recovery_rank': rank + 1,
                'recovery_time': rec_time,
                'n_recovered_this_run': len(recovered_cells),
            })

# Explicit columns keep the merge below working even when no run produced a
# recovery sequence (an empty list would otherwise yield a column-less frame).
sequence_columns = ['forcing', 'seed', 'cell_id', 'recovery_rank',
                    'recovery_time', 'n_recovered_this_run']
sequence_df = pd.DataFrame(sequence_results, columns=sequence_columns)
sequence_df['cell_id'] = sequence_df['cell_id'].astype(cell_info['cell_id'].dtype)
sequence_df = sequence_df.merge(cell_info, on='cell_id')

# Normalize rank to a true 0-1 scale: 0 = first to recover, 1 = last.
# (rank / n never reaches 0 and is shifted towards 1, which makes the
# "<= 0.25" early group systematically smaller than the ">= 0.75" late group.)
# n_recovered_this_run is always >= 2 here, so the denominator is never zero.
sequence_df['normalized_rank'] = (
    (sequence_df['recovery_rank'] - 1) / (sequence_df['n_recovered_this_run'] - 1)
)

print(f"\nRecovery sequence data: {len(sequence_df)} records")

In [None]:
# Split the recovery-sequence records into the earliest and latest quarter
# (by normalized rank) and compare their cell properties.
early_threshold = 0.25  # First 25% to recover

_norm_rank = sequence_df['normalized_rank']
early_recoverers = sequence_df[_norm_rank <= early_threshold]
late_recoverers = sequence_df[_norm_rank >= (1 - early_threshold)]

for _line in ("=" * 70, "EARLY vs LATE RECOVERERS", "=" * 70):
    print(_line)

print(f"\nEarly recoverers (first {early_threshold*100:.0f}%): {len(early_recoverers)} observations")
print(f"Late recoverers (last {early_threshold*100:.0f}%): {len(late_recoverers)} observations")

# Cell properties compared between the two groups (reused by the findings cell)
properties = ['total_degree', 'in_flow', 'net_flow', 'rain_evap_ratio']

print(f"\n{'Property':<20} {'Early Mean':<15} {'Late Mean':<15} {'Difference':<15}")
print("-" * 65)
for prop in properties:
    early_mean, late_mean = (
        _group[prop].mean() for _group in (early_recoverers, late_recoverers)
    )
    # Positive difference: early recoverers have more of this property.
    diff = early_mean - late_mean
    print(f"{prop:<20} {early_mean:<15.2f} {late_mean:<15.2f} {diff:<+15.2f}")

In [None]:
# Plot recovery sequence patterns
# Two panels: per-observation rank vs connectivity, and per-cell mean rank.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Panel 1: Normalized Rank vs Connectivity
# NOTE: `colors` is not defined in this cell; it comes from the earlier
# recovery-patterns figure cell, which must have been run first.
ax = axes[0]
for i, forcing in enumerate(SWEEP_CONFIG['forcing_values']):
    subset = sequence_df[sequence_df['forcing'] == forcing]
    if len(subset) > 0:
        ax.scatter(subset['total_degree'], subset['normalized_rank'],
                   c=[colors[i]], alpha=0.3, s=20, label=f'f={forcing}')
ax.set_xlabel('Total Degree (connectivity)', fontsize=12)
ax.set_ylabel('Normalized Recovery Rank (0=first, 1=last)', fontsize=12)
ax.set_title('Recovery Order vs Connectivity', fontsize=14)
ax.legend()
ax.grid(True, alpha=0.3)

# Panel 2: Mean Recovery Rank by Cell
# Cells are sorted so the earliest recoverers (lowest mean rank) come first.
ax = axes[1]
mean_rank = sequence_df.groupby('cell_id')['normalized_rank'].mean().sort_values()
# cell_ids in plotting order; not used further within this cell.
cell_order = mean_rank.index.values

# Bar colour follows the bar height via the reversed RdYlGn colormap.
ax.bar(range(len(mean_rank)), mean_rank.values, 
       color=plt.cm.RdYlGn_r(mean_rank.values))
# Reference line at the midpoint of the normalized rank scale.
ax.axhline(0.5, color='gray', linestyle='--', alpha=0.7)
ax.set_xlabel('Cell (ordered by mean recovery rank)', fontsize=12)
ax.set_ylabel('Mean Normalized Recovery Rank', fontsize=12)
ax.set_title('Cells Ordered by Recovery Timing', fontsize=14)
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
# NOTE(review): assumes /workspace/data already exists - confirm, savefig does not create it.
plt.savefig('/workspace/data/exp13_recovery_sequence.png', dpi=150, bbox_inches='tight')
plt.show()

## 11. Key Findings Summary

In [None]:
print("\n" + "=" * 70)
print("EXPERIMENT 13: KEY FINDINGS")
print("=" * 70)

# 1. Best and worst recovering cells
# The best cell is the (cell, forcing) row with the highest recovery rate
# across all forcing levels.
best_cell = cell_recovery_summary.loc[cell_recovery_summary['recovered_mean'].idxmax()]

# The worst cell is only searched among rows tipped in more than half of the
# runs. The flattened aggregate column is 'tipped_at_cascade_end_mean'; the
# previous lookup of 'tipped_at_cascade_end_' raised a KeyError.
frequently_tipped = cell_recovery_summary[
    cell_recovery_summary['tipped_at_cascade_end_mean'] > 0.5
]
worst_cell = (
    frequently_tipped.sort_values('recovered_mean').iloc[0]
    if len(frequently_tipped) > 0 else None
)

print(f"""
1. CELL-LEVEL RECOVERY VARIATION:
   Best recovering cell: cell_{int(best_cell['cell_id'])}
     - Recovery rate: {best_cell['recovered_mean']:.1%}
     - Total degree: {best_cell['total_degree']:.0f}
     - Net flow: {best_cell['net_flow']:.1f} mm
""")

if worst_cell is not None:
    print(f"""   Worst recovering cell (among those frequently tipped): cell_{int(worst_cell['cell_id'])}
     - Recovery rate: {worst_cell['recovered_mean']:.1%}
     - Total degree: {worst_cell['total_degree']:.0f}
     - Net flow: {worst_cell['net_flow']:.1f} mm
""")

# 2. Recovery predictors
print("2. RECOVERY PREDICTORS (passive recovery, f=0):")
passive = cell_recovery_summary[cell_recovery_summary['forcing'] == 0]
for pred in ['total_degree', 'in_flow', 'net_flow']:
    r, p = spearmanr(passive[pred], passive['recovered_mean'])
    # Only the sign of the rank correlation decides the wording.
    direction = "higher" if r > 0 else "lower"
    print(f"   {pred}: ρ = {r:+.3f} ({direction} {pred} → better recovery)")

# 3. Early vs late recoverers
print(f"""
3. RECOVERY SEQUENCE:
   Early recoverers tend to have:
""")
for prop in properties:
    early_mean = early_recoverers[prop].mean()
    late_mean = late_recoverers[prop].mean()
    # Report a property only when the groups differ by more than 10% of the
    # early mean (denominator floored at 1).
    if abs(early_mean - late_mean) / max(abs(early_mean), 1) > 0.1:
        direction = "higher" if early_mean > late_mean else "lower"
        print(f"     - {direction} {prop}")

# 4. Mean recovery times
print("\n4. MEAN RECOVERY TIMES BY FORCING:")
for forcing in SWEEP_CONFIG['forcing_values']:
    recovered = cell_df[(cell_df['forcing'] == forcing) & (cell_df['recovered'] == True)]
    if len(recovered) > 0:
        mean_time = recovered['recovery_time'].mean()
        print(f"   f = {forcing:.1f}: {mean_time:.1f} time units")

## 12. Save Results

In [None]:
# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing the three result tables.
Path('/workspace/data').mkdir(parents=True, exist_ok=True)

# One row per simulation
run_df.to_csv('/workspace/data/experiment13_run_summary.csv', index=False)
print(f"Run summary saved to /workspace/data/experiment13_run_summary.csv")

# One row per (cell, forcing level)
cell_recovery_summary.to_csv('/workspace/data/experiment13_cell_recovery.csv', index=False)
print(f"Cell recovery summary saved to /workspace/data/experiment13_cell_recovery.csv")

# One row per recovered cell per run, with its recovery rank
sequence_df.to_csv('/workspace/data/experiment13_recovery_sequence.csv', index=False)
print(f"Recovery sequence data saved to /workspace/data/experiment13_recovery_sequence.csv")

In [None]:
# Closing banner followed by a recap of the configuration and output files.
_rule = "=" * 70
print("\n" + _rule)
print("EXPERIMENT 13 COMPLETE")
print(_rule)

# The "KEY INSIGHTS" lines are fixed text, not derived from the results.
_closing_summary = f"""
CONFIGURATION:
- Forcing levels: {SWEEP_CONFIG['forcing_values']}
- Runs per forcing: {SWEEP_CONFIG['n_runs_per_forcing']}
- Total simulations: {len(all_results)}
- Runtime: {elapsed:.1f}s ({elapsed/60:.1f} min)

FILES GENERATED:
- /workspace/data/experiment13_run_summary.csv
- /workspace/data/experiment13_cell_recovery.csv
- /workspace/data/experiment13_recovery_sequence.csv
- /workspace/data/exp13_recovery_patterns.png
- /workspace/data/exp13_recovery_sequence.png

KEY INSIGHTS:
- Cell-level recovery rates vary significantly
- Network topology predicts recovery potential
- Recovery follows predictable spatial patterns
"""
print(_closing_summary)