# Experiment 8: Network Fragmentation and Deforestation Scenarios

**Phase 4: Analysis and Validation**

**Author:** Jason Holt  
**Date:** December 2025

---

## Objective

Test whether progressive removal of network connections causes thermodynamic buffering to collapse, and identify critical connectivity thresholds.

## Scientific Rationale

Phase 3 found that the Amazon moisture recycling network provides scale-invariant buffering (tip/recovery ratio ≈ 1.0). This suggests network connectivity is essential for resilience. Deforestation breaks moisture recycling pathways—if buffering depends on connectivity, there may be a critical threshold below which resilience collapses catastrophically.

## Key Questions

1. At what connectivity threshold does thermodynamic buffering collapse?
2. Does the fragmentation method (random vs targeted) affect the threshold?
3. Are there "keystone" connections whose removal is catastrophic?

## Setup

In [None]:
import sys
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd
from tqdm import tqdm

# Add src to path - use k3s mount path
research_path = '/opt/research-local/src'
if research_path not in sys.path:
    sys.path.insert(0, research_path)

# Import energy-constrained module
from energy_constrained import (
    EnergyConstrainedNetwork,
    EnergyConstrainedCusp,
    GradientDrivenCoupling,
    EnergyAnalyzer,
    fragment_network,
    compute_network_metrics,
    run_fragmentation_sweep,
    get_dask_client,
    run_ensemble_parallel,
    results_to_solver_results,
    DASK_SUPPORT
)

print(f"Dask support: {DASK_SUPPORT}")

# Set random seed for reproducibility
np.random.seed(42)

In [None]:
# Initialize Dask client
# dask_client stays None when the package reports no Dask support or when the
# connection attempt fails; run_fragmentation_experiment checks this global
# and uses the serial ensemble runner in that case.
dask_client = None
if DASK_SUPPORT:
    try:
        dask_client = get_dask_client()
        if dask_client:
            print(f"Dask dashboard: {dask_client.dashboard_link}")
            print(f"Connected to {len(dask_client.scheduler_info()['workers'])} Dask workers")
    except Exception as e:
        # Best-effort: the notebook must stay usable without a cluster.
        print(f"Dask initialization failed: {e}")
        dask_client = None

# Only claim the cluster is ready when a client was actually obtained.
if dask_client:
    print("\nDask cluster ready for parallel ensemble runs!")
else:
    print("\nDask unavailable - ensemble runs will execute serially.")

## Section 1: Load Amazon Network Data

We'll use the 50-cell Amazon subnetwork from Phase 3 as our baseline.

In [None]:
# Load Amazon moisture recycling data
from netCDF4 import Dataset

DATA_PATH = Path('/opt/research-local/data/amazon/amazon_adaptation_model/average_network/era5_new_network_data')

def load_amazon_data(year=2003, months=(7, 8, 9)):
    """Load and average Amazon moisture recycling data for specified months.

    Parameters
    ----------
    year : int
        Year used to build the file name ``1deg_{year}_{month:02d}.nc``.
    months : iterable of int
        Month numbers to average over. A month whose file does not exist
        under ``DATA_PATH`` is skipped and reported.

    Returns
    -------
    dict
        'rain', 'evap', 'network': mean over the loaded months (axis 0 of
        the stacked monthly arrays); 'n_cells': length of the first loaded
        'rain' array.

    Raises
    ------
    FileNotFoundError
        If none of the requested monthly files exist.
    """
    all_rain = []
    all_evap = []
    all_network = []
    missing = []

    for month in months:
        file_path = DATA_PATH / f'1deg_{year}_{month:02d}.nc'
        if not file_path.exists():
            missing.append(file_path.name)
            continue
        with Dataset(file_path, 'r') as ds:
            all_rain.append(ds.variables['rain'][:])
            all_evap.append(ds.variables['evap'][:])
            all_network.append(ds.variables['network'][:])

    # Fail with a clear message instead of averaging an empty list
    # (which yields NaN warnings followed by an IndexError).
    if not all_rain:
        raise FileNotFoundError(
            f"No Amazon data files found for year {year} in {DATA_PATH} "
            f"(looked for: {', '.join(missing) or 'no months requested'})"
        )

    # A partial average changes the result, so make skipped months visible.
    if missing:
        print(f"Warning: skipped missing files: {', '.join(missing)}")

    return {
        'rain': np.mean(all_rain, axis=0),
        'evap': np.mean(all_evap, axis=0),
        'network': np.mean(all_network, axis=0),
        'n_cells': len(all_rain[0])
    }

# Load 2003 (normal year) data
# Only `year` is passed, so load_amazon_data's default month selection applies.
amazon_data = load_amazon_data(year=2003)
# Quick sanity printout of what was loaded (cell count, matrix shape, rain extremes).
print(f"Loaded Amazon data: {amazon_data['n_cells']} cells")
print(f"Network shape: {amazon_data['network'].shape}")
print(f"Rain range: {amazon_data['rain'].min():.1f} - {amazon_data['rain'].max():.1f} mm")

In [None]:
def create_amazon_network(data, n_cells=50, min_flow=1.0):
    """
    Build an EnergyConstrainedNetwork from Amazon moisture recycling fields.

    Parameters
    ----------
    data : dict
        Mapping holding the 'rain', 'evap' and 'network' arrays.
    n_cells : int
        Number of cells to keep; the cells with the largest summed
        row + column flow in 'network' are selected.
    min_flow : float
        A directed coupling is added only where the flow strictly exceeds
        this value (mm/month).

    Returns
    -------
    EnergyConstrainedNetwork
        Network with one cusp element per selected cell, named
        'cell_0' ... in ascending order of total flow.
    """
    flows = data['network']
    rainfall = data['rain']
    evaporation = data['evap']

    # Rank cells by total (column sum + row sum) flow; keep the n_cells largest.
    connectivity = flows.sum(axis=0) + flows.sum(axis=1)
    selected = np.argsort(connectivity)[-n_cells:]

    net = EnergyConstrainedNetwork()

    # One cusp element per selected cell. The rain/evap ratio is capped at 2,
    # so the barrier height never exceeds 0.7; a lower ratio gives a lower
    # barrier (more vulnerable cell).
    for pos, cell in enumerate(selected):
        capped_ratio = min(rainfall[cell] / max(evaporation[cell], 1), 2.0)
        barrier = 0.3 + 0.4 * min(capped_ratio / 2, 1.0)
        cusp = EnergyConstrainedCusp(
            a=-1.0,
            b=1.0,
            c=0.0,
            x_0=0.0,
            barrier_height=barrier,
            dissipation_rate=0.1
        )
        net.add_element(f'cell_{pos}', cusp)

    # Directed couplings between distinct selected cells; conductivity is the
    # flow scaled by 1/100.
    n_edges = 0
    for src, cell_src in enumerate(selected):
        for dst, cell_dst in enumerate(selected):
            if src == dst:
                continue
            flow = flows[cell_src, cell_dst]
            if flow > min_flow:
                link = GradientDrivenCoupling(
                    conductivity=flow / 100.0,
                    state_coupling=0.1
                )
                net.add_coupling(f'cell_{src}', f'cell_{dst}', link)
                n_edges += 1

    print(f"Created network: {net.n_elements} nodes, {n_edges} edges")
    return net

# Create baseline 50-cell network
baseline_network = create_amazon_network(amazon_data, n_cells=50, min_flow=1.0)
baseline_metrics = compute_network_metrics(baseline_network)
print("\nBaseline network metrics:")
# Floats are shown with four decimals; everything else uses its default str().
for metric_name, metric_value in baseline_metrics.items():
    if isinstance(metric_value, float):
        print(f"  {metric_name}: {metric_value:.4f}")
    else:
        print(f"  {metric_name}: {metric_value}")

## Section 2: Test Fragmentation Function

Verify the fragmentation function works correctly before running experiments.

In [None]:
# Test fragmentation at different levels
test_levels = [1.0, 0.75, 0.50, 0.25, 0.10]

print("Testing network fragmentation:")
print(
    f"{'Retention':<12} {'Nodes':<8} {'Edges':<8} "
    f"{'Density':<10} {'Components':<12}"
)
print("-" * 50)

# The same seed is used at every level, so rows differ only in retention.
for retention in test_levels:
    fragmented = fragment_network(baseline_network, retention, method='random', seed=42)
    metrics = compute_network_metrics(fragmented)
    print(
        f"{retention:<12.0%} {metrics['n_nodes']:<8} {metrics['n_edges']:<8} "
        f"{metrics['density']:<10.4f} {metrics['n_components']:<12}"
    )

In [None]:
# Test different fragmentation methods
methods = ['random', 'low_flow_first', 'high_betweenness_first']
retention = 0.5

print(f"\nFragmentation methods at {retention:.0%} retention:")
print(
    f"{'Method':<25} {'Edges':<8} {'Density':<10} "
    f"{'Clustering':<12} {'Components':<12}"
)
print("-" * 70)

# One row per removal strategy, all at the same retention and seed.
for method in methods:
    fragmented = fragment_network(baseline_network, retention, method=method, seed=42)
    metrics = compute_network_metrics(fragmented)
    print(
        f"{method:<25} {metrics['n_edges']:<8} {metrics['density']:<10.4f} "
        f"{metrics['clustering']:<12.4f} {metrics['n_components']:<12}"
    )

## Section 3: Fragmentation Experiment Configuration

Define the experimental parameters for the fragmentation sweep.

In [None]:
# Experiment configuration
FRAGMENTATION_CONFIG = dict(
    retention_fractions=[1.0, 0.90, 0.75, 0.50, 0.25, 0.10],
    methods=['random', 'high_betweenness_first'],
    n_fragmentation_replicates=3,  # Random fragmentation replicates
    n_simulation_runs=10,  # Ensemble size per network
    duration=500.0,
    dt=0.5,
    sigma=0.06,
    alpha=1.5,  # Lévy noise (cascade-triggering)
    seed=42,
)

# Calculate total simulations: levels x methods x replicates x ensemble size
n_levels = len(FRAGMENTATION_CONFIG['retention_fractions'])
n_methods = len(FRAGMENTATION_CONFIG['methods'])
n_frag_reps = FRAGMENTATION_CONFIG['n_fragmentation_replicates']
n_sim_runs = FRAGMENTATION_CONFIG['n_simulation_runs']

total_sims = n_levels * n_methods * n_frag_reps * n_sim_runs

# Emit the configuration report in one write; line content is unchanged.
print("\n".join([
    "Experiment Configuration:",
    f"  Retention levels: {n_levels}",
    f"  Fragmentation methods: {n_methods}",
    f"  Fragmentation replicates: {n_frag_reps}",
    f"  Simulation runs per network: {n_sim_runs}",
    f"  Total simulations: {total_sims}",
    "  " + "=" * 40,
    f"  Noise: σ={FRAGMENTATION_CONFIG['sigma']}, α={FRAGMENTATION_CONFIG['alpha']} (Lévy)",
    f"  Duration: {FRAGMENTATION_CONFIG['duration']} time units",
]))

## Section 4: Run Fragmentation Sweep Experiment

Run simulations across all fragmentation levels and methods.

In [None]:
def _run_ensemble_for_network(network, config, sim_seed):
    """Run one simulation ensemble on `network` and return the raw results.

    Uses run_ensemble_parallel when the notebook-level `dask_client` is set;
    otherwise runs serially and repacks each result into a dict with the keys
    'run_idx', 't', 'x', 'E', 'y', 'diagnostics'.
    """
    ensemble_kwargs = dict(
        n_runs=config['n_simulation_runs'],
        duration=config['duration'],
        dt=config['dt'],
        sigma=config['sigma'],
        alpha=config['alpha'],
        seed=sim_seed,
    )

    if dask_client:
        return run_ensemble_parallel(network, **ensemble_kwargs)

    # Serial fallback: imported lazily because it is only needed without Dask.
    from energy_constrained.solvers import run_ensemble
    serial_results = run_ensemble(network, **ensemble_kwargs)
    # NOTE(review): presumably this mirrors the structure returned by
    # run_ensemble_parallel so results_to_solver_results accepts it - confirm.
    return [
        {'run_idx': i, 't': r.t, 'x': r.x, 'E': r.E,
         'y': r.y, 'diagnostics': r.diagnostics}
        for i, r in enumerate(serial_results)
    ]


def _aggregate_ensemble(network, solver_results, n_runs):
    """Reduce one ensemble to per-run averages plus the tip/recovery event ratio.

    Averages divide by `n_runs` (the configured ensemble size). The ratio is
    total tip events over total recovery events, or NaN when no recovery
    event was recorded.
    """
    total_entropy = 0
    n_tip_events = 0
    n_recovery_events = 0
    pct_tipped = 0

    for result in solver_results:
        # The analyzer needs both the network and the individual run.
        analyzer = EnergyAnalyzer(network, result)
        total_entropy += analyzer.compute_total_entropy_produced()

        # Events whose direction is not 'tip' are counted as recoveries.
        for event in analyzer.identify_tipping_events():
            if event.direction == 'tip':
                n_tip_events += 1
            else:
                n_recovery_events += 1

        # Share of elements with x > 0 at the final time step (an end-of-run
        # snapshot, not a time average).
        x_final = result.x[-1] if hasattr(result, 'x') else result['x'][-1]
        pct_tipped += np.mean(x_final > 0) * 100

    # Ratio is based on EVENT COUNTS (not entropy).
    if n_recovery_events > 0:
        tip_recovery_ratio = n_tip_events / n_recovery_events
    else:
        tip_recovery_ratio = np.nan

    # Key order here fixes the column order of the result DataFrame.
    return {
        'avg_entropy': total_entropy / n_runs,
        'avg_tip_events': n_tip_events / n_runs,
        'avg_recovery_events': n_recovery_events / n_runs,
        'tip_recovery_ratio': tip_recovery_ratio,
        'avg_pct_tipped': pct_tipped / n_runs,
    }


def run_fragmentation_experiment(baseline_net, config):
    """
    Run complete fragmentation experiment.

    For every method, retention fraction and fragmentation replicate, builds
    the (possibly fragmented) network, runs a simulation ensemble on it and
    records network metrics together with ensemble-averaged outcomes.
    Progress is printed as the sweep advances.

    Parameters
    ----------
    baseline_net : EnergyConstrainedNetwork
        Unfragmented network; used directly when retention is exactly 1.0.
    config : dict
        Must provide 'methods', 'retention_fractions',
        'n_fragmentation_replicates', 'n_simulation_runs', 'duration', 'dt',
        'sigma', 'alpha' and 'seed'.

    Returns
    -------
    pandas.DataFrame
        One row per (method, retention, replicate) with columns 'method',
        'retention', 'replicate', 'n_edges', 'density', 'n_components',
        'largest_component', 'avg_entropy', 'avg_tip_events',
        'avg_recovery_events', 'tip_recovery_ratio', 'avg_pct_tipped'.
    """
    results = []
    n_runs = config['n_simulation_runs']

    for method in config['methods']:
        print(f"\n{'='*60}")
        print(f"Method: {method}")
        print(f"{'='*60}")

        for retention in config['retention_fractions']:
            print(f"\n  Retention: {retention:.0%}")

            for rep in range(config['n_fragmentation_replicates']):
                # Seeds depend only on the replicate index, so the same
                # seeds are reused across methods and retention levels.
                frag_seed = config['seed'] + rep * 1000

                if retention == 1.0:
                    # Use baseline network (no fragmentation)
                    network = baseline_net
                else:
                    network = fragment_network(
                        baseline_net,
                        retention_fraction=retention,
                        method=method,
                        seed=frag_seed
                    )
                net_metrics = compute_network_metrics(network)

                sim_seed = frag_seed + rep
                ensemble_results = _run_ensemble_for_network(network, config, sim_seed)

                # Convert to solver results
                solver_results = results_to_solver_results(ensemble_results) if results_to_solver_results else ensemble_results

                summary = _aggregate_ensemble(network, solver_results, n_runs)

                results.append({
                    'method': method,
                    'retention': retention,
                    'replicate': rep,
                    'n_edges': net_metrics['n_edges'],
                    'density': net_metrics['density'],
                    'n_components': net_metrics['n_components'],
                    'largest_component': net_metrics['largest_component_fraction'],
                    **summary,
                })

                print(f"    Rep {rep}: edges={net_metrics['n_edges']}, "
                      f"entropy={summary['avg_entropy']:.1f}, "
                      f"tip/rec={summary['tip_recovery_ratio']:.3f}")

    return pd.DataFrame(results)

# Announce the sweep; the run itself is launched in the next cell.
print(
    "Starting fragmentation experiment...\n"
    f"Total simulations: {total_sims}"
)

In [None]:
# Run the experiment
fragmentation_results = run_fragmentation_experiment(
    baseline_net=baseline_network,
    config=FRAGMENTATION_CONFIG,
)

print("\n\nExperiment complete!")
print(f"Results shape: {fragmentation_results.shape}")

## Section 5: Analyze Results

In [None]:
# Summary statistics by retention level and method
# Mean (and, for outcome columns, standard deviation) across fragmentation
# replicates, rounded to three decimals.
summary = (
    fragmentation_results
    .groupby(['method', 'retention'])
    .agg({
        'n_edges': 'mean',
        'avg_entropy': ['mean', 'std'],
        'tip_recovery_ratio': ['mean', 'std'],
        'avg_pct_tipped': ['mean', 'std'],
        'n_components': 'mean',
    })
    .round(3)
)

print("Summary Statistics:")
print(summary)

In [None]:
# Plot tip/recovery ratio, entropy and tipped fraction vs retention
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

methods = fragmentation_results['method'].unique()
colors = ['blue', 'red', 'green']

# One entry per panel: (result column, y-axis label, title, draw ratio=1 line).
# The ratio column is computed from tip/recovery EVENT COUNTS, so it is
# labelled as an event ratio rather than an entropy ratio.
panels = [
    ('tip_recovery_ratio', 'Tip/Recovery Event Ratio',
     'Thermodynamic Asymmetry vs Fragmentation', True),
    ('avg_entropy', 'Total Entropy',
     'Entropy Production vs Fragmentation', False),
    ('avg_pct_tipped', 'Cells Tipped (%)',
     'Cascade Extent vs Fragmentation', False),
]

for ax, (column, ylabel, title, show_reference) in zip(axes, panels):
    for i, method in enumerate(methods):
        data = fragmentation_results[fragmentation_results['method'] == method]
        # Mean and spread across fragmentation replicates at each retention level.
        grouped = data.groupby('retention')[column].agg(['mean', 'std'])
        ax.errorbar(grouped.index * 100, grouped['mean'], yerr=grouped['std'],
                    marker='o', label=method,
                    color=colors[i % len(colors)],  # cycle if more methods than colors
                    capsize=3)

    if show_reference:
        ax.axhline(y=1.0, color='gray', linestyle='--', label='Symmetric (ratio=1)')

    ax.set_xlabel('Edge Retention (%)')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.set_xlim(0, 105)

plt.tight_layout()
plt.savefig('/workspace/data/fragmentation_results.png', dpi=150, bbox_inches='tight')
plt.show()

print("\nPlot saved to /workspace/data/fragmentation_results.png")

## Section 6: Identify Critical Threshold

Find the highest edge-retention level at which buffering has collapsed, i.e. where the mean tip/recovery ratio exceeds the collapse criterion (1.5 in the cell below).

In [None]:
# Find threshold where ratio significantly exceeds 1
THRESHOLD_CRITERION = 1.5  # Ratio above which buffering is "collapsed"

print(f"Critical threshold analysis (criterion: ratio > {THRESHOLD_CRITERION})")
print("=" * 60)

for method in methods:
    method_rows = fragmentation_results[fragmentation_results['method'] == method]
    mean_ratio = method_rows.groupby('retention')['tip_recovery_ratio'].mean()

    # Retention levels whose replicate-averaged ratio is above the criterion.
    exceeded = mean_ratio[mean_ratio > THRESHOLD_CRITERION]

    if exceeded.empty:
        print(f"\n{method}: Buffering maintained at all levels (ratio never > {THRESHOLD_CRITERION})")
        continue

    # Highest retention where threshold exceeded
    critical = exceeded.index.max()
    print(f"\n{method}:")
    print(f"  Critical threshold: {critical:.0%} retention")
    print(f"  Ratio at threshold: {mean_ratio[critical]:.3f}")

## Section 7: Key Findings Summary

In [None]:
# Generate summary table
print("\n" + "="*70)
print("EXPERIMENT 8: NETWORK FRAGMENTATION RESULTS")
print("="*70)

# Baseline vs most fragmented
# The most fragmented level is taken from the results themselves, so the
# summary stays correct if the configured retention fractions change.
baseline = fragmentation_results[fragmentation_results['retention'] == 1.0]
min_retention = fragmentation_results['retention'].min()
most_fragmented = fragmentation_results[fragmentation_results['retention'] == min_retention]

# Same four statistics for both subsets (means over methods and replicates).
for label, subset in (
    ("Baseline (100% retention)", baseline),
    (f"Most Fragmented ({min_retention:.0%} retention)", most_fragmented),
):
    print(f"\n{label}:")
    print(f"  Edges: {subset['n_edges'].mean():.0f}")
    print(f"  Tip/Recovery Ratio: {subset['tip_recovery_ratio'].mean():.3f}")
    print(f"  Total Entropy: {subset['avg_entropy'].mean():.1f}")
    print(f"  % Tipped: {subset['avg_pct_tipped'].mean():.1f}%")

# Change: ratio as a multiplicative factor, entropy as a percentage difference
ratio_change = most_fragmented['tip_recovery_ratio'].mean() / baseline['tip_recovery_ratio'].mean()
entropy_change = (most_fragmented['avg_entropy'].mean() - baseline['avg_entropy'].mean()) / baseline['avg_entropy'].mean() * 100

# These rows are at the lowest RETENTION level, so label them as retention.
print(f"\nChange at {min_retention:.0%} retention:")
print(f"  Tip/Recovery Ratio: {ratio_change:.1f}x")
print(f"  Entropy: {entropy_change:+.1f}%")

In [None]:
# Save results to CSV
# Writes the per-replicate results table without the DataFrame index column.
output_path = '/workspace/data/experiment8_fragmentation_results.csv'
fragmentation_results.to_csv(output_path, index=False)
print(f"Results saved to {output_path}")

## Conclusions

### Key Findings

1. **[To be filled after running experiment]**

2. **[To be filled after running experiment]**

3. **[To be filled after running experiment]**

### Implications for Conservation

- [To be filled after analyzing results]

### Next Steps

- Experiment 9: Recovery Dynamics
- Experiment 10: α-Sweep for Lévy-Gaussian transition