# Experiment 12c: Connectivity Threshold Mapping

**Objective**: Map the recovery-connectivity relationship to identify the minimum network density required for meaningful recovery, and determine whether preserving keystone edges shifts this threshold.

## Background

Experiment 12b revealed:
- **6 keystone edges alone** → 0.2% recovery (insufficient)
- **Full network (~1000 edges)** → 41.6% recovery
- A critical connectivity threshold must therefore lie somewhere between these two extremes

## Key Questions

1. What is the minimum connectivity for meaningful recovery (>10%)?
2. Does keystone preservation shift this threshold?
3. Is the transition sharp (phase transition) or gradual (continuous)?

In [None]:
import sys
sys.path.insert(0, '/opt/research-local/src')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import time
from pathlib import Path
from netCDF4 import Dataset
from dask.distributed import as_completed

from energy_constrained import get_dask_client

print("Imports complete")

In [None]:
# Attach to the Dask cluster and report how many workers it currently exposes
client = get_dask_client()
worker_count = len(client.scheduler_info()['workers'])
print(f"Connected to Dask cluster with {worker_count} workers")
client

In [None]:
DATA_PATH = Path('/opt/research-local/data/amazon/amazon_adaptation_model/average_network/era5_new_network_data')

def load_amazon_data(year=2003, months=(7, 8, 9)):
    """Load Amazon moisture recycling data and average it over several months.

    For each requested month the file ``1deg_{year}_{month:02d}.nc`` under
    ``DATA_PATH`` is read, and its ``rain``, ``evap`` and ``network`` variables
    are averaged element-wise across all files that were found. Months whose
    file does not exist are skipped, so the average covers only the months
    that are actually present.

    Args:
        year: Year used to build the file names.
        months: Iterable of month numbers to average over. The default is an
            immutable tuple so it cannot be mutated between calls.

    Returns:
        dict with keys ``'rain'``, ``'evap'`` and ``'network'`` (the monthly
        means) and ``'n_cells'`` (the length of the first ``rain`` array read).

    Raises:
        FileNotFoundError: If none of the requested monthly files exists.
    """
    all_rain = []
    all_evap = []
    all_network = []
    missing = []

    for month in months:
        file_path = DATA_PATH / f'1deg_{year}_{month:02d}.nc'
        if not file_path.exists():
            missing.append(file_path.name)
            continue
        with Dataset(file_path, 'r') as ds:
            all_rain.append(ds.variables['rain'][:])
            all_evap.append(ds.variables['evap'][:])
            all_network.append(ds.variables['network'][:])

    # Without this guard the function would fail below with an opaque
    # IndexError on all_rain[0].
    if not all_rain:
        raise FileNotFoundError(
            f"No data files found in {DATA_PATH} for year {year}, "
            f"months {list(months)} (missing: {missing})"
        )

    return {
        'rain': np.mean(all_rain, axis=0),
        'evap': np.mean(all_evap, axis=0),
        'network': np.mean(all_network, axis=0),
        'n_cells': len(all_rain[0])
    }

# Load the 2003 average (default months) and report its basic dimensions
amazon_data = load_amazon_data(year=2003)
network_matrix = amazon_data['network']

cell_count = amazon_data['n_cells']
total_moisture_flow = network_matrix.sum()
print(f"Loaded Amazon data: {cell_count} cells")
print(f"Network shape: {network_matrix.shape}")
print(f"Total moisture flow: {total_moisture_flow:.1f} mm")

In [None]:
# Simulation settings shared by every run
CONFIG = dict(
    n_cells=50,
    min_flow=1.0,
    barrier_height=0.2,
    cascade_duration=200,
    recovery_duration=800,
    dt=0.5,
    cascade_sigma=0.06,
    cascade_alpha=1.5,
    recovery_sigma=0.04,
    recovery_alpha=2.0,
    n_runs=20,
    base_seed=42,
)

# Fraction of candidate edges retained in each condition
CONNECTIVITY_LEVELS = [0.01, 0.02, 0.05, 0.10, 0.15, 0.20, 0.25, 0.50, 0.75, 1.00]

# How the retained edges are chosen
STRATEGIES = ['random', 'keystone_preserve', 'keystone_remove']

# Keystone edges carried over from Experiment 12, as (source, target) labels
KEYSTONE_EDGES = [
    ('cell_28', 'cell_14'),  # Most critical: -7.9% recovery impact
    ('cell_27', 'cell_12'),
    ('cell_32', 'cell_40'),
    ('cell_31', 'cell_4'),
    ('cell_17', 'cell_32'),
    ('cell_31', 'cell_24'),
]

# One simulation per (connectivity, strategy, run) combination
total_simulations = len(CONNECTIVITY_LEVELS) * len(STRATEGIES) * CONFIG['n_runs']

print(f"Connectivity levels: {CONNECTIVITY_LEVELS}")
print(f"Strategies: {STRATEGIES}")
print(f"Runs per condition: {CONFIG['n_runs']}")
print(f"Total simulations: {total_simulations}")

In [None]:
def run_connectivity_experiment(data_bytes, connectivity, strategy, keystone_edges, config, seed):
    """
    Worker function for connectivity threshold experiment.
    Uses run_two_phase_experiment from solvers module.

    Builds a network over the ``config['n_cells']`` cells with the largest
    total flow (row sum + column sum of the flow matrix), keeps a subset of
    the candidate edges according to ``strategy``, runs the two-phase
    (cascade, then recovery) experiment and returns summary metrics.

    Args:
        data_bytes: Pickled dict containing at least a ``'network'`` flow
            matrix, indexed as ``network[i, j]`` for the edge from cell i to
            cell j. Produced by this notebook itself; do not pass untrusted
            bytes, since they are fed to ``pickle.loads``.
        connectivity: Fraction of candidate edges to keep. The edge budget is
            ``max(1, int(total_edges * connectivity))``.
        strategy: One of ``'random'`` (uniform random subset),
            ``'keystone_preserve'`` (keystone edges first, then random
            non-keystone edges) or ``'keystone_remove'`` (random subset of
            non-keystone edges only, so fewer than the budget may be kept).
        keystone_edges: Iterable of ``(source, target)`` label pairs such as
            ``('cell_28', 'cell_14')``. Labels are ``cell_{i}`` where ``i`` is
            the position within the selected top cells, in ascending
            total-flow order — presumably the same labelling used when the
            keystones were identified; verify against that experiment.
        config: Dict with keys ``n_cells``, ``min_flow``, ``barrier_height``,
            ``cascade_duration``, ``recovery_duration``, ``dt``,
            ``cascade_sigma``, ``cascade_alpha``, ``recovery_sigma`` and
            ``recovery_alpha``.
        seed: Seeds the edge-selection RNG and is also passed to
            ``run_two_phase_experiment``.

    Returns:
        dict with the condition (``connectivity``, ``strategy``, ``seed``),
        edge counts (``n_edges``, ``n_keystone_edges``,
        ``total_possible_edges``), solver metrics (``pct_tipped_cascade``,
        ``final_pct_tipped``, ``recovery_fraction``, ``n_permanent_tips``) and
        sign-change counts (``n_tip_events``, ``n_recover_events``,
        ``tip_recovery_ratio``; the ratio is NaN when there were no recover
        events).

    Raises:
        ValueError: If ``strategy`` is not one of the supported names.
    """
    # Fail fast on a bad strategy, before any heavy imports or simulation work.
    # Previously an unknown name fell through the if/elif chain and surfaced
    # later as an UnboundLocalError on edges_to_keep.
    valid_strategies = ('random', 'keystone_preserve', 'keystone_remove')
    if strategy not in valid_strategies:
        raise ValueError(
            f"Unknown strategy {strategy!r}; expected one of {valid_strategies}"
        )

    # Imports live inside the function so it is self-contained when shipped
    # to Dask workers.
    import numpy as np
    import pickle
    import sys

    if '/opt/research-local/src' not in sys.path:
        sys.path.insert(0, '/opt/research-local/src')

    from energy_constrained import (
        EnergyConstrainedNetwork,
        EnergyConstrainedCusp,
        GradientDrivenCoupling,
    )
    from energy_constrained.solvers import run_two_phase_experiment

    # Deserialize data (trusted: pickled by the submitting notebook)
    data = pickle.loads(data_bytes)
    rng = np.random.default_rng(seed)

    # Extract parameters
    network_matrix = data['network']
    n_cells = config['n_cells']
    min_flow = config['min_flow']
    barrier_height = config['barrier_height']

    # Select top cells by total flow (column sums + row sums)
    total_flow = network_matrix.sum(axis=0) + network_matrix.sum(axis=1)
    top_indices = np.argsort(total_flow)[-n_cells:]

    # Build network
    net = EnergyConstrainedNetwork()

    # Add one identical cusp element per selected cell
    for i in range(n_cells):
        element = EnergyConstrainedCusp(
            a=-1.0, b=1.0, c=0.0, x_0=0.0,
            barrier_height=barrier_height,
            dissipation_rate=0.1
        )
        net.add_element(f'cell_{i}', element)

    # Candidate edges: every ordered pair of distinct selected cells whose
    # flow exceeds min_flow
    all_edges = {}
    for i, idx_i in enumerate(top_indices):
        for j, idx_j in enumerate(top_indices):
            if i != j:
                flow = network_matrix[idx_i, idx_j]
                if flow > min_flow:
                    all_edges[(f'cell_{i}', f'cell_{j}')] = flow

    total_edges = len(all_edges)
    n_keep = max(1, int(total_edges * connectivity))

    # Convert keystone edges to set for fast lookup
    keystone_set = {tuple(e) for e in keystone_edges}
    non_keystone_edges = [e for e in all_edges if e not in keystone_set]
    keystone_in_network = [e for e in keystone_edges if tuple(e) in all_edges]

    # Select edges based on strategy
    if strategy == 'random':
        all_edge_list = list(all_edges)
        rng.shuffle(all_edge_list)
        edges_to_keep = all_edge_list[:n_keep]

    elif strategy == 'keystone_preserve':
        # Always include keystones (up to n_keep); slicing clamps to the list length
        edges_to_keep = list(keystone_in_network[:n_keep])
        remaining = n_keep - len(edges_to_keep)
        if remaining > 0:
            non_keystone_list = list(non_keystone_edges)
            rng.shuffle(non_keystone_list)
            edges_to_keep.extend(non_keystone_list[:remaining])

    else:  # 'keystone_remove' (strategy was validated above)
        # Exclude keystones; may keep fewer than n_keep edges
        available = list(non_keystone_edges)
        rng.shuffle(available)
        edges_to_keep = available[:n_keep]

    edges_set = {tuple(e) for e in edges_to_keep}

    # Count keystone edges in final selection
    n_keystone_kept = sum(1 for e in keystone_in_network if tuple(e) in edges_set)

    # Add couplings for kept edges, in candidate-edge order
    n_edges_added = 0
    for (src, tgt), flow in all_edges.items():
        if (src, tgt) in edges_set:
            coupling = GradientDrivenCoupling(
                conductivity=flow / 100.0,
                state_coupling=0.1
            )
            net.add_coupling(src, tgt, coupling)
            n_edges_added += 1

    # Run two-phase experiment
    result = run_two_phase_experiment(
        network=net,
        cascade_duration=config['cascade_duration'],
        recovery_duration=config['recovery_duration'],
        dt=config['dt'],
        cascade_sigma=config['cascade_sigma'],
        cascade_alpha=config['cascade_alpha'],
        recovery_sigma=config['recovery_sigma'],
        recovery_alpha=config['recovery_alpha'],
        seed=seed
    )

    # Extract metrics: count sign changes of each cell's trajectory.
    # Upward changes are counted as tip events, downward ones as recover events.
    n_cells_actual = result.x_full.shape[1]
    n_tip_events = 0
    n_recover_events = 0

    for j in range(n_cells_actual):
        x_traj = result.x_full[:, j]
        signs = np.sign(x_traj)
        sign_changes = np.diff(signs)
        n_tip_events += np.sum(sign_changes > 0)
        n_recover_events += np.sum(sign_changes < 0)

    tip_recovery_ratio = n_tip_events / n_recover_events if n_recover_events > 0 else np.nan

    return {
        'connectivity': connectivity,
        'strategy': strategy,
        'n_edges': n_edges_added,
        'n_keystone_edges': n_keystone_kept,
        'total_possible_edges': total_edges,
        'seed': seed,
        'pct_tipped_cascade': result.metrics['pct_tipped_at_cascade_end'],
        'final_pct_tipped': result.metrics['final_pct_tipped'],
        'recovery_fraction': result.metrics['recovery_fraction'],
        'n_tip_events': n_tip_events,
        'n_recover_events': n_recover_events,
        'tip_recovery_ratio': tip_recovery_ratio,
        'n_permanent_tips': result.metrics['n_permanent_tips'],
    }

In [None]:
# Serialize the dataset once so every task reuses the same payload
data_bytes = pickle.dumps(amazon_data)
payload_kb = len(data_bytes) / 1024

# Broadcast the serialized payload to all workers
data_future = client.scatter(data_bytes, broadcast=True)
print(f"Data scattered to workers: {payload_kb:.1f} KB")

In [None]:
# Submit all tasks
start_time = time.time()

futures = []
task_info = []

n_runs = CONFIG['n_runs']

for conn_idx, connectivity in enumerate(CONNECTIVITY_LEVELS):
    for strat_idx, strategy in enumerate(STRATEGIES):
        # Position of this (connectivity, strategy) condition in the full grid
        condition_idx = conn_idx * len(STRATEGIES) + strat_idx

        for run_idx in range(n_runs):
            # Deterministic and unique per task. The previous formula used
            # hash(strategy), which Python salts per process (so seeds changed
            # between kernel restarts), and its additive terms collided across
            # conditions (e.g. connectivity 0.01 / run 10 vs 0.02 / run 0).
            seed = CONFIG['base_seed'] + condition_idx * n_runs + run_idx

            future = client.submit(
                run_connectivity_experiment,
                data_future,
                connectivity,
                strategy,
                KEYSTONE_EDGES,
                CONFIG,
                seed,
                pure=False
            )
            futures.append(future)
            task_info.append((connectivity, strategy, run_idx))

print(f"Submitted {len(futures)} tasks")
print(f"Conditions: {len(CONNECTIVITY_LEVELS)} connectivity × {len(STRATEGIES)} strategies × {CONFIG['n_runs']} runs")

In [None]:
# Collect results with progress tracking
results = []
completed = 0
errors = 0

# futures and task_info are parallel lists; map each future back to its
# (connectivity, strategy, run_idx) so a failure can be traced to its condition
task_by_future = dict(zip(futures, task_info))

for future in as_completed(futures):
    try:
        result = future.result()
    except Exception as e:
        # Best-effort collection: record the failure and keep going
        errors += 1
        failed_conn, failed_strategy, failed_run = task_by_future.get(future, ('?', '?', '?'))
        print(f"Error (connectivity={failed_conn}, strategy={failed_strategy}, "
              f"run={failed_run}): {e}")
    else:
        results.append(result)

    completed += 1
    if completed % 50 == 0 or completed == len(futures):
        elapsed = time.time() - start_time
        # Guard against a zero elapsed time on very fast (or empty) batches
        rate = completed / elapsed if elapsed > 0 else 0
        eta = (len(futures) - completed) / rate if rate > 0 else 0
        print(f"Progress: {completed}/{len(futures)} ({100*completed/len(futures):.1f}%) - "
              f"Elapsed: {elapsed/60:.1f}m - ETA: {eta/60:.1f}m")

total_time = time.time() - start_time
print(f"\nCompleted in {total_time/60:.1f} minutes")
print(f"Successful: {len(results)}, Errors: {errors}")

In [None]:
# One row per successful run, one column per returned metric
df = pd.DataFrame(data=results)
results_shape = df.shape
print(f"Results shape: {results_shape}")
df.head()

In [None]:
# Per-condition aggregates: recovery statistics plus the mean of the other metrics
aggregations = {
    'recovery_fraction': ['mean', 'std', 'count'],
    'n_edges': 'mean',
    'n_keystone_edges': 'mean',
    'pct_tipped_cascade': 'mean',
    'tip_recovery_ratio': 'mean',
    'n_permanent_tips': 'mean',
}
by_condition = df.groupby(['connectivity', 'strategy'])
summary = by_condition.agg(aggregations).round(4)

# Flatten the (metric, statistic) column pairs into "metric_statistic" names
flat_names = []
for column_pair in summary.columns.values:
    flat_names.append('_'.join(column_pair).strip())
summary.columns = flat_names

summary = summary.reset_index()
print(summary.to_string())

In [None]:
# Wide view of recovery: rows are connectivity levels, columns are strategies,
# with one column group for the mean and one for the standard deviation
pivot_recovery = pd.pivot_table(
    df,
    values='recovery_fraction',
    index='connectivity',
    columns='strategy',
    aggfunc=['mean', 'std'],
)

print("Recovery Fraction by Connectivity and Strategy:")
rounded_pivot = pivot_recovery.round(4)
print(rounded_pivot)

In [None]:
# Figure 12c.1: Recovery fraction vs connectivity (3 curves)
fig, ax = plt.subplots(figsize=(10, 6))

# Per-strategy styling lookups; the later figure and report cells read these too
colors = {'random': 'blue', 'keystone_preserve': 'green', 'keystone_remove': 'red'}
labels = {'random': 'Random', 'keystone_preserve': 'Keystone Preserve', 'keystone_remove': 'Keystone Remove'}

for strategy in STRATEGIES:
    strategy_rows = df[df['strategy'] == strategy]
    recovery_stats = strategy_rows.groupby('connectivity')['recovery_fraction'].agg(['mean', 'std'])

    # Fractions are plotted as percentages on both axes
    connectivity_pct = recovery_stats.index * 100
    mean_pct = recovery_stats['mean'] * 100
    std_pct = recovery_stats['std'] * 100

    ax.errorbar(
        connectivity_pct,
        mean_pct,
        yerr=std_pct,
        label=labels[strategy],
        color=colors[strategy],
        marker='o',
        capsize=3,
        linewidth=2,
        markersize=8
    )

# Reference line for the "meaningful recovery" criterion
ax.axhline(y=10, color='gray', linestyle='--', alpha=0.5, label='10% threshold')

ax.set_title('Experiment 12c: Recovery vs Network Connectivity', fontsize=14)
ax.set_xlabel('Connectivity (%)', fontsize=12)
ax.set_ylabel('Recovery Fraction (%)', fontsize=12)
ax.set_xlim(-2, 105)
ax.set_ylim(-2, 50)
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('/workspace/data/exp12c_recovery_vs_connectivity.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Figure 12c.2: Keystone benefit (preserve - random) vs connectivity
fig, ax = plt.subplots(figsize=(10, 5))

random_recovery = df[df['strategy'] == 'random'].groupby('connectivity')['recovery_fraction'].mean()
preserve_recovery = df[df['strategy'] == 'keystone_preserve'].groupby('connectivity')['recovery_fraction'].mean()
remove_recovery = df[df['strategy'] == 'keystone_remove'].groupby('connectivity')['recovery_fraction'].mean()

# Differences relative to the random baseline, in percentage points
benefit_preserve = (preserve_recovery - random_recovery) * 100
benefit_remove = (remove_recovery - random_recovery) * 100

# Place the bars at evenly spaced categorical positions. The tested levels are
# unevenly spaced (1%, 2%, 5%, ...), so bars of fixed width on a numeric
# percentage axis overlap each other at the low-connectivity end.
levels = sorted(set(benefit_preserve.index) | set(benefit_remove.index))
position = {level: i for i, level in enumerate(levels)}
bar_width = 0.4

ax.bar([position[c] - bar_width / 2 for c in benefit_preserve.index], benefit_preserve.values,
       width=bar_width, color='green', alpha=0.7, label='Keystone Preserve - Random')
ax.bar([position[c] + bar_width / 2 for c in benefit_remove.index], benefit_remove.values,
       width=bar_width, color='red', alpha=0.7, label='Keystone Remove - Random')

# Label each bar group with its connectivity level as a percentage
ax.set_xticks(range(len(levels)))
ax.set_xticklabels([f"{level * 100:g}" for level in levels])

ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
ax.set_xlabel('Connectivity (%)', fontsize=12)
ax.set_ylabel('Recovery Difference (percentage points)', fontsize=12)
ax.set_title('Keystone Effect: Benefit of Preserving vs Removing Keystones', fontsize=14)
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('/workspace/data/exp12c_keystone_benefit.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Figure 12c.3: Tip/recovery ratio vs connectivity
fig, ax = plt.subplots(figsize=(10, 5))

for strategy in STRATEGIES:
    # Mean ratio per connectivity level for this strategy
    ratio_by_level = (
        df[df['strategy'] == strategy]
        .groupby('connectivity')['tip_recovery_ratio']
        .mean()
    )
    connectivity_pct = ratio_by_level.index * 100
    ax.plot(
        connectivity_pct,
        ratio_by_level.values,
        label=labels[strategy],
        color=colors[strategy],
        marker='o',
        linewidth=2,
        markersize=6,
    )

# A ratio of 1 means as many tip events as recover events
ax.axhline(y=1.0, color='gray', linestyle='--', alpha=0.5, label='Balanced (ratio=1)')

ax.set_title('Asymmetry: Tip/Recovery Ratio vs Connectivity', fontsize=14)
ax.set_xlabel('Connectivity (%)', fontsize=12)
ax.set_ylabel('Tip/Recovery Ratio', fontsize=12)
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('/workspace/data/exp12c_asymmetry.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Threshold analysis: Find connectivity where recovery crosses 10%
from scipy.interpolate import interp1d

def find_threshold(connectivity, recovery, target=0.10):
    """Find the connectivity level where recovery first reaches ``target``.

    The points are sorted by connectivity and scanned from low to high. The
    first interval in which recovery rises from below ``target`` to at or
    above it is located, and the crossing is linearly interpolated.

    Args:
        connectivity: Sequence of connectivity levels (any order).
        recovery: Sequence of recovery values, parallel to ``connectivity``.
        target: Recovery level to locate.

    Returns:
        The interpolated connectivity as a float. If recovery is already at
        or above ``target`` at the lowest tested connectivity, that lowest
        level is returned (the true threshold is at most that value) instead
        of reporting the target as never reached. Returns ``None`` when the
        input is empty or recovery never reaches ``target``.
    """
    conn = np.asarray(connectivity, dtype=float)
    rec = np.asarray(recovery, dtype=float)
    if conn.size == 0:
        return None

    # Sort by connectivity
    order = np.argsort(conn)
    conn_sorted = conn[order]
    rec_sorted = rec[order]

    # Already at/above target at the lowest tested level: there is no
    # below-to-above interval to find, so report the lowest level as a bound.
    if rec_sorted[0] >= target:
        return float(conn_sorted[0])

    # Find crossing point
    for i in range(len(rec_sorted) - 1):
        low, high = rec_sorted[i], rec_sorted[i + 1]
        if low < target <= high:
            # Linear interpolation; high > low here, so the division is safe
            t = (target - low) / (high - low)
            return float(conn_sorted[i] + t * (conn_sorted[i + 1] - conn_sorted[i]))

    return None

print("=== Threshold Analysis ===")
print("\nConnectivity where recovery crosses 10%:")

for strategy in STRATEGIES:
    # Mean recovery per connectivity level for this strategy
    data = df[df['strategy'] == strategy].groupby('connectivity')['recovery_fraction'].mean()
    threshold = find_threshold(data.index.tolist(), data.values.tolist(), target=0.10)
    # Compare against None explicitly: a numeric threshold of 0.0 is falsy and
    # would otherwise be reported as "Not reached"
    if threshold is not None:
        print(f"  {labels[strategy]}: {threshold*100:.1f}%")
    else:
        print(f"  {labels[strategy]}: Not reached")

In [None]:
# Bootstrap confidence intervals for threshold
def bootstrap_threshold(df_strategy, target=0.10, n_bootstrap=1000, seed=None):
    """Bootstrap 95% CI for threshold.

    Within each connectivity level the runs are resampled with replacement,
    the mean recovery per level is recomputed, and ``find_threshold`` is
    applied to the resampled means. Replicates in which the target is not
    reached are dropped, so the interval is conditional on the threshold
    existing.

    Resampling is done with NumPy on the per-level arrays rather than with
    ``groupby(...).apply(...)``, which avoids depending on whether pandas
    keeps the grouping column in the applied frames.

    Args:
        df_strategy: DataFrame with ``connectivity`` and ``recovery_fraction``
            columns, already filtered to one strategy.
        target: Recovery level passed to ``find_threshold``.
        n_bootstrap: Number of bootstrap replicates.
        seed: Optional seed for the resampling RNG; ``None`` gives a
            different draw on each call.

    Returns:
        Array of the 2.5th, 50th and 97.5th percentiles of the bootstrap
        thresholds, or ``None`` if no replicate produced a threshold.
    """
    rng = np.random.default_rng(seed)

    # One array of recovery values per connectivity level
    per_level = [
        (level, values.to_numpy(dtype=float))
        for level, values in df_strategy.groupby('connectivity')['recovery_fraction']
    ]
    if not per_level:
        return None
    levels = [level for level, _ in per_level]

    thresholds = []
    for _ in range(n_bootstrap):
        # Resample with replacement within each level. nanmean ignores NaN
        # recoveries, as the pandas mean used previously did.
        means = [
            float(np.nanmean(rng.choice(values, size=len(values), replace=True)))
            for _, values in per_level
        ]

        # Find threshold
        threshold = find_threshold(levels, means, target=target)
        if threshold is not None:
            thresholds.append(threshold)

    if thresholds:
        return np.percentile(thresholds, [2.5, 50, 97.5])
    return None

print("\n=== Bootstrap 95% CI for 10% Recovery Threshold ===")
for strategy in STRATEGIES:
    strategy_label = labels[strategy]
    ci = bootstrap_threshold(df[df['strategy'] == strategy], target=0.10, n_bootstrap=500)

    if ci is None:
        print(f"{strategy_label}: Threshold not reached in bootstrap samples")
        continue

    # ci holds the 2.5th, 50th and 97.5th percentiles; report them as percentages
    lower_pct, median_pct, upper_pct = (bound * 100 for bound in ci)
    print(f"{strategy_label}: {median_pct:.1f}% [{lower_pct:.1f}%, {upper_pct:.1f}%]")

In [None]:
# Summary statistics table
print("\n" + "="*80)
print("EXPERIMENT 12c: KEY FINDINGS")
print("="*80)

# Reference value: only runs that actually retained every candidate edge count
# as the full network. At connectivity 1.0 the keystone_remove strategy still
# drops the keystone edges, so averaging all strategies would skew the reference.
at_full_connectivity = df['connectivity'] == 1.0
kept_every_edge = df['n_edges'] == df['total_possible_edges']
full_network = df[at_full_connectivity & kept_every_edge]['recovery_fraction'].mean()
print(f"\nFull network (100% connectivity): {full_network*100:.1f}% recovery")

print("\n--- Recovery by Connectivity Level ---")
print(f"{'Connectivity':>12} | {'Random':>10} | {'Preserve':>10} | {'Remove':>10}")
print("-" * 50)

for conn in CONNECTIVITY_LEVELS:
    # Mean recovery of each strategy at this connectivity level
    at_level = df['connectivity'] == conn
    random_rec = df[at_level & (df['strategy'] == 'random')]['recovery_fraction'].mean()
    preserve_rec = df[at_level & (df['strategy'] == 'keystone_preserve')]['recovery_fraction'].mean()
    remove_rec = df[at_level & (df['strategy'] == 'keystone_remove')]['recovery_fraction'].mean()
    print(f"{conn*100:>10.0f}% | {random_rec*100:>9.1f}% | {preserve_rec*100:>9.1f}% | {remove_rec*100:>9.1f}%")

# benefit_preserve / benefit_remove come from the keystone-benefit figure cell
print("\n--- Keystone Effect Summary ---")
avg_benefit_preserve = benefit_preserve.mean()
avg_benefit_remove = benefit_remove.mean()
print(f"Average benefit of keystone preservation: {avg_benefit_preserve:+.1f} percentage points")
print(f"Average impact of keystone removal: {avg_benefit_remove:+.1f} percentage points")

In [None]:
# Persist the per-run table and the aggregated summary as CSV files
output_path = '/workspace/data/experiment12c_results.csv'
summary_path = '/workspace/data/experiment12c_summary.csv'

for description, table, destination in (
    ("Results", df, output_path),
    ("Summary", summary, summary_path),
):
    table.to_csv(destination, index=False)
    print(f"{description} saved to {destination}")

## Key Findings

### 1. Connectivity Threshold
- **Minimum connectivity for >10% recovery**: [TBD after running]
- **Transition type**: Sharp (phase transition) or gradual (continuous) — [TBD after running]

### 2. Keystone Effect
- **Benefit of keystone preservation**: [TBD]
- **Cost of keystone removal**: [TBD]
- **Effect magnitude at different connectivity levels**: [TBD]

### 3. Conservation Implications
- Minimum viable network density: [TBD]
- Value of keystone protection: [TBD]
- Recommended conservation strategy: [TBD]