facts-total:
- This is a minimal prototype of a total module for summing sea-level rise projections generated by different sources and modules. facts-total is a CLI tool that accepts the path of each netCDF file to be summed, plus an output path where the summed result is written. Each input netCDF file is the output of a FACTS sea-level component module. It is the user's responsibility to specify the correct files: check that the file paths are correct and that every file belongs to the same scale ('global' or 'local').

- It is possible to run multiple FACTS sea-level components with different default values for common parameters such as pyear-start and pyear-end. If that happens, facts-total will not fail, but it will show a message similar to the following:



In [1]:
import sys
# Put the facts2.0 directory at the front of the module search path so that
# imports are resolved against it before any other sys.path entry.
sys.path.insert(0, '/discover/nobackup/projects/eis_freshwater/gtamkin/facts2.0')

In [2]:
import os

# Show where the kernel currently is, so the change below can be verified.
cwd = os.getcwd()
print(f"Current working directory: {cwd}")

# Directory to switch into. Later cells use relative paths such as
# ./data/output and ./containers, which are resolved against this directory.
new_directory_path = "/discover/nobackup/projects/eis_freshwater/gtamkin/facts2.0" # POSIX-style absolute path
# NOTE: on Windows, use forward slashes or a raw string for this path.

try:
    os.chdir(new_directory_path)
    print(f"Directory successfully changed to: {os.getcwd()}")
except FileNotFoundError:
    # The target directory does not exist; report it and stay where we are.
    print(f"Directory not found: {new_directory_path}")
except Exception as e:
    # Any other failure is reported rather than raised, so the cell never
    # aborts; the working directory is then left unchanged.
    print(f"An error occurred: {e}")


Current working directory: /gpfsm/dnb06/projects/p151/gtamkin/facts2.0/notebooks
Directory successfully changed to: /gpfsm/dnb06/projects/p151/gtamkin/facts2.0


In [3]:
import asyncio
import logging
import time
import os
import shlex

from radical.asyncflow import WorkflowEngine
from radical.asyncflow import ConcurrentExecutionBackend

from concurrent.futures import ThreadPoolExecutor

from radical.asyncflow.logging import init_default_logger

# Module-level logger used by the workflow cells; print it and whatever
# handlers are attached directly to it so the log destination is visible.
logger = logging.getLogger(__name__)
print(logger)
for handler in logger.handlers:
    print(f"Handler: {handler}")
    if not isinstance(handler, logging.FileHandler):
        continue
    # Only file handlers have a path worth reporting.
    print(f"Log file: {handler.baseFilename}")



In [None]:
import asyncio
import subprocess

async def main():
    """Run all facts-total jobs concurrently and report the outcome of each.

    Six jobs are submitted: the 'lws' and 'sterodynamics' components plus the
    combined 'all' total, each for the 'lslr' and 'gslr' files. Every job runs
    the ``facts-total`` CLI inside the Singularity sandbox container and the
    per-job results (or the exception a job raised) are logged at the end.

    The workflow engine is always shut down, even when the workflow raises.
    """
    init_default_logger(logging.DEBUG)

    # Create backend and workflow
    engine = await ConcurrentExecutionBackend(ThreadPoolExecutor())
    flow = await WorkflowEngine.create(engine)

    def setup_directories():
        """Ensure the output directory exists."""
        # NOTE(review): the commands below write straight into ./data/output
        # (bound as /mnt/total_out), not into ./data/output/total -- confirm
        # this sub-directory is still needed.
        os.makedirs('./data/output/total', exist_ok=True)

    def build_command(component, name):
        """Return the singularity argv that totals `component` for file `name`.

        For component 'all' the lws and sterodynamics files are both passed as
        items; otherwise only the single component file is passed.
        """
        if component == 'all':
            items = [
                '--item=/mnt/total_out/lws/' + name + '.nc',
                '--item=/mnt/total_out/sterodynamics/' + name + '.nc',
                # ADD ICE COMPONENT IF AVAILABLE:
                # '--item=/mnt/total_out/ice/' + name + '.nc',
            ]
        else:
            items = ['--item=/mnt/total_out/' + component + '/' + name + '.nc']

        return [
            '/usr/local/other/singularity/4.0.3/bin/singularity', 'exec',
            '--bind', './data/input:/mnt/total_in',
            '--bind', './data/output:/mnt/total_out',
            './containers/sealevel-facts-total_latest-sandbox',
            'facts-total',
            *items,
            '--pyear-start=2020',
            '--pyear-end=2150',
            '--pyear-step=10',
            '--output-path=/mnt/total_out/totaled_output_' + component + '_' + name + '.nc',
        ]

    # This task launches the container itself and returns a result dict, so it
    # is registered as a *function* task. `executable_task` is for functions
    # that return the command line for the backend to run.
    @flow.function_task
    async def total_task(component, name):
        """Run facts-total in the container and return a summary dict.

        Raises:
            RuntimeError: if the command exits with a non-zero return code.
        """
        cmd = build_command(component, name)

        # Log the command
        cmd_str = shlex.join(cmd)
        logger.info(f"Executing: {cmd_str}")

        # Run the command asynchronously, capturing both output streams.
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        stdout, stderr = await proc.communicate()

        # errors='replace' keeps undecodable bytes in the tool output from
        # masking the real result with a UnicodeDecodeError.
        stdout_text = stdout.decode(errors='replace')
        stderr_text = stderr.decode(errors='replace')

        if proc.returncode != 0:
            error_msg = stderr_text if stderr_text else "Unknown error"
            logger.error(f"Command failed with return code {proc.returncode}: {error_msg}")
            raise RuntimeError(f"Task failed: {error_msg}")

        logger.info(f"Command output: {stdout_text}")
        if stderr_text:
            logger.warning(f"Command stderr: {stderr_text}")

        return {
            'command': cmd_str,
            'component': component,
            'name': name,
            'returncode': proc.returncode
        }

    async def run_climate_workflow(pipeline_id):
        """Submit every total job, wait for all of them, and collect outcomes.

        Returns a dict keyed 'total_result_<component>_<name>' whose values are
        either the task's summary dict or the exception the task raised.
        """
        logger.info(f'Starting climate workflow {pipeline_id} at {time.time()}')

        # Setup directories
        setup_directories()

        jobs = (
            ('lws', 'lslr'),
            ('lws', 'gslr'),
            ('sterodynamics', 'lslr'),
            ('sterodynamics', 'gslr'),
            ('all', 'lslr'),
            ('all', 'gslr'),
        )

        # Start ALL tasks first (don't await yet) so they run in parallel.
        futures = [total_task(component, name) for component, name in jobs]

        # return_exceptions=True: one failing job must not hide the others.
        outcomes = await asyncio.gather(*futures, return_exceptions=True)

        results = {}
        for (component, name), outcome in zip(jobs, outcomes):
            if isinstance(outcome, BaseException):
                logger.error(f"Task {component}/{name} failed with error: {outcome}")
            results[f'total_result_{component}_{name}'] = outcome

        logger.info(f'ALL TOTAL tasks completed for pipeline {pipeline_id}')
        logger.info(f'Climate workflow {pipeline_id} finished at {time.time()}')

        return results

    # Run workflow(s); always release the engine, even on failure.
    try:
        results = await run_climate_workflow(1)
        failed = [key for key, value in results.items()
                  if isinstance(value, BaseException)]
        if failed:
            # Do not claim success when some jobs raised.
            logger.error(f"=========Workflow finished with {len(failed)} failed task(s): {failed}=========")
        else:
            logger.info("=========All workflows completed successfully=========")
        logger.info(results)
    finally:
        await flow.shutdown()

# Just call it with await in Jupyter
await main()

[90m2026-02-03 16:29:14.874[0m │ [94mINFO[0m │ [38;5;165m[root][0m │ Logger configured successfully - Console: DEBUG, File: disabled (N/A), Structured: disabled, Style: modern
[90m2026-02-03 16:29:14.875[0m │ [94mINFO[0m │ [38;5;165m[execution.backend(concurrent)][0m │ ThreadPoolExecutor execution backend started successfully
[90m2026-02-03 16:29:14.875[0m │ [96mDEBUG[0m │ [38;5;165m[workflow_manager][0m │ Registered signal handler for SIGHUP
[90m2026-02-03 16:29:14.875[0m │ [96mDEBUG[0m │ [38;5;165m[workflow_manager][0m │ Registered signal handler for SIGTERM
[90m2026-02-03 16:29:14.876[0m │ [96mDEBUG[0m │ [38;5;165m[workflow_manager][0m │ Registered signal handler for SIGINT
[90m2026-02-03 16:29:14.876[0m │ [96mDEBUG[0m │ [38;5;165m[workflow_manager][0m │ Started run component
[90m2026-02-03 16:29:14.876[0m │ [94mINFO[0m │ [38;5;165m[main][0m │ Starting climate workflow 1 at 1770154154.8769002
[90m2026-02-03 16:29:14.881[0m │ [94mINFO[0m 

In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Turn off xarray's optional bottleneck and numbagg acceleration
# (disabled to avoid NumPy compatibility issues).
xr.set_options(use_bottleneck=False, use_numbagg=False)

def load_sea_level_data(filepath):
    """Open a NetCDF file and pick out its sea level variable.

    The variable is chosen by trying a list of known names in order; if none
    is present, the first data variable other than 'lat'/'lon' is used.

    Returns a tuple ``(data, years, ds)``: the selected variable, the values of
    the 'years' entry (``None`` when the dataset has none) and the dataset
    itself. The dataset is returned open; closing it is up to the caller.

    Raises ``ValueError`` when no candidate variable exists.
    """
    dataset = xr.open_dataset(filepath)

    # Known names for the sea level variable, in order of preference.
    known_names = ('sea_level_change', 'sealevel_change', 'slr', 'sea_level')
    chosen = next((n for n in known_names if n in dataset.data_vars), None)

    if chosen is None:
        # Fall back to the first variable that is not a coordinate-like field.
        leftovers = [v for v in dataset.data_vars if v not in ('lat', 'lon')]
        if not leftovers:
            raise ValueError(f"Could not find sea level variable in {filepath}")
        chosen = leftovers[0]

    if 'years' in dataset:
        year_values = dataset['years'].values
    else:
        year_values = None

    return dataset[chosen], year_values, dataset

def _align_to_total(data, target_dims=None):
    """Squeeze out a 'locations' axis and, if given, reorder dims to `target_dims`."""
    if 'locations' in data.dims:
        # squeeze() only succeeds for a length-1 axis (global = 1 location).
        data = data.squeeze('locations')
    if target_dims is not None and data.dims != target_dims:
        data = data.transpose(*target_dims)
    return data


def verify_totals(output_dir='./data/output', file_type='lslr'):
    """
    Verify that totaled_output_all_{file_type}.nc = sum of individual component files

    The totaled file and every available component file are loaded, the
    components are aligned to the total's dimension order and summed, and the
    difference to the total is reported and plotted in a 3x3 figure that is
    saved as ``verification_{file_type}_totals.png`` in `output_dir`.

    Parameters:
    -----------
    output_dir : str
        Directory containing the output files
    file_type : str
        Either 'lslr' or 'gslr'

    Returns:
    --------
    dict
        'max_difference' and 'mean_difference' (floats, NaNs ignored),
        'total_data' (total with 'locations' squeezed out), 'manual_sum'
        (sum of the aligned components) and 'difference' (total - sum).

    Raises:
    -------
    FileNotFoundError
        If none of the component files exists, so there is nothing to sum.

    Notes:
    ------
    Indexing below assumes the squeezed total is ordered (samples, years) and
    the axis labels assume values in mm -- TODO confirm against the files.
    Missing component files only produce a warning, so the comparison is then
    made against a partial sum.
    """

    print(f"\n{'='*60}")
    print(f"VERIFYING {file_type.upper()} TOTALS")
    print(f"{'='*60}\n")

    # Load the totaled file (the dataset handle itself is not needed here)
    total_file = Path(output_dir) / f'totaled_output_all_{file_type}.nc'
    total_data, total_years, _ = load_sea_level_data(total_file)

    print(f"Loaded total file: {total_file.name}")
    print(f"  Shape: {total_data.shape}")
    print(f"  Dimensions: {total_data.dims}")

    # Load individual component files
    components = ['lws', 'sterodynamics']  # Add 'ice' if you have it
    component_data = {}

    for comp in components:
        comp_file = Path(output_dir) / comp / f'{file_type}.nc'
        if not comp_file.exists():
            print(f"WARNING: Component file not found: {comp_file}")
            continue
        data, _, _ = load_sea_level_data(comp_file)
        component_data[comp] = data
        print(f"Loaded {comp}: {comp_file.name}")
        print(f"  Shape: {data.shape}")
        print(f"  Dimensions: {data.dims}")

    if not component_data:
        # Without this guard the sum stays None and the code below fails with
        # an unrelated-looking AttributeError.
        raise FileNotFoundError(
            f"No component files found for '{file_type}' under {output_dir}; "
            f"looked for: {', '.join(components)}"
        )

    # Align dimensions and sum components
    print("\nCalculating sum of components...")

    # Total file has: (samples, years, locations)
    # Component files might have: (years, samples) for gslr or other orders for lslr
    total_data_squeeze = _align_to_total(total_data)

    # Align every component once; the aligned arrays are reused by all plots.
    aligned = {}
    manual_sum = None
    for comp_name, data in component_data.items():
        comp_data = _align_to_total(data, total_data_squeeze.dims)
        aligned[comp_name] = comp_data

        if manual_sum is None:
            manual_sum = comp_data.copy()
        else:
            manual_sum = manual_sum + comp_data

        print(f"  Added {comp_name}: shape {comp_data.shape}")

    print(f"Manual sum shape: {manual_sum.shape}")

    # Calculate difference
    difference = total_data_squeeze - manual_sum
    abs_diff = np.abs(difference.values)
    max_diff = float(np.nanmax(abs_diff))
    mean_diff = float(np.nanmean(abs_diff))

    print("\nDifference Statistics:")
    print(f"  Max absolute difference: {max_diff:.6e} mm")
    print(f"  Mean absolute difference: {mean_diff:.6e} mm")

    if max_diff < 1e-3:  # Less than 0.001 mm difference
        print("  ✅ VERIFICATION PASSED: Total matches sum of components")
    else:
        print("  ⚠️  WARNING: Significant differences detected")

    # Ensemble means, computed once and shared by plots 4, 7 and 8
    total_mean = total_data_squeeze.mean(dim='samples')
    manual_sum_mean = manual_sum.mean(dim='samples')
    component_means = {
        comp_name: comp_data.mean(dim='samples')
        for comp_name, comp_data in aligned.items()
    }

    # Create visualization
    fig = plt.figure(figsize=(16, 12))

    # Get sample indices for plotting
    n_samples = total_data_squeeze.shape[0]
    sample_indices = [0, n_samples//2, n_samples-1]  # First, middle, last
    sample_to_plot = 0

    # Plot 1: Time series for selected samples
    ax1 = plt.subplot(3, 3, 1)
    for idx in sample_indices:
        ax1.plot(total_years, total_data_squeeze[idx, :],
                 label=f'Total (sample {idx})', linewidth=2)
    ax1.set_xlabel('Year')
    ax1.set_ylabel('Sea Level Change (mm)')
    ax1.set_title(f'Total {file_type.upper()}: Selected Samples')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Plot 2: Individual components for one sample
    ax2 = plt.subplot(3, 3, 2)
    for comp_name, comp_data in aligned.items():
        ax2.plot(total_years, comp_data[sample_to_plot, :],
                 label=comp_name, linewidth=2, marker='o', markersize=4)
    ax2.set_xlabel('Year')
    ax2.set_ylabel('Sea Level Change (mm)')
    ax2.set_title(f'Individual Components (sample {sample_to_plot})')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # Plot 3: Manual sum vs Total for one sample
    ax3 = plt.subplot(3, 3, 3)
    ax3.plot(total_years, total_data_squeeze[sample_to_plot, :],
             label='Total (from file)', linewidth=2, marker='o')
    ax3.plot(total_years, manual_sum[sample_to_plot, :],
             label='Manual sum', linewidth=2, marker='s', linestyle='--')
    ax3.set_xlabel('Year')
    ax3.set_ylabel('Sea Level Change (mm)')
    ax3.set_title(f'Verification: Total vs Sum (sample {sample_to_plot})')
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # Plot 4: Ensemble mean comparison
    ax4 = plt.subplot(3, 3, 4)
    ax4.plot(total_years, total_mean, label='Total mean', linewidth=3)
    ax4.plot(total_years, manual_sum_mean, label='Sum mean',
             linewidth=3, linestyle='--')
    for comp_name, comp_mean in component_means.items():
        ax4.plot(total_years, comp_mean, label=f'{comp_name} mean',
                 linewidth=2, alpha=0.7)
    ax4.set_xlabel('Year')
    ax4.set_ylabel('Sea Level Change (mm)')
    ax4.set_title('Ensemble Means')
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    # Plot 5: Ensemble spread (percentiles)
    ax5 = plt.subplot(3, 3, 5)
    total_p50 = total_data_squeeze.quantile(0.5, dim='samples').values.flatten()
    total_p05 = total_data_squeeze.quantile(0.05, dim='samples').values.flatten()
    total_p95 = total_data_squeeze.quantile(0.95, dim='samples').values.flatten()
    ax5.fill_between(total_years, total_p05, total_p95, alpha=0.3, label='Total 5-95%')
    ax5.plot(total_years, total_p50, label='Total median', linewidth=2)

    # Only the median of the manual sum is drawn on top of the total's band.
    sum_p50 = manual_sum.quantile(0.5, dim='samples').values.flatten()
    ax5.plot(total_years, sum_p50, label='Sum median',
             linewidth=2, linestyle='--')

    ax5.set_xlabel('Year')
    ax5.set_ylabel('Sea Level Change (mm)')
    ax5.set_title('Ensemble Spread (5th-95th percentile)')
    ax5.legend()
    ax5.grid(True, alpha=0.3)

    # Plot 6: Absolute difference
    ax6 = plt.subplot(3, 3, 6)
    for idx in sample_indices:
        ax6.plot(total_years, abs_diff[idx, :],
                 label=f'Sample {idx}', linewidth=2)
    ax6.set_xlabel('Year')
    ax6.set_ylabel('Absolute Difference (mm)')
    ax6.set_title('|Total - Sum| by Sample')
    ax6.legend()
    ax6.grid(True, alpha=0.3)
    ax6.set_yscale('log')

    # Plot 7: Contribution by component (stacked area), using ensemble means
    ax7 = plt.subplot(3, 3, 7)
    labels = list(component_means)
    stack_data = [component_means[name].values for name in labels]
    ax7.stackplot(total_years, *stack_data, labels=labels, alpha=0.7)
    ax7.set_xlabel('Year')
    ax7.set_ylabel('Sea Level Change (mm)')
    ax7.set_title('Component Contributions (Stacked, Ensemble Mean)')
    ax7.legend(loc='upper left')
    ax7.grid(True, alpha=0.3)

    # Plot 8: Relative contribution by component
    ax8 = plt.subplot(3, 3, 8)
    total_abs_mean = np.abs(total_mean.values)
    for comp_name, comp_mean in component_means.items():
        # Avoid division by zero: non-finite ratios are shown as 0 %
        with np.errstate(divide='ignore', invalid='ignore'):
            rel_contrib = 100 * comp_mean.values / total_abs_mean
            rel_contrib[~np.isfinite(rel_contrib)] = 0
        ax8.plot(total_years, rel_contrib, label=comp_name,
                 linewidth=2, marker='o')
    ax8.set_xlabel('Year')
    ax8.set_ylabel('Relative Contribution (%)')
    ax8.set_title('Relative Component Contributions')
    ax8.legend()
    ax8.grid(True, alpha=0.3)
    ax8.axhline(y=0, color='k', linestyle='-', linewidth=0.5)

    # Plot 9: Heatmap of differences across all samples
    ax9 = plt.subplot(3, 3, 9)
    im = ax9.imshow(abs_diff, aspect='auto', cmap='viridis',
                    interpolation='nearest')
    ax9.set_xlabel('Year Index')
    ax9.set_ylabel('Sample Index')
    ax9.set_title('Heatmap: |Total - Sum| (all samples)')
    plt.colorbar(im, ax=ax9, label='Absolute Difference (mm)')

    plt.suptitle(f'{file_type.upper()} Verification: Total vs Sum of Components\n' +
                 f'Max diff: {max_diff:.2e} mm, Mean diff: {mean_diff:.2e} mm',
                 fontsize=14, fontweight='bold')
    plt.tight_layout()

    # Save figure
    output_file = Path(output_dir) / f'verification_{file_type}_totals.png'
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    print(f"\nSaved verification plot to: {output_file}")

    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

    return {
        'max_difference': max_diff,
        'mean_difference': mean_diff,
        'total_data': total_data_squeeze,
        'manual_sum': manual_sum,
        'difference': difference
    }

# Run verification for both LSLR and GSLR
# Each call prints its own report, saves a PNG into output_dir and returns a
# dict of difference statistics; the two dicts are summarised at the end.
print("="*80)
print("VERIFYING SEA LEVEL TOTALS")
print("="*80)

# Verify LSLR (compares totaled_output_all_lslr.nc with the lslr component files)
lslr_results = verify_totals(output_dir='./data/output', file_type='lslr')

# Verify GSLR (compares totaled_output_all_gslr.nc with the gslr component files)
gslr_results = verify_totals(output_dir='./data/output', file_type='gslr')

# Final summary: largest absolute total-vs-sum difference for each file type
print("\n" + "="*80)
print("SUMMARY")
print("="*80)
print(f"LSLR Max Difference: {lslr_results['max_difference']:.6e} mm")
print(f"GSLR Max Difference: {gslr_results['max_difference']:.6e} mm")
print("="*80)

In [None]:
async def main():
    """Run all facts-total jobs as executable tasks and log their results.

    Six jobs are submitted: the 'lws' and 'sterodynamics' components plus the
    combined 'all' total, each for the 'lslr' and 'gslr' files. Each task only
    builds the singularity command line and returns it as a string.

    The workflow engine is always shut down, even when a task fails.
    """
    init_default_logger(logging.DEBUG)

    # Create backend and workflow
    engine = await ConcurrentExecutionBackend(ThreadPoolExecutor())
    flow = await WorkflowEngine.create(engine)

    def setup_directories():
        """Ensure the output directory exists."""
        os.makedirs('./data/output/total', exist_ok=True)

    @flow.executable_task
    async def total_task(component, name):
        """Facts total task: return the shell command totalling `component`.

        For component 'all' the lws and sterodynamics files are both passed as
        items; otherwise only the single component file is passed.
        """
        if component == 'all':
            items = [
                '--item=/mnt/total_out/lws/' + name + '.nc',
                '--item=/mnt/total_out/sterodynamics/' + name + '.nc',
            ]
        else:
            items = ['--item=/mnt/total_out/' + component + '/' + name + '.nc']

        cmd = [
            '/usr/local/other/singularity/4.0.3/bin/singularity', 'exec',
            '--bind', './data/input:/mnt/total_in',
            '--bind', './data/output:/mnt/total_out',
            './containers/sealevel-facts-total_latest-sandbox',
            'facts-total',
            *items,
            '--pyear-start=2020',
            '--pyear-end=2150',
            '--pyear-step=10',
            '--output-path=/mnt/total_out/totaled_output_' + component + '_' + name + '.nc',
        ]
        # shlex.join quotes each argument so the string is safe to hand to a shell.
        return shlex.join(cmd)

    async def run_climate_workflow(pipeline_id):
        """Submit every total job and wait for each of them in turn.

        Returns a dict keyed 'total_result_<component>_<name>' holding the
        result of the corresponding task. A failing task raises here.
        """
        logger.info(f'Starting climate workflow {pipeline_id} at {time.time()}')

        # Setup directories
        setup_directories()

        jobs = (
            ('lws', 'lslr'),
            ('lws', 'gslr'),
            ('sterodynamics', 'lslr'),
            ('sterodynamics', 'gslr'),
            ('all', 'lslr'),
            ('all', 'gslr'),
        )

        # Start all total tasks before awaiting any of them.
        futures = [total_task(component, name) for component, name in jobs]

        # Wait for every total task; each result is stored under its OWN key
        # (the 'all' entries previously repeated the 'lws' results).
        results = {}
        for (component, name), future in zip(jobs, futures):
            results[f'total_result_{component}_{name}'] = await future
        logger.info(f'TOTAL task completed for pipeline {pipeline_id}')

        logger.info(f'Climate workflow {pipeline_id} finished at {time.time()}')

        return results

    # Run workflow(s); always release the engine, even on failure.
    try:
        results = await run_climate_workflow(1)
        logger.info("=========All workflows completed successfully=========")
        logger.info(results)
    finally:
        await flow.shutdown()

# Just call it with await in Jupyter
await main()
