# Staggered Selection Utilities - Usage Examples

This notebook demonstrates how to use the staggered selection utilities in IPTA Metapulsar Analysis for creating Enterprise-compatible selection functions.

## Overview

The `create_staggered_selection` function provides a modern, well-documented API for creating selection functions that support:

- **Hierarchical flag selection** with fallback mechanisms
- **Single flag selection** for simple cases
- **Staggered flag selection** for complex fallback scenarios
- **Frequency filtering** for band-specific selections
- **Enterprise compatibility** with the `Selection` class

## Table of Contents

1. [Basic Setup](#basic-setup)
2. [Simple Flag Selection](#simple-flag-selection)
3. [Staggered Selection with Fallback](#staggered-selection-with-fallback)
4. [Frequency Band Filtering](#frequency-band-filtering)
5. [Enterprise Integration](#enterprise-integration)
6. [Real-world Examples](#real-world-examples)
7. [Advanced Usage Patterns](#advanced-usage-patterns)


## Basic Setup

First, let's import the necessary modules and create some mock data for demonstration.


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from enterprise.signals.selections import Selection
from metapulsar.selection_utils import create_staggered_selection

# Notebook-wide configuration: plot style and random seed
SEED = 42  # fixed seed so the randomly generated mock data is reproducible
plt.style.use('default')
np.random.seed(SEED)


In [None]:
# Create realistic mock pulsar data
class MockPulsar:
    """Minimal stand-in for a pulsar object, used only for demonstration.

    Every keyword argument is stored as an instance attribute of the same
    name. In addition, the flag arrays ``group``, ``f``, ``B``, ``pta`` and
    ``backend`` are collected into the ``flags`` dictionary, and ``freqs``
    is exposed as an attribute. Any of these that is not supplied defaults
    to an empty NumPy array.

    Args:
        name: Pulsar name, stored as ``self.name``.
        **kwargs: Arbitrary attributes; see above for the keys that receive
            special treatment.
    """

    def __init__(self, name, **kwargs):
        self.name = name

        # Mirror every keyword argument as a plain attribute.
        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)

        # Enterprise expects 'flags' and 'freqs' attributes
        self.flags = {
            flag_name: kwargs.get(flag_name, np.array([]))
            for flag_name in ('group', 'f', 'B', 'pta', 'backend')
        }
        self.freqs = kwargs.get('freqs', np.array([]))

# Synthetic per-TOA metadata for one pulsar.
# NOTE: the draws below consume the global NumPy RNG in exactly this order;
# reordering them changes the generated data.
n_toas = 100
freqs = np.random.uniform(low=100, high=2000, size=n_toas)  # MHz

# One randomly drawn value per TOA for each flag
groups = np.random.choice(
    a=['ASP_430', 'ASP_800', 'ASP_1400', 'ASP_2000'], size=n_toas
)
f_flags = np.random.choice(a=['GASP_430', 'GASP_800', 'GASP_1400'], size=n_toas)
B_flags = np.random.choice(a=['1', '2', '3'], size=n_toas)
pta_flags = np.random.choice(a=['EPTA', 'PPTA', 'NANOGrav', 'MPTA'], size=n_toas)
backend_flags = np.random.choice(a=['ASP', 'GASP', 'PUPPI'], size=n_toas)

# Bundle the arrays into the mock pulsar used by the examples below
mock_psr = MockPulsar(
    'J1909-3744',
    group=groups, f=f_flags, B=B_flags, pta=pta_flags, backend=backend_flags,
    freqs=freqs,
)

# Short summary of what was generated
print(f"Created mock pulsar {mock_psr.name} with {len(freqs)} TOAs")
print(f"Frequency range: {freqs.min():.1f} - {freqs.max():.1f} MHz")
print(f"Group flags: {np.unique(groups)}")
print(f"F flags: {np.unique(f_flags)}")
print(f"PTA flags: {np.unique(pta_flags)}")


## Simple Flag Selection

The simplest use case is selecting on a single flag and accepting all of its values, which is requested by passing `None` for that flag.


In [None]:
# Example 1: Group-based selection (None = all values of the flag)
group_sel = create_staggered_selection("efac", {"group": None})

# Call the selection function directly on the raw flag and frequency arrays
flags = {"group": groups}
result = group_sel(flags, freqs)

print("Group-based selection results:")
for key, mask in result.items():
    print(f"  {key}: {mask.sum()} TOAs selected")

# Plot the selected TOA frequencies, one horizontal row per selection key;
# the palette is cycled if there are more keys than colors.
colors = ['red', 'blue', 'green', 'orange']
fig, ax = plt.subplots(figsize=(10, 6))
for i, (key, mask) in enumerate(result.items()):
    selected_freqs = freqs[mask]
    row_positions = [i] * len(selected_freqs)
    row_color = colors[i % len(colors)]
    ax.scatter(selected_freqs, row_positions, c=row_color, label=key, alpha=0.7)

ax.set(
    xlabel='Frequency (MHz)',
    ylabel='Selection',
    title='Group-based Selection Results',
)
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


## Staggered Selection with Fallback

Staggered selection allows you to specify a hierarchy of flags, with fallback to secondary flags if primary flags are not available.


In [None]:
# Example 2: Staggered selection (group fallback to f)
def _print_mask_counts(selection_masks):
    """Print the number of selected TOAs for every mask in the mapping."""
    for label, toa_mask in selection_masks.items():
        print(f"  {label}: {toa_mask.sum()} TOAs selected")


staggered_sel = create_staggered_selection("efac", {("group", "f"): None})

# Case 1: both the primary ('group') and the fallback ('f') flag are supplied
flags_both = {"group": groups, "f": f_flags}
result_both = staggered_sel(flags_both, freqs)

print("Staggered selection with both flags available:")
print("(Should use 'group' flag as primary)")
_print_mask_counts(result_both)

# Case 2: the 'group' flag is absent, only the fallback flag is supplied
flags_fallback = {"f": f_flags}
result_fallback = staggered_sel(flags_fallback, freqs)

print("\nStaggered selection with only fallback flag:")
print("(Should fallback to 'f' flag)")
_print_mask_counts(result_fallback)


## Enterprise Integration

The selection functions are designed to work seamlessly with Enterprise's `Selection` class.


In [None]:
# Example 3: Enterprise Selection integration
def _demo_parameter(parameter_name):
    """Stand-in parameter factory: returns the given name with a 'param_' prefix."""
    return f"param_{parameter_name}"


# Wrap the selection function in Enterprise's Selection class
efac_sel = create_staggered_selection("efac", {"group": None})
selection = Selection(efac_sel)

# Bind the selection to the mock pulsar and read back its masks
selection_instance = selection(mock_psr)
masks = selection_instance.masks

print("Enterprise Selection integration:")
print(f"Selection instance created for pulsar: {mock_psr.name}")
print(f"Number of selection masks: {len(masks)}")
for label, toa_mask in masks.items():
    print(f"  {label}: {toa_mask.sum()} TOAs selected")

# Generate parameters by calling the bound selection with a name and a factory
params, param_masks = selection_instance("efac", _demo_parameter)
print("\nParameter generation:")
print(f"Number of parameters: {len(params)}")
for label, generated in params.items():
    print(f"  {label}: {generated}")


In [None]:
import numpy as np
from metapulsar import PTARegistry
from metapulsar import MetaPulsarFactory

# Registry of PTA configurations, created with its defaults
# (the recorded log output reports 8 configurations).
registry = PTARegistry()

# Factory bound to that registry; reused by the cells below.
factory = MetaPulsarFactory(registry)

2025-10-02 04:21:19.608 | DEBUG    | metapulsar.pta_registry:__init__:94 - Initialized PTA registry with 8 configurations


In [None]:
# Map each PTA name of interest to its configuration from the registry
pta_config_dict = {
    pta_name: registry.get_pta(pta_name)
    for pta_name in ['epta_dr1_v2_2', 'ppta_dr2', 'nanograv_9y']
}

In [12]:
def prepare_legacy_input_files(pulsar_name, pta_configs):
    """Collect the par/tim file paths of one pulsar for the legacy implementation.

    A fresh ``PTARegistry`` and ``MetaPulsarFactory`` are created on every
    call, and the files are located with ``factory.discover_files`` so that
    the legacy code receives the same files as the new system.

    Args:
        pulsar_name: Name of the pulsar, passed on to ``discover_files``.
        pta_configs: Sequence of PTA configuration names to look up in the
            registry.

    Returns:
        A tuple ``(par_files, tim_files)`` of two lists in the same order as
        ``pta_configs``. Each entry is the file path converted to ``str``,
        or ``None`` when no file pair was discovered for that PTA.
    """
    registry = PTARegistry()

    # Imported locally so the function does not rely on notebook-level imports
    from metapulsar.metapulsar_factory import MetaPulsarFactory

    factory = MetaPulsarFactory(registry)

    # Resolve every requested PTA name to its registry configuration
    pta_config_dict = {name: registry.get_pta(name) for name in pta_configs}

    print(pta_config_dict)

    file_pairs = factory.discover_files(pulsar_name, pta_config_dict)

    # Build the two output lists in step with pta_configs
    par_files, tim_files = [], []
    for config_name in pta_configs:
        if config_name not in file_pairs:
            # Keep a placeholder so positions still line up with pta_configs
            par_files.append(None)
            tim_files.append(None)
            continue

        par_file, tim_file = file_pairs[config_name]
        par_files.append(str(par_file))
        tim_files.append(str(tim_file))

    return par_files, tim_files


In [13]:
# Try the helper on one pulsar across the three PTA configurations.
# NOTE(review): the recorded output of this cell is
# "ValueError: Pulsar 'J0030+0451' not found. Available: []". The printed
# configurations use relative base_dir paths (data/ipta-dr2/...), so presumably
# the data was not reachable from the working directory — confirm and re-run.
prepare_legacy_input_files('J0030+0451', ['epta_dr1_v2_2', 'ppta_dr2', 'nanograv_9y'])

2025-10-02 04:37:28.235 | DEBUG    | metapulsar.pta_registry:__init__:94 - Initialized PTA registry with 8 configurations


{'epta_dr1_v2_2': {'base_dir': 'data/ipta-dr2/EPTA_v2.2/', 'par_pattern': '([BJ]\\d{4}[+-]\\d{2,4})/\\1\\.par', 'tim_pattern': '([BJ]\\d{4}[+-]\\d{2,4})/\\1_all\\.tim', 'timing_package': 'tempo2', 'priority': 1, 'description': 'EPTA Data Release 1 v2.2'}, 'ppta_dr2': {'base_dir': 'data/ipta-dr2/PPTA_dr1dr2/', 'par_pattern': 'par/([BJ]\\d{4}[+-]\\d{2,4})_dr1dr2\\.par', 'tim_pattern': 'tim/([BJ]\\d{4}[+-]\\d{2,4})_dr1dr2\\.tim', 'timing_package': 'tempo2', 'priority': 1, 'description': 'PPTA Data Release 1+2'}, 'nanograv_9y': {'base_dir': 'data/ipta-dr2/NANOGrav_9y/', 'par_pattern': 'par/([BJ]\\d{4}[+-]\\d{2,4})_NANOGrav_9yv1\\.gls\\.par', 'tim_pattern': 'tim/([BJ]\\d{4}[+-]\\d{2,4})_NANOGrav_9yv1\\.tim', 'timing_package': 'pint', 'priority': 1, 'description': 'NANOGrav 9-year Data Release'}}


ValueError: Pulsar 'J0030+0451' not found. Available: []

In [None]:
# Import legacy module
import sys
sys.path.append('src')
from metapulsar.legacy import metapulsar as legacy_module

# PTA configurations to combine; used for both the legacy and the new MetaPulsar
test_pta_configs = ["epta_dr1_v2_2", "ppta_dr2", "nanograv_9y"]

# PTAs whose files are loaded with tempo2; every other PTA is loaded with PINT
tempo2_ptas = ("epta_dr1_v2_2", "ppta_dr2")

# Only the first two pulsars of the list are processed ([:2])
for pulsar in ['J0030+0451', 'J0437-4715', 'J1909-3744', 'J1713+0747'][:2]:
    par_files, tim_files = prepare_legacy_input_files(
        pulsar, test_pta_configs
    )

    # Build the legacy input description in a single pass. par_files and
    # tim_files are in the same order as test_pta_configs; PTAs without a
    # discovered par/tim pair (None entries) are skipped.
    input_files = [
        {
            "pta": pta_name,
            "parfile": par_file,
            "timfile": tim_file,
            "package": "tempo2" if pta_name in tempo2_ptas else "pint",
        }
        for pta_name, par_file, tim_file in zip(
            test_pta_configs, par_files, tim_files
        )
        if par_file is not None and tim_file is not None
    ]
    if not input_files:
        # Nothing was found for this pulsar in any PTA
        continue

    # Create legacy MetaPulsar
    legacy_mp = legacy_module.create_metapulsar(input_files)

    # Create new MetaPulsar from the same PTA list used for the legacy input.
    # NOTE(review): the factory is built without a registry here, unlike
    # MetaPulsarFactory(registry) elsewhere in this notebook — confirm that
    # the default construction is intended.
    new_mp = MetaPulsarFactory().create_metapulsar(
        pulsar_name=pulsar,
        pta_names=list(test_pta_configs),
        reference_pta='epta_dr1_v2_2',
        combine_components=['efac', 'eflag', 'ecorr'],
        add_dm_derivatives=True,
    )

    # TODO: compare basic properties of legacy_mp and new_mp (not implemented yet)

2025-10-02 04:35:13.643 | DEBUG    | metapulsar.pta_registry:__init__:94 - Initialized PTA registry with 8 configurations


ValueError: Pulsar 'J0030+0451' not found. Available: []