# Enhanced Configuration Setup for QuakeFlow Pipeline

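This notebook builds the run configuration for earthquake data processing with the unified QuakeFlow framework.
It relies on the shared `common` utilities and the standardized region configuration, and writes `config/config.json`, `config/datetime.json`, and `config/index.json` for the later pipeline steps.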

In [None]:
# Import common utilities
import sys
import os
from pathlib import Path

# Add common utilities to path.
# The `examples` directory is resolved two levels above the current working
# directory, so this depends on where the notebook is launched from.
# The membership check keeps sys.path free of duplicates when the cell is re-run.
examples_dir = Path.cwd().parent.parent / 'examples'
if str(examples_dir) not in sys.path:
    sys.path.insert(0, str(examples_dir))

# Import QuakeFlow common utilities.
# NOTE(review): `common` is presumably a package inside `examples_dir`; this
# import must stay after the sys.path insertion above.
from common import notebook_setup, notebook_finalize, ElyraUtils, RegionConfig

print("QuakeFlow common utilities imported successfully")

In [None]:
# Bootstrap the notebook through the unified framework. The call returns the
# region configuration object, a workflow handle and the parallel settings.
region_config, workflow, parallel_config = notebook_setup()

# Values reused by the following cells.
region_name, num_parallel = region_config.region, parallel_config['num_parallel']

# Short status report; the last line only states whether a config object is present.
config_loaded = region_config.config is not None
print(f"Region: {region_name}")
print(f"Parallel processes: {num_parallel}")
print(f"Configuration loaded: {config_loaded}")

In [None]:
# Geographic extent and PhaseNet settings from the unified region config.
bounds = region_config.get_geographic_bounds()
processing_config = region_config.get_processing_config('phasenet')

# Midpoint of the bounding box, ordered (longitude, latitude).
center_longitude = (bounds['minlongitude'] + bounds['maxlongitude']) / 2
center_latitude = (bounds['minlatitude'] + bounds['maxlatitude']) / 2
center = (center_longitude, center_latitude)

import obspy
import datetime

# Built-in (start, end) windows keyed by lower-cased region name.
# Regions not listed here fall back to the "demo" window.
default_time_windows = {
    "demo": ("2019-07-04T17", "2019-07-04T19"),
    "california": ("2019-07-04T00", "2019-07-10T00"),
    "japan": ("2024-01-01T00", "2024-01-31T00"),
    "hawaii": ("2018-01-01T00", "2022-08-12T00"),
}

# START_TIME / END_TIME override the defaults only when BOTH are set.
start_time_str = os.environ.get('START_TIME')
end_time_str = os.environ.get('END_TIME')

if start_time_str and end_time_str:
    time_window = (start_time_str, end_time_str)
else:
    time_window = default_time_windows.get(
        region_name.lower(), default_time_windows["demo"]
    )

starttime, endtime = (obspy.UTCDateTime(stamp) for stamp in time_window)

print(f"Center coordinates: {center}")
print(f"Geographic bounds: {bounds}")
print(f"Time range: {starttime} to {endtime}")

In [None]:
# Configure data sources based on region.
#
# Produces `client`, `network_list` and `channel_list` for the later cells.
# Known regions use a built-in preset; any other region takes its provider
# from the `data_sources.waveforms` section of the region config, defaulting
# to IRIS with all networks.

# `region_config.config` can be None (the setup cell reports that case), and a
# section may be present but empty, so fall back to an empty dict at each level
# instead of failing with AttributeError.
data_sources = (region_config.config or {}).get('data_sources') or {}

# Presets keyed by lower-cased region name: (client, networks, channels).
region_data_presets = {
    "demo": ("SCEDC", ["CI"], "HH*,BH*,EH*,HN*"),
    "california": ("SCEDC", ["CI"], "HH*,BH*,EH*,HN*"),
    "japan": ("NIED", ["N.NIED"], "HH*,BH*,EH*"),
    "hawaii": ("IRIS", ["HV", "PT"], "HH*,BH*,EH*,HN*"),
}

preset = region_data_presets.get(region_name.lower())
if preset is not None:
    client, preset_networks, channel_list = preset
    # Copy so later mutation of network_list cannot alter the preset table.
    network_list = list(preset_networks)
else:
    # Use data sources from config if available
    waveform_config = data_sources.get('waveforms') or {}
    client = waveform_config.get('provider', 'IRIS')
    network_list = ["*"]
    channel_list = "HH*,BH*,EH*,HN*"

print(f"Data client: {client}")
print(f"Networks: {network_list}")
print(f"Channels: {channel_list}")

In [None]:
# Create enhanced configuration dictionary.
#
# Collects the region, geographic window, time window, data-source selection
# and per-module processing settings into one JSON-serializable dict.
import json
import numpy as np

# Kilometres per degree of arc on a sphere of radius 6371 km.
degree2km = np.pi * 6371 / 180

# `region_config.config` can be None (the setup cell reports that case);
# fall back to an empty mapping so the optional sections below resolve to {}
# instead of raising AttributeError.
region_settings = region_config.config or {}

# Modules whose settings come from the unified framework, in output order.
processing_modules = ["phasenet", "gamma", "adloc"]

# Enhanced configuration with unified framework integration
config = {
    "region": region_name,
    "center": center,
    "xlim_degree": [bounds['minlongitude'], bounds['maxlongitude']],
    "ylim_degree": [bounds['minlatitude'], bounds['maxlatitude']],
    "min_longitude": bounds['minlongitude'],
    "max_longitude": bounds['maxlongitude'],
    "min_latitude": bounds['minlatitude'],
    "max_latitude": bounds['maxlatitude'],
    "degree2km": degree2km,
    "starttime": starttime.datetime.isoformat(timespec="milliseconds"),
    "endtime": endtime.datetime.isoformat(timespec="milliseconds"),
    "networks": network_list,
    "channels": channel_list,
    "client": client,
    # Enhanced processing configuration from unified framework
    **{
        module: region_config.get_processing_config(module)
        for module in processing_modules
    },
    "hypodd": {"MAXEVENT": 1e4},
    # Add velocity model information
    "velocity_model": region_settings.get('velocity_model', {}),
    "quality_control": region_settings.get('quality_control', {}),
}

print(f"Enhanced configuration created for region: {region_name}")
# Report only the processing modules; the previous message listed every
# config key (region, center, ...) under this label.
print(f"Processing modules configured: {processing_modules + ['hypodd']}")

In [None]:
# Prepare the output directory and the paths of the three files this notebook
# writes; only config.json is written in this cell.
config_dir_name = 'config'
config_dir = Path(config_dir_name)
config_dir.mkdir(parents=True, exist_ok=True)

config_json, datetime_json, index_json = (
    config_dir / filename
    for filename in ('config.json', 'datetime.json', 'index.json')
)

# Persist the main configuration as indented JSON.
with config_json.open("w") as fp:
    json.dump(config, fp, indent=2)

# Record completion of this step through the shared Elyra helper.
step_details = {"config_file": str(config_json), "region": region_name}
ElyraUtils.log_pipeline_step("config_creation", "completed", step_details)

In [None]:
# Split [starttime, endtime) into one-hour slots and distribute the slot
# indices across the parallel workers.
one_hour = datetime.timedelta(hours=1)

# ISO-formatted start of every hourly slot; endtime itself is excluded.
starttimes = []
slot_start = starttime
while slot_start < endtime:
    starttimes.append(slot_start.datetime.isoformat(timespec="milliseconds"))
    slot_start += one_hour

total_slots = len(starttimes)

# Save datetime configuration
datetime_config = {
    "starttimes": starttimes,
    "interval": one_hour.total_seconds(),
    "region": region_name,
    "total_hours": total_slots,
}
with open(datetime_json, "w") as fp:
    json.dump(datetime_config, fp, indent=2)

# num_parallel == 0 means "choose automatically": one worker per six slots,
# at least 1 and at most 60.
if num_parallel == 0:
    workers_for_six_slot_chunks = (total_slots - 1) // 6 + 1
    num_parallel = min(60, max(1, workers_for_six_slot_chunks))

# Near-equal contiguous chunks of slot indices, one list per worker.
slot_indices = np.arange(total_slots)
idx = [chunk.tolist() for chunk in np.array_split(slot_indices, num_parallel)]
slots_per_process = [len(chunk) for chunk in idx]

# Save index configuration with metadata
index_config = {
    "indices": idx,
    "num_parallel": num_parallel,
    "total_time_slots": total_slots,
    "slots_per_process": slots_per_process,
    "region": region_name,
}
with open(index_json, "w") as fp:
    json.dump(index_config, fp, indent=2)

print("Parallel processing configured:")
print(f"  - Total time slots: {total_slots}")
print(f"  - Parallel processes: {num_parallel}")
print(f"  - Slots per process: {slots_per_process}")

In [None]:
# Summary and artifact creation.
#
# `artifacts` maps each generated file name to its path; `results` holds a
# human-readable summary. Both are printed here and reused by the final cell.
artifacts = {
    "config.json": str(config_json),
    "datetime.json": str(datetime_json),
    "index.json": str(index_json),
}

# Use " to " as the range separator (as the time range below does): a bare
# "-" runs into the minus sign of negative coordinates and produced text
# like "-118.50--117.00°E".
latitude_range = f"{bounds['minlatitude']:.2f} to {bounds['maxlatitude']:.2f}°N"
longitude_range = f"{bounds['minlongitude']:.2f} to {bounds['maxlongitude']:.2f}°E"

results = {
    "region": region_name,
    "time_slots": len(starttimes),
    "parallel_processes": num_parallel,
    "geographic_bounds": f"{latitude_range}, {longitude_range}",
    "time_range": f"{starttime.date} to {endtime.date}",
    "data_client": client,
}

banner = "=" * 60
print("\n" + banner)
print("ENHANCED CONFIGURATION SETUP COMPLETED")
print(banner)
# Turn keys such as "time_slots" into "Time Slots" for display.
for key, value in results.items():
    print(f"{key.replace('_', ' ').title()}: {value}")
print("\nConfiguration files created:")
for name, path in artifacts.items():
    print(f"  - {name}: {path}")
print(banner)

In [None]:
# Hand the summary and the generated file paths to the shared finalizer.
notebook_finalize(
    step_name="enhanced_config_setup", results=results, artifacts=artifacts
)

# Closing status lines for the notebook output.
closing_lines = (
    "Enhanced configuration setup completed successfully!",
    f"Ready for {region_name} earthquake data processing pipeline.",
)
for closing_line in closing_lines:
    print(closing_line)