In [None]:
# Parameters
# Run-time parameters for this notebook (defaults shown; kept as plain assignments).
# node_i is zero-padded into the output catalog names, e.g. hypodd_ct_000.reloc.
node_i = 0
# NOTE(review): bucket_name, s3_url and secure are not referenced by any cell visible
# here; presumably object-storage (MinIO) connection settings -- confirm they are needed.
bucket_name = "catalogs"
s3_url = "minio-service:9000"
secure = False


In [None]:
import json
import os
import shutil
import subprocess
import tarfile
import tempfile
from datetime import datetime

from minio import Minio


In [None]:
# Read the pipeline configuration and pick out the region being processed.
config_path = "config/config.json"
with open(config_path, "r") as config_file:
    config = json.loads(config_file.read())

# The region name is only used for reporting (log line here, summary table later).
region_name = config["region"]
print("Processing region: {}".format(region_name))


In [None]:
# Locate the HypoDD inputs staged by Elyra.
# They are expected under hypodd/ relative to the current working directory.
elyra_ct_file = "hypodd/dt.ct"
elyra_event_file = "hypodd/event.sel"
elyra_station_file = "hypodd/stations.dat"
elyra_cc_file = "hypodd/dt.cc"  # optional input; may be absent

print("Checking files downloaded by Elyra:")
staged_inputs = (
    ("CT differential times", elyra_ct_file),
    ("Event file", elyra_event_file),
    ("Station file", elyra_station_file),
    ("CC differential times", elyra_cc_file),
)
for label, candidate in staged_inputs:
    if not os.path.exists(candidate):
        print(f"  ✗ {label}: {candidate} (NOT FOUND)")
        continue
    print(f"  ✓ {label}: {candidate} ({os.path.getsize(candidate)} bytes)")

# CC relocation is attempted only when dt.cc is present and non-empty.
cc_available = False
if os.path.exists(elyra_cc_file):
    cc_available = os.path.getsize(elyra_cc_file) > 0
print(f"\nCC data available: {cc_available}")


In [None]:
# Prepare the scratch directories (relative to the current directory) used for the
# two HypoDD runs: one for catalog (CT) data, one for cross-correlation (CC) data.
work_dir = "work"
hypodd_ct_path = work_dir + "/hypodd_ct"
hypodd_cc_path = work_dir + "/hypodd_cc"

# exist_ok=True keeps the cell re-runnable.
for directory in (work_dir, hypodd_ct_path, hypodd_cc_path):
    os.makedirs(directory, exist_ok=True)

print("Created working directories:")
print("  CT: " + hypodd_ct_path)
print("  CC: " + hypodd_cc_path)


In [None]:
# Fail fast when a mandatory HypoDD input is absent or has no content.
# dt.cc is deliberately not listed: it is optional.
required_files = [
    (elyra_ct_file, "CT differential times"),
    (elyra_event_file, "Event file"),
    (elyra_station_file, "Station file"),
]

for required_path, label in required_files:
    if os.path.exists(required_path):
        if os.path.getsize(required_path) > 0:
            continue
        raise ValueError(f"Required file is empty: {required_path} ({label})")
    raise FileNotFoundError(f"Required file not found: {required_path} ({label})")

print("All required files verified successfully")


In [None]:
# Create HypoDD CT configuration
# Control file for the catalog-only (CT) run. Going by the legend embedded in the text
# itself: IDAT=2 selects catalog data, IPHA=3 uses both P and S, the cross-correlation
# file entry is left blank, OBSCC=0 / OBSCT=8, ISTART=2, ISOLV=2 (lsqr), NSET=4 with four
# weighting rows, and a 12-layer 1D velocity model with vp/vs ratio 1.82.
# The input/output entries are bare file names; the notebook copies the inputs into
# hypodd_ct_path and launches hypoDD with that directory as its cwd.
# Lines starting with '*' are the commentary that ships inside the control file; the
# whole literal (including tabs and trailing blanks) is written to disk unchanged.
ct_inp_content = """* RELOC.INP:
*--- input file selection
* cross correlation diff times:

*
*catalog P diff times:
dt.ct
*
* event file:
event.sel
*
* station file:
stations.dat
*
*--- output file selection
* original locations:
hypodd.loc
* relocations:
hypodd.reloc
* station information:
hypodd.sta
* residual information:
hypodd.res
* source paramater information:
hypodd.src
*
*--- data type selection: 
* IDAT:  0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 
* IPHA: 1= P; 2= S; 3= P&S
* DIST:max dist [km] between cluster centroid and station 
* IDAT   IPHA   DIST
    2     3     120
*
*--- event clustering:
* OBSCC:    min # of obs/pair for crosstime data (0= no clustering)
* OBSCT:    min # of obs/pair for network data (0= no clustering)
* OBSCC  OBSCT    
     0     8        
*
*--- solution control:
* ISTART:  	1 = from single source; 2 = from network sources
* ISOLV:	1 = SVD, 2=lsqr
* NSET:      	number of sets of iteration with specifications following
*  ISTART  ISOLV  NSET
    2        2      4
*
*--- data weighting and re-weighting: 
* NITER: 		last iteration to used the following weights
* WTCCP, WTCCS:		weight cross P, S 
* WTCTP, WTCTS:		weight catalog P, S 
* WRCC, WRCT:		residual threshold in sec for cross, catalog data 
* WDCC, WDCT:  		max dist [km] between cross, catalog linked pairs
* DAMP:    		damping (for lsqr only) 
*       ---  CROSS DATA ----- ----CATALOG DATA ----
* NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP
   4     -9     -9   -9    -9   1     1      8   -9  70 
   4     -9     -9   -9    -9   1     1      6    4  70 
   4     -9     -9   -9    -9   1    0.8     4    2  70 
   4     -9     -9   -9    -9   1    0.8     3    2  70 
*
*--- 1D model:
* NLAY:	number of model layers  
* RATIO:	vp/vs ratio 
* TOP:	depths of top of layer (km) 
* VEL: 	layer velocities (km/s)
* NLAY  RATIO 
   12     1.82
* TOP 
0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10
* VEL
5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11
*
*--- event selection:
* CID: 	cluster to be relocated (0 = all)
* ID:	cuspids of event to be relocated (8 per line)
* CID    
    0      
* ID
"""

# Persist the control file as ct.inp inside the CT working directory.
with open(os.path.join(hypodd_ct_path, "ct.inp"), "w") as fp:
    fp.write(ct_inp_content)
print("Created HypoDD CT configuration file")


In [None]:
# Copy input files to CT working directory
def copy_file(fp_from, fp_to):
    """Copy the text file ``fp_from`` to ``fp_to``, replacing any existing destination.

    The content is streamed in chunks instead of being loaded into memory in one
    piece, so large differential-time files do not inflate the kernel's memory use.
    Both files are opened in text mode with the platform default encoding, which
    keeps the newline handling of a plain read-then-write text copy.

    Args:
        fp_from: Path of the file to read.
        fp_to: Path of the file to create or overwrite.

    Raises:
        FileNotFoundError: If ``fp_from`` does not exist (the destination is left
            untouched in that case).
        OSError: If either file cannot be opened, read or written.
    """
    # Opening the destination with "w" truncates it; when both paths point at the same
    # file that would wipe the source before it is read, so a self-copy is a no-op.
    if os.path.exists(fp_to) and os.path.samefile(fp_from, fp_to):
        return
    # The source is opened first so a missing source fails before the destination
    # is created or truncated.
    with open(fp_from, "r") as src, open(fp_to, "w") as dst:
        shutil.copyfileobj(src, dst)

# Stage each staged input under the file name the CT control file refers to.
for source_path, target_name in (
    (elyra_ct_file, "dt.ct"),
    (elyra_event_file, "event.sel"),
    (elyra_station_file, "stations.dat"),
):
    copy_file(source_path, os.path.join(hypodd_ct_path, target_name))
print("Copied input files to CT working directory")


In [None]:
# Report the size of every staged CT input, flagging empty or missing files.
print("CT input file sizes:")
for input_name in ("dt.ct", "event.sel", "stations.dat"):
    staged_path = os.path.join(hypodd_ct_path, input_name)
    if not os.path.exists(staged_path):
        print(f"  {input_name}: NOT FOUND")
        continue
    n_bytes = os.path.getsize(staged_path)
    print(f"  {input_name}: {n_bytes} bytes")
    if not n_bytes:
        print(f"  WARNING: {input_name} is empty!")


In [None]:
# Run HypoDD on the catalog (CT) data. A failure here aborts the notebook: the
# diagnostics are printed and the exception is re-raised.
banner = "=" * 50
print(banner)
print("RUNNING HYPODD CT PROCESSING")
print(banner)

HYPODD_CT_CMD = ["hypoDD", "ct.inp"]
print("Running HypoDD CT command: " + " ".join(HYPODD_CT_CMD))
print("Working directory: " + hypodd_ct_path)

try:
    # cwd is the CT working directory because ct.inp refers to its files by bare name.
    result = subprocess.run(
        HYPODD_CT_CMD,
        cwd=hypodd_ct_path,
        capture_output=True,
        text=True,
        check=True,
    )
except subprocess.CalledProcessError as e:
    # Non-zero exit status: show everything the process printed, then propagate.
    print(f"HypoDD CT failed with return code {e.returncode}")
    print("STDOUT:")
    print(e.stdout)
    print("STDERR:")
    print(e.stderr)
    raise
except FileNotFoundError as e:
    # The process could not be started; list the working directory to aid debugging.
    print(f"HypoDD executable not found: {e}")
    print("Available files in CT working directory:")
    for entry in os.listdir(hypodd_ct_path):
        print(f"  {entry}")
    raise
else:
    print("HypoDD CT completed successfully")
    print("STDOUT:")
    print(result.stdout)
    if result.stderr:
        print("STDERR:")
        print(result.stderr)


In [None]:
# Summarise which HypoDD output files the CT run produced.
output_files = ["hypodd.reloc", "hypodd.loc", "hypodd.sta", "hypodd.res", "hypodd.src"]
print("CT output file status:")
for output_name in output_files:
    produced = os.path.join(hypodd_ct_path, output_name)
    if os.path.exists(produced):
        status = f"{os.path.getsize(produced)} bytes"
    else:
        status = "NOT FOUND"
    print(f"  {output_name}: {status}")

# The relocation catalog is the one output the rest of the notebook needs;
# stop here, with a directory listing for debugging, if it is missing.
ct_reloc_file = os.path.join(hypodd_ct_path, "hypodd.reloc")
ct_reloc_missing = not os.path.exists(ct_reloc_file)
if ct_reloc_missing:
    print("ERROR: HypoDD CT hypodd.reloc file was not created!")
    print("CT working directory contents:")
    for entry in os.listdir(hypodd_ct_path):
        print(f"  {entry}")
    raise FileNotFoundError("HypoDD CT output file not created")


In [None]:
# Copy CT output file to final location
# The catalog is published under a per-node name (node_i zero-padded to 3 digits).
outputs_dir = "hypo_reloc"
os.makedirs(outputs_dir, exist_ok=True)
catalog_ct_path = f"{outputs_dir}/hypodd_ct_{node_i:03d}.reloc"
copy_file(ct_reloc_file, catalog_ct_path)
print(f"CT output catalog saved to: {catalog_ct_path}")

# Display first few lines of CT output
# Only the head is read: zip() stops after 10 lines, so a large catalog is never
# loaded into memory just for this preview. No existence check is needed because
# copy_file above either created the file or raised.
with open(catalog_ct_path, "r") as f:
    lines = [line for _, line in zip(range(10), f)]
print("\nFirst 10 lines of CT output catalog:")
for i, line in enumerate(lines, 1):
    print(f"{i:2d}: {line.rstrip()}")


In [None]:
# Process CC if available
# Runs a second HypoDD pass that combines cross-correlation and catalog data.
# Unlike the CT pass, a failure here is not fatal: whatever happens, a file is left at
# catalog_cc_path (either the real catalog or a one-line '#' placeholder).
catalog_cc_path = f"{outputs_dir}/hypodd_cc_{node_i:03d}.reloc"


def _write_cc_placeholder(reason):
    """Overwrite catalog_cc_path with a one-line '#' comment naming ``reason``."""
    with open(catalog_cc_path, "w") as f:
        f.write(f"# {reason} - empty file\n")


if cc_available:
    print("\n" + "=" * 50)
    print("RUNNING HYPODD CC PROCESSING")
    print("=" * 50)

    # Create CC configuration
    # Same layout as the CT control file, but (per the embedded legend) with dt.cc
    # listed, IDAT=3 (cross & cat), OBSCC=8 and non-negative cross-correlation weights.
    cc_inp_content = """* RELOC.INP:
*--- input file selection
* cross correlation diff times:
dt.cc
*
*catalog P diff times:
dt.ct
*
* event file:
event.sel
*
* station file:
stations.dat
*
*--- output file selection
* original locations:
hypodd.loc
* relocations:
hypodd.reloc
* station information:
hypodd.sta
* residual information:
hypodd.res
* source paramater information:
hypodd.src
*
*--- data type selection: 
* IDAT:  0 = synthetics; 1= cross corr; 2= catalog; 3= cross & cat 
* IPHA: 1= P; 2= S; 3= P&S
* DIST:max dist [km] between cluster centroid and station 
* IDAT   IPHA   DIST
    3     3     120
*
*--- event clustering:
* OBSCC:    min # of obs/pair for crosstime data (0= no clustering)
* OBSCT:    min # of obs/pair for network data (0= no clustering)
* OBSCC  OBSCT    
     8     8        
*
*--- solution control:
* ISTART:  	1 = from single source; 2 = from network sources
* ISOLV:	1 = SVD, 2=lsqr
* NSET:      	number of sets of iteration with specifications following
*  ISTART  ISOLV  NSET
    2        2      4
*
*--- data weighting and re-weighting: 
* NITER: 		last iteration to used the following weights
* WTCCP, WTCCS:		weight cross P, S 
* WTCTP, WTCTS:		weight catalog P, S 
* WRCC, WRCT:		residual threshold in sec for cross, catalog data 
* WDCC, WDCT:  		max dist [km] between cross, catalog linked pairs
* DAMP:    		damping (for lsqr only) 
*       ---  CROSS DATA ----- ----CATALOG DATA ----
* NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP
   4    1.0   1.0   6    2   1.0   1.0     8   -9  70 
   4    1.0   1.0   4    2   1.0   1.0     6    4  70 
   4    1.0   1.0   3    2   1.0   0.8     4    2  70 
   4    1.0   1.0   2    2   1.0   0.8     3    2  70 
*
*--- 1D model:
* NLAY:	number of model layers  
* RATIO:	vp/vs ratio 
* TOP:	depths of top of layer (km) 
* VEL: 	layer velocities (km/s)
* NLAY  RATIO 
   12     1.82
* TOP 
0.0 1.0 3.0 5.0 7.0 9.0 11.0 13.0 17.0 21.0 31.00 31.10
* VEL
5.30 5.65 5.93 6.20 6.20 6.20 6.20 6.20 6.20 6.20 7.50 8.11
*
*--- event selection:
* CID: 	cluster to be relocated (0 = all)
* ID:	cuspids of event to be relocated (8 per line)
* CID    
    0      
* ID
"""

    # Create CC configuration file
    with open(os.path.join(hypodd_cc_path, "cc.inp"), "w") as fp:
        fp.write(cc_inp_content)
    print("Created HypoDD CC configuration file")

    # Copy input files to CC working directory
    copy_file(elyra_ct_file, os.path.join(hypodd_cc_path, "dt.ct"))
    copy_file(elyra_cc_file, os.path.join(hypodd_cc_path, "dt.cc"))
    copy_file(elyra_event_file, os.path.join(hypodd_cc_path, "event.sel"))
    copy_file(elyra_station_file, os.path.join(hypodd_cc_path, "stations.dat"))
    print("Copied input files to CC working directory")

    # Display CC input file info
    print("CC input file sizes:")
    for filename in ["dt.ct", "dt.cc", "event.sel", "stations.dat"]:
        filepath = os.path.join(hypodd_cc_path, filename)
        if os.path.exists(filepath):
            size = os.path.getsize(filepath)
            print(f"  {filename}: {size} bytes")
            if size == 0:
                print(f"  WARNING: {filename} is empty!")
        else:
            print(f"  {filename}: NOT FOUND")

    # Run HypoDD CC
    HYPODD_CC_CMD = ["hypoDD", "cc.inp"]
    print(f"Running HypoDD CC command: {' '.join(HYPODD_CC_CMD)}")
    print(f"Working directory: {hypodd_cc_path}")

    try:
        result = subprocess.run(
            HYPODD_CC_CMD,
            cwd=hypodd_cc_path,
            capture_output=True,
            text=True,
            check=True
        )
        print("HypoDD CC completed successfully")
        print("STDOUT:")
        print(result.stdout)
        if result.stderr:
            print("STDERR:")
            print(result.stderr)

        # Check CC output files
        print("CC output file status:")
        for filename in output_files:
            filepath = os.path.join(hypodd_cc_path, filename)
            if os.path.exists(filepath):
                size = os.path.getsize(filepath)
                print(f"  {filename}: {size} bytes")
            else:
                print(f"  {filename}: NOT FOUND")

        # Copy CC output file
        cc_reloc_file = os.path.join(hypodd_cc_path, "hypodd.reloc")
        if os.path.exists(cc_reloc_file):
            copy_file(cc_reloc_file, catalog_cc_path)
            print(f"CC output catalog saved to: {catalog_cc_path}")

            # Display first few lines of CC output; zip() stops after 10 lines so the
            # whole catalog is not read just for the preview.
            with open(catalog_cc_path, "r") as f:
                lines = [line for _, line in zip(range(10), f)]
            print("\nFirst 10 lines of CC output catalog:")
            for i, line in enumerate(lines, 1):
                print(f"{i:2d}: {line.rstrip()}")
        else:
            # hypoDD exited with status 0 but wrote no catalog. Treat it like the other
            # CC failures and still leave a placeholder, so the output file always
            # exists and a catalog left over from an earlier run is not reused.
            print("ERROR: HypoDD CC hypodd.reloc file was not created!")
            print("Continuing without CC results...")
            _write_cc_placeholder("CC output missing")

    except subprocess.CalledProcessError as e:
        print(f"HypoDD CC failed with return code {e.returncode}")
        print("STDOUT:")
        print(e.stdout)
        print("STDERR:")
        print(e.stderr)
        print("Continuing without CC results...")
        # Create empty CC file for pipeline compatibility
        _write_cc_placeholder("CC processing failed")
    except FileNotFoundError as e:
        print(f"HypoDD executable not found for CC: {e}")
        print("Continuing without CC results...")
        # Create empty CC file for pipeline compatibility
        _write_cc_placeholder("HypoDD executable not found")

else:
    print("\nCC processing skipped (no CC data available)")
    print("Creating empty CC output file for pipeline compatibility")
    _write_cc_placeholder("No CC data available")
    print(f"Empty CC file created: {catalog_cc_path}")


In [None]:
# Output metadata for Kubeflow UI
# Builds a two-column summary table, writes it as JSON next to the catalogs, then
# packs the whole output directory into a gzip-compressed tar archive.
ct_size = os.path.getsize(catalog_ct_path) if os.path.exists(catalog_ct_path) else 0
cc_size = os.path.getsize(catalog_cc_path) if os.path.exists(catalog_cc_path) else 0

# A CC catalog counts as a real result only when it is non-empty and is not one of the
# one-line placeholders, all of which start with '#'. Checking that marker avoids
# depending on how long the placeholder messages happen to be.
# NOTE(review): assumes a genuine hypodd.reloc never begins with '#' -- confirm.
cc_is_placeholder = True
if cc_size > 0:
    with open(catalog_cc_path, "rb") as f:
        cc_is_placeholder = f.read(1) == b"#"
cc_succeeded = cc_available and not cc_is_placeholder

metadata = {
    "outputs": [
        {
            "type": "table",
            "storage": "inline",
            "format": "csv",
            "header": ["Metric", "Value"],
            "source": [
                ["Region", region_name],
                ["Node Index", str(node_i)],
                ["CT Output File", f"hypodd_ct_{node_i:03d}.reloc"],
                ["CT File Size (bytes)", str(ct_size)],
                ["CC Output File", f"hypodd_cc_{node_i:03d}.reloc"],
                ["CC File Size (bytes)", str(cc_size)],
                ["CC Data Available", "Yes" if cc_available else "No"],
                # Reaching this cell implies the CT run succeeded; CT failures raise.
                ["CT Status", "Completed Successfully"],
                ["CC Status", "Completed Successfully" if cc_succeeded else "Skipped or Failed"]
            ]
        }
    ]
}

# NOTE(review): the metadata file is written inside outputs_dir, so it also ends up in
# the archive created below -- confirm the pipeline UI looks for it in this location.
with open(f"{outputs_dir}/mlpipeline-ui-metadata.json", "w") as f:
    json.dump(metadata, f, indent=2)

print("\n" + "=" * 60)
print("HYPODD RE-LOCATION PROCESSING COMPLETED")
print("=" * 60)
print(f"CT catalog: {catalog_ct_path} ({ct_size} bytes)")
print(f"CC catalog: {catalog_cc_path} ({cc_size} bytes)")
print(f"Output files saved to {outputs_dir}/ directory")

# Archive hypo_reloc directory
archive_path = "hypo_reloc.tar.gz"
print(f"\nCreating archive: {archive_path}")

with tarfile.open(archive_path, "w:gz") as tar:
    tar.add(outputs_dir, arcname=outputs_dir)

# Verify archive creation
if os.path.exists(archive_path):
    archive_size = os.path.getsize(archive_path)
    print(f"Archive created successfully: {archive_path} ({archive_size} bytes)")
else:
    raise FileNotFoundError(f"Failed to create archive: {archive_path}")

print("=" * 60)
