In [None]:
import os
import json

In [None]:
# Method 3: recompute the chunk indices with the split_hypodd arithmetic
def get_index_from_split_hypodd(
    config_json: str = "config/config.json",
    catalog_csv: str = "gamma/gamma_catalog.csv"
) -> list:
    """Compute the list of HypoDD chunk indices for a catalog.

    The catalog is divided into ``num_parallel`` roughly equal chunks so that
    no chunk exceeds the configured ``MAXEVENT``; the result is
    ``[0, 1, ..., num_parallel - 1]``. The arithmetic is presumably meant to
    match the ``split_hypodd`` step (not visible in this notebook), so it is
    kept exactly as it was.

    Args:
        config_json: Path to a JSON config file. ``config["hypodd"]["MAXEVENT"]``
            is used when present; otherwise 1e4 is assumed. A config without a
            ``"hypodd"`` section is treated the same as one without
            ``"MAXEVENT"``.
        catalog_csv: Path to the event catalog CSV; only its row count is used.

    Returns:
        The list of chunk indices. This is a best-effort helper: on any error
        (missing/unreadable files, empty catalog, invalid ``MAXEVENT``) the
        problem is printed and ``[0]`` is returned.
    """
    import pandas as pd

    default_max_event = 1e4

    try:
        with open(config_json, "r") as fp:
            config = json.load(fp)

        events = pd.read_csv(catalog_csv)
        num_events = len(events)

        # An empty catalog would otherwise surface as an opaque ZeroDivisionError below.
        if num_events == 0:
            raise ValueError(f"catalog {catalog_csv} contains no events")

        # A missing "hypodd" section must not discard the default chunk size.
        hypodd_config = config.get("hypodd") or {}
        MAXEVENT = hypodd_config.get("MAXEVENT", default_max_event)
        if MAXEVENT <= 0:
            raise ValueError(f"MAXEVENT must be positive, got {MAXEVENT}")

        # Rebalance: shrink MAXEVENT so that the chunks come out evenly sized.
        MAXEVENT = num_events // ((num_events - 1) // MAXEVENT + 1) + 1
        num_parallel = int((num_events - 1) // MAXEVENT + 1)

        index_list = list(range(num_parallel))
        print(f"Calculated index from catalog: {index_list} (total events: {num_events}, MAXEVENT: {MAXEVENT})")
        return index_list

    except Exception as e:
        # Deliberate best-effort fallback: report the problem and assume a single chunk.
        print(f"Error calculating index: {e}")
        print("Using default index [0]")
        return [0]

# Compute the chunk indices dynamically from the catalog
index = get_index_from_split_hypodd()
print("Calculated index: " + str(index))

In [None]:
def merge_hypodd(
    index: list,
    config_json: str,
    catalog_ct: str,
    catalog_cc: str,
):
    """Merge per-chunk HypoDD relocation files into one CT and one CC catalog.

    If ``hypo_reloc.tar.gz`` exists in the working directory it is extracted
    first. Chunk files are then looked up in ``hypo_reloc/`` when that
    directory exists, otherwise in the legacy ``hy/`` directory, under the
    names ``hypodd_ct_{i:03d}.reloc`` and ``hypodd_cc_{i:03d}.reloc`` for each
    ``i`` in ``index``. Existing files are concatenated byte-for-byte, in index
    order, into ``merged_outputs/hypodd_{ct,cc}_catalog_temp.txt`` and copied
    to ``catalog_ct`` / ``catalog_cc``. When no usable file is found, a
    one-line placeholder comment is written to the catalog instead.

    Args:
        index: Chunk indices whose relocation files should be merged.
        config_json: Path to the JSON config file. Its contents are not used
            here, but the file is still parsed so a missing or malformed
            config fails fast.
        catalog_ct: Output path of the merged CT catalog.
        catalog_cc: Output path of the merged CC catalog.

    Returns:
        0 on completion.

    Raises:
        OSError: If the config, an input file or an output path cannot be
            read or written.
        json.JSONDecodeError: If ``config_json`` is not valid JSON.
        tarfile.TarError: If the archive exists but cannot be extracted.
    """
    import json
    import os
    import shutil
    import tarfile

    # CC files at or below this size are treated as empty placeholders and skipped.
    min_cc_bytes = 50

    def _merge_files(sources, temp_file, destination):
        """Concatenate sources into temp_file, then copy the result to destination."""
        # Pure-Python replacement for `cat a b > temp; cat temp > dest`: safe for
        # paths with spaces/shell metacharacters, portable, and raises on failure.
        with open(temp_file, "wb") as merged:
            for source in sources:
                with open(source, "rb") as part:
                    shutil.copyfileobj(part, merged)
        shutil.copyfile(temp_file, destination)

    def _write_placeholder(destination, message):
        """Write a one-line placeholder catalog."""
        with open(destination, "w") as f:
            f.write(message)

    # Parsed only for validation; the values are not needed for merging.
    with open(config_json, "r") as fp:
        json.load(fp)

    # Extract hypo_reloc archive if it exists
    hypo_reloc_dir = "hypo_reloc"
    archive_path = "hypo_reloc.tar.gz"

    if os.path.exists(archive_path):
        print(f"Found archive: {archive_path}, extracting...")
        with tarfile.open(archive_path, "r:gz") as tar:
            if hasattr(tarfile, "data_filter"):
                # Refuse members that would escape the working directory
                # (absolute paths, "..", outside links) where supported.
                tar.extractall(filter="data")
            else:
                tar.extractall()
        print("Archive extracted successfully")

    if os.path.exists(hypo_reloc_dir):
        print(f"Using files from extracted directory: {hypo_reloc_dir}")
        reloc_dir = hypo_reloc_dir
    else:
        # Fallback to legacy hy/ directory structure
        print("No hypo_reloc directory found, using legacy hy/ structure")
        reloc_dir = "hy/"

    hypodd_ct_catalogs = [os.path.join(reloc_dir, f"hypodd_ct_{i:03d}.reloc") for i in index]
    hypodd_cc_catalogs = [os.path.join(reloc_dir, f"hypodd_cc_{i:03d}.reloc") for i in index]

    # Create output directory
    output_dir = "merged_outputs"
    os.makedirs(output_dir, exist_ok=True)

    # Merge CT catalogs
    print(f"Merging CT catalogs: {hypodd_ct_catalogs}")
    ct_temp_file = os.path.join(output_dir, 'hypodd_ct_catalog_temp.txt')

    existing_ct_files = [f for f in hypodd_ct_catalogs if os.path.exists(f)]
    if existing_ct_files:
        print(f"Found CT files: {existing_ct_files}")
        _merge_files(existing_ct_files, ct_temp_file, catalog_ct)
    else:
        print("No CT files found, creating empty catalog")
        _write_placeholder(catalog_ct, "# No CT relocation files found\n")

    # Merge CC catalogs
    print(f"Merging CC catalogs: {hypodd_cc_catalogs}")
    cc_temp_file = os.path.join(output_dir, 'hypodd_cc_catalog_temp.txt')

    # Unlike CT, CC files must also be larger than the placeholder threshold.
    existing_cc_files = [
        f for f in hypodd_cc_catalogs
        if os.path.exists(f) and os.path.getsize(f) > min_cc_bytes
    ]
    if existing_cc_files:
        print(f"Found CC files: {existing_cc_files}")
        _merge_files(existing_cc_files, cc_temp_file, catalog_cc)
    else:
        print("No CC files found or all files are empty, creating empty catalog")
        _write_placeholder(catalog_cc, "# No CC relocation files found\n")

    return 0

In [None]:
# Recompute the chunk indices, then merge the per-chunk relocation files
# into the final CT and CC catalogs.
chunk_index = get_index_from_split_hypodd()
merge_hypodd(
    index=chunk_index,
    config_json="config/config.json",
    catalog_ct="hypodd_ct_catalog.txt",
    catalog_cc="hypodd_cc_catalog.txt",
)

In [8]:
# Metadata output for the Kubeflow Pipelines UI.
# For information about Elyra environment variables refer to
# https://elyra.readthedocs.io/en/stable/user_guide/best-practices-file-based-nodes.html#proprietary-environment-variables
runtime_env = os.environ.get('ELYRA_RUNTIME_ENV')
if runtime_env == 'kfp':
    # Single inline markdown panel shown in the pipeline run UI.
    markdown_panel = {
        'storage': 'inline',
        'source': '# Merge hypodd results Complete\n...',
        'type': 'markdown',
    }
    metadata = {'outputs': [markdown_panel]}

    with open('mlpipeline-ui-metadata.json', 'w') as metadata_file:
        json.dump(metadata, metadata_file)