In [None]:
import json
import os
from datetime import datetime

import pandas as pd
from tqdm import tqdm

In [None]:
# Debug: show the working directory, its contents, and pipeline-related
# environment variables so a failing pipeline run can be diagnosed from the log.
import os

print("=== Environment Debug ===")
print(f"Current working directory: {os.getcwd()}")
print(f"Directory contents: {os.listdir('.')}")

# Also list what each immediate subdirectory contains.
for entry in os.listdir('.'):
    if not os.path.isdir(entry):
        continue
    print(f"Directory {entry}/: {os.listdir(entry)}")

print("\n=== Environment Variables ===")
# Only variables whose name mentions one of these markers are printed.
for name, setting in os.environ.items():
    if any(marker in name for marker in ('ELYRA', 'KFP', 'KUBEFLOW')):
        print(f"{name}: {setting}")

In [None]:
# Locate num_parallel.txt and read the list of node indices to process.
import os
import glob

# Node list used when the file is missing or its content cannot be parsed.
DEFAULT_NODES = [0]


def parse_node_list(text, default=None):
    """Parse text such as "[0, 1, 2]" into a list of ints.

    Args:
        text: Raw file content; surrounding whitespace and square brackets
            are ignored, items are comma separated.
        default: List returned when ``text`` cannot be parsed. A copy of
            ``DEFAULT_NODES`` is used when omitted.

    Returns:
        The parsed list of ints, or a copy of the fallback list when any item
        is not an integer (including empty content).
    """
    fallback = list(DEFAULT_NODES) if default is None else list(default)
    try:
        return [int(token) for token in text.strip().strip('[]').split(',')]
    except ValueError:
        # Only parse failures are tolerated; other errors must surface.
        print(f"Could not parse node list {text!r}, using default")
        return fallback


print("=== Looking for num_parallel.txt ===")
if os.path.exists('num_parallel.txt'):
    # Prefer the file in the current working directory.
    nodes_file = 'num_parallel.txt'
else:
    # Otherwise search the subdirectories; sorting makes the choice
    # deterministic when several copies exist.
    candidates = sorted(glob.glob('**/num_parallel.txt', recursive=True))
    print(f"Searching recursively: {candidates}")
    nodes_file = candidates[0] if candidates else None

if nodes_file is None:
    print("num_parallel.txt not found, using default")
    nodes = list(DEFAULT_NODES)
else:
    with open(nodes_file, 'r') as f:
        content = f.read().strip()
    print(f"Found {nodes_file}: {content}")
    # Both lookup paths share the same tolerant parser.
    nodes = parse_node_list(content)

print(f"Using nodes: {nodes}")

In [None]:
def convert_phase(
    node_i: int,
    config_json: str,
    picks_csv: str,
    catalog_csv: str,
    hypodd_phase: str,
    data_path: str,
):
    """Write one node's share of the event catalog as a HypoDD-style phase file.

    The catalog is sorted by ``time`` and split round-robin: this call handles
    every ``num_parallel``-th event starting at position ``node_i``, where
    ``num_parallel`` is derived from the catalog size and
    ``config["hypodd"]["MAXEVENT"]`` (1e4 when the key is absent). For each
    selected event a ``#`` header line is written, followed by one line per
    pick associated with it through ``event_index``.

    Args:
        node_i: Offset of this node within the round-robin split.
            NOTE(review): values >= the derived ``num_parallel`` select a
            shifted/empty slice; confirm callers pass 0..num_parallel-1.
        config_json: Path to a JSON file containing a ``"hypodd"`` section.
        picks_csv: CSV with columns ``event_index``, ``station_id`` (four
            dot-separated fields), ``phase_type``, ``phase_score`` and
            ``phase_time``.
        catalog_csv: CSV with columns ``time``, ``latitude``, ``longitude``,
            ``depth(m)``, ``magnitude``, ``sigma_time`` and ``event_index``.
        hypodd_phase: Path of the phase file to write (overwritten).
        data_path: Directory in which a ``hypodd`` subdirectory is ensured.

    Returns:
        ``hypodd_phase``, the path that was written.

    Raises:
        KeyError: If the config has no ``"hypodd"`` section or a required
            column is missing.
        ValueError: If a timestamp does not match ``%Y-%m-%dT%H:%M:%S.%f``.
    """
    with open(config_json, "r") as fp:
        config = json.load(fp)

    # makedirs(exist_ok=True) avoids the check-then-create race and also works
    # when data_path itself does not exist yet.
    hypodd_path = os.path.join(data_path, "hypodd")
    os.makedirs(hypodd_path, exist_ok=True)

    picks = pd.read_csv(picks_csv)
    events = pd.read_csv(catalog_csv)

    n_events = len(events)
    if n_events == 0:
        # An empty catalog would make the chunk-size arithmetic below divide
        # by zero; produce an empty phase file instead.
        with open(hypodd_phase, "w"):
            pass
        return hypodd_phase

    max_event = config["hypodd"].get("MAXEVENT", 1e4)
    # Even out the chunk size, then derive how many chunks (nodes) that yields.
    max_event = n_events // ((n_events - 1) // max_event + 1) + 1
    num_parallel = int((n_events - 1) // max_event + 1)

    # Round-robin selection over the time-ordered catalog.
    events = events.sort_values("time").iloc[node_i::num_parallel]
    picks = picks.loc[picks["event_index"].isin(events["event_index"])]
    # Maps event_index -> index labels of that event's rows in `picks`.
    picks_by_event = picks.groupby("event_index").groups

    time_format = "%Y-%m-%dT%H:%M:%S.%f"
    EH = 0  # written as zero; no value is read from the catalog
    EZ = 0  # written as zero; no value is read from the catalog

    # The context manager guarantees the file is flushed and closed, even if
    # a malformed row raises part-way through.
    with open(hypodd_phase, "w") as output_file:
        for _, event in events.iterrows():
            event_time = datetime.strptime(event["time"], time_format)
            lat = event["latitude"]
            lng = event["longitude"]
            dep = event["depth(m)"] / 1e3  # metres -> kilometres
            mag = event["magnitude"]
            RMS = event["sigma_time"]

            year = event_time.year
            month = event_time.month
            day = event_time.day
            hour = event_time.hour
            minute = event_time.minute
            sec = float(event_time.strftime("%S.%f"))

            event_line = f"# {year:4d} {month:2d} {day:2d} {hour:2d} {minute:2d} {sec:5.2f}  {lat:7.4f} {lng:9.4f}   {dep:5.2f} {mag:5.2f} {EH:5.2f} {EZ:5.2f} {RMS:5.2f} {event['event_index']:9d}\n"
            output_file.write(event_line)

            # An event without any pick keeps its header line but contributes
            # no pick lines (a plain lookup would raise KeyError).
            for j in picks_by_event.get(event["event_index"], []):
                pick = picks.loc[j]
                network_code, station_code, comp_code, channel_code = pick["station_id"].split(".")
                phase_type = pick["phase_type"].upper()
                phase_score = pick["phase_score"]
                # Pick time is written relative to the event origin, in seconds.
                pick_time = (datetime.strptime(pick["phase_time"], time_format) - event_time).total_seconds()
                tmp_code = f"{station_code}{channel_code}"
                pick_line = f"{tmp_code:<7s}   {pick_time:6.3f}   {phase_score:5.4f}   {phase_type}\n"
                output_file.write(pick_line)

    return hypodd_phase

In [None]:
# 必要なファイルを探す
import os
import glob

def find_file(filename):
    """Return a path to ``filename``, or None when it cannot be located.

    The name is tried as given (relative to the working directory) first;
    if that path does not exist, the first result of a recursive glob below
    the working directory is returned.
    """
    if not os.path.exists(filename):
        # Not directly reachable: search the subdirectories recursively.
        hits = glob.glob(f'**/{filename}', recursive=True)
        return hits[0] if hits else None
    return filename

# Locate the three input files, trying the bare name before the
# conventional subdirectory path.
config_file = find_file('config.json') or find_file('config/config.json')
picks_file = find_file('gamma_picks.csv') or find_file('gamma/gamma_picks.csv')
catalog_file = find_file('gamma_catalog.csv') or find_file('gamma/gamma_catalog.csv')

print("Found files:")
print(f"  config: {config_file}")
print(f"  picks: {picks_file}")
print(f"  catalog: {catalog_file}")

if not (config_file and picks_file and catalog_file):
    # Report which inputs could not be located instead of converting.
    print("ERROR: Required files not found!")
    missing = [
        label
        for label, located in (
            ('config.json', config_file),
            ('gamma_picks.csv', picks_file),
            ('gamma_catalog.csv', catalog_file),
        )
        if not located
    ]
    print(f"Missing: {missing}")
else:
    # Write one phase file per node index into the working directory.
    for node_i in nodes:
        convert_phase(
            node_i=node_i,
            config_json=config_file,
            picks_csv=picks_file,
            catalog_csv=catalog_file,
            hypodd_phase=f"hypodd_phase_{node_i:03d}.pha",
            data_path="./",
        )

In [None]:
# Re-read the node list from the working directory. If the file is missing or
# malformed, keep the node list resolved earlier in the notebook (or [0] when
# this cell is run on its own) instead of aborting the run.
fallback_nodes = globals().get('nodes', [0])
try:
    with open('num_parallel.txt', 'r') as f:
        # Trim whitespace before the brackets: a trailing newline would
        # otherwise leave the closing ']' in place and break int().
        raw_nodes = f.read().strip()
    nodes = [int(x) for x in raw_nodes.strip('[]').split(',')]
except (FileNotFoundError, ValueError) as err:
    print(f"Could not read node list from num_parallel.txt ({err}); using {fallback_nodes}")
    nodes = fallback_nodes

In [None]:
import os
import glob

def find_file(filename):
    """Locate ``filename`` and return its path, or None if nothing matches.

    A path that exists as given is returned unchanged; otherwise the first
    match of a recursive glob under the working directory is used.
    """
    if os.path.exists(filename):
        return filename
    recursive_matches = glob.glob(f'**/{filename}', recursive=True)
    return next(iter(recursive_matches), None)

# Resolve the input files, falling back to the conventional relative paths
# when the search finds nothing.
config_file = find_file('config.json') or "config/config.json"
picks_file = find_file('gamma_picks.csv') or "gamma/gamma_picks.csv"
catalog_file = find_file('gamma_catalog.csv') or "gamma/gamma_catalog.csv"

# Convert each node's share of the catalog into its own phase file.
generated_files = []
for node_i in nodes:
    output_file = f"hypodd_phase_{node_i:03d}.pha"
    convert_phase(
        node_i,
        config_file,
        picks_file,
        catalog_file,
        output_file,
        "./",
    )
    if os.path.exists(output_file):
        generated_files.append(output_file)
        print(f"Created: {output_file}")

# Merge the per-node files into a single phase file. One path variable is used
# for writing, reporting and the size check so they cannot drift apart.
merged_path = 'hypodd/hypodd_phase.pha'
# Make sure the target directory exists even when no node was converted.
os.makedirs('hypodd', exist_ok=True)

print(f"Merging {len(generated_files)} files into {merged_path}")
with open(merged_path, 'w') as merged_file:
    for phase_file in generated_files:
        print(f"Adding {phase_file}")
        with open(phase_file, 'r') as f:
            merged_file.write(f.read())
        # The per-node files are intentionally left in place.

print(f"Merged file created: {merged_path}")

# Report the size of the merged file as a quick sanity check.
if os.path.exists(merged_path):
    size = os.path.getsize(merged_path)
    print(f"Merged file size: {size} bytes")

In [8]:
# Emit output metadata for the Kubeflow Pipelines UI.
# This only happens when Elyra reports the KFP runtime. For information about
# Elyra environment variables refer to
# https://elyra.readthedocs.io/en/stable/user_guide/best-practices-file-based-nodes.html#proprietary-environment-variables
if os.environ.get('ELYRA_RUNTIME_ENV') == 'kfp':
    # NOTE(review): the heading says "Station" although this notebook converts
    # phase picks -- possibly copied from another notebook; confirm.
    markdown_output = {
        'storage': 'inline',
        'source': '# Convert Station Format Complete\n...',
        'type': 'markdown',
    }
    metadata = {'outputs': [markdown_output]}

    with open('mlpipeline-ui-metadata.json', 'w') as f:
        json.dump(metadata, f)