In [None]:
import os
import json
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from minio import Minio


In [None]:
# # ファイルの存在確認と読み込み
# print("Available files:")
# for root, dirs, files in os.walk("."):
#     for file in files:
#         file_path = os.path.join(root, file)
#         try:
#             file_size = os.path.getsize(file_path)
#         except FileNotFoundError:
#             file_size = 0
#         print(f"{file_path}: {file_size} bytes")

# Create the output directory for the generated visualizations.
viz_dir = "visualize"
if not os.path.exists(viz_dir):
    # exist_ok=True keeps this from crashing if the directory appears
    # between the existence check above and the creation call.
    os.makedirs(viz_dir, exist_ok=True)
    print(f"Created directory: {viz_dir}")

# Load the run configuration; the region name is read from it.
config_path = "config/config.json"
if not os.path.exists(config_path):
    raise FileNotFoundError(f"設定ファイル {config_path} が見つかりません")

# Decode explicitly as UTF-8 instead of relying on the platform default
# encoding, which varies with the locale of the machine running the notebook.
with open(config_path, "r", encoding="utf-8") as fp:
    config = json.load(fp)

region = config["region"]
print(f"Region: {region}")

# Report which of the expected input catalogs are present (informational only;
# a missing file produces a warning here, not an error).
required_files = {
    "gamma_catalog": "gamma/gamma_catalog.csv",
    "standard_catalog": "events/standard_catalog.csv",
    "hypodd_ct_catalog": "hypodd_ct_catalog.txt",
}

for name, path in required_files.items():
    if os.path.exists(path):
        size = os.path.getsize(path)
        print(f"{name}: {path} ({size} bytes)")
    else:
        print(f"WARNING: {name} file not found at {path}")

# The HypoDD CC catalog is optional: it counts as available only when the
# file exists and is non-empty.
cc_catalog_path = "hypodd_cc_catalog.txt"
has_cc_catalog = os.path.exists(cc_catalog_path) and os.path.getsize(cc_catalog_path) > 0
if has_cc_catalog:
    print(f"HypoDD CC catalog found: {cc_catalog_path}")
else:
    print("HypoDD CC catalog not found or empty, skipping...")


In [None]:
def plot3d(x, y, z, fig_name, config):
    """Render a 3D marker scatter of (x, y, z) and save it as an HTML file.

    Markers are colored by ``-z`` on a Viridis scale fixed to [-60, 2].
    The x and y axis ranges are read from ``config["xlim_degree"]`` and
    ``config["ylim_degree"]``; the z axis range is fixed to [60, -2].
    The figure is written to ``fig_name`` and a confirmation is printed.
    """
    marker_style = {
        "size": 3.0,
        "color": -z,
        "cmin": -60,
        "cmax": 2,
        "colorscale": "Viridis",
        "opacity": 0.8,
    }
    points = go.Scatter3d(x=x, y=y, z=z, mode="markers", marker=marker_style)

    scene_layout = {
        "xaxis": {"nticks": 4, "range": config["xlim_degree"]},
        "yaxis": {"nticks": 4, "range": config["ylim_degree"]},
        "zaxis": {"nticks": 4, "range": [60, -2]},
        "aspectratio": {"x": 1, "y": 1, "z": 0.5},
    }
    page_margin = {"r": 0, "l": 0, "b": 0, "t": 0}

    fig = go.Figure(data=[points])
    fig.update_layout(scene=scene_layout, margin=page_margin)
    fig.write_html(fig_name)
    print(f"3D plot saved: {fig_name}")

def create_empty_plot(fig_name, title, config):
    """Write a placeholder HTML figure that carries only a title and a notice.

    Used when a catalog has nothing to plot, so the expected output file
    still exists.

    Args:
        fig_name: Path of the HTML file to write.
        title: Figure title shown at the top of the page.
        config: Mapping providing ``"xlim_degree"`` and ``"ylim_degree"``,
            used for the x and y axis ranges of the scene layout.
    """
    fig = go.Figure()
    fig.update_layout(
        title=title,
        scene=dict(
            xaxis=dict(
                nticks=4,
                range=config["xlim_degree"],
            ),
            yaxis=dict(
                nticks=4,
                range=config["ylim_degree"],
            ),
            zaxis=dict(
                nticks=4,
                range=[60, -2],
            ),
            aspectratio=dict(x=1, y=1, z=0.5),
        ),
        # Reserve a top margin: the title is placed inside the top margin,
        # so with t=0 it had no room and was clipped at the figure's edge.
        margin=dict(r=0, l=0, b=0, t=50),
        annotations=[
            dict(
                text="No data available",
                xref="paper", yref="paper",
                x=0.5, y=0.5, xanchor='center', yanchor='middle',
                showarrow=False,
                font=dict(size=20, color="gray")
            )
        ]
    )
    fig.write_html(fig_name)
    print(f"Empty 3D plot saved: {fig_name}")

# Column names assigned to the whitespace-separated HypoDD catalog files,
# in file order.
_HYPODD_COLUMNS = [
    "ID", "LAT", "LON", "DEPTH", "X", "Y", "Z", "EX", "EY", "EZ",
    "YR", "MO", "DY", "HR", "MI", "SC", "MAG", "NCCP", "NCCS",
    "NCTP", "NCTS", "RCC", "RCT", "CID",
]


def _read_hypodd_catalog(path):
    """Read one HypoDD catalog file and add ``time`` and ``magnitude`` columns."""
    catalog = pd.read_csv(path, sep=r"\s+", names=_HYPODD_COLUMNS)

    date_parts = pd.DataFrame(
        {
            "year": catalog["YR"].astype("int64"),
            "month": catalog["MO"].astype("int64"),
            "day": catalog["DY"].astype("int64"),
            "hour": catalog["HR"].astype("int64"),
            "minute": catalog["MI"].astype("int64"),
        }
    )
    # Seconds are capped at 59.999 (same cap as the previous string-based
    # parsing) so a value such as 60.000 cannot produce an invalid timestamp,
    # then applied as whole milliseconds to avoid float rounding noise.
    millis = (catalog["SC"].clip(upper=59.999) * 1000).round().astype("int64")
    catalog["time"] = pd.to_datetime(date_parts) + pd.to_timedelta(millis, unit="ms")
    catalog["magnitude"] = catalog["MAG"]
    return catalog


def load_and_process_catalogs():
    """Load the GaMMA, standard and HypoDD catalogs from the working directory.

    Returns:
        Tuple ``(gamma_catalog, standard_catalog, hypodd_ct_catalog,
        hypodd_cc_catalog)``.

        * The GaMMA and standard catalogs are DataFrames with an added
          ``depth_km`` column (``depth(m)`` divided by 1000).
        * The HypoDD CT catalog is a DataFrame with added ``time`` and
          ``magnitude`` columns; when the file is missing or empty, an empty
          DataFrame with the same columns is returned instead.
        * The HypoDD CC catalog is a DataFrame like the CT one, or ``None``
          when the file is missing, empty, or could not be read.

    Raises:
        FileNotFoundError: If the GaMMA or standard catalog file is missing.
    """
    # GaMMA catalog (required)
    gamma_path = "gamma/gamma_catalog.csv"
    if not os.path.exists(gamma_path):
        raise FileNotFoundError(f"GaMMA catalog not found: {gamma_path}")
    gamma_catalog = pd.read_csv(gamma_path, parse_dates=["time"])
    gamma_catalog["depth_km"] = gamma_catalog["depth(m)"] / 1e3
    print(f"GaMMA catalog loaded: {len(gamma_catalog)} events")

    # Standard catalog (required)
    standard_path = "events/standard_catalog.csv"
    if not os.path.exists(standard_path):
        raise FileNotFoundError(f"Standard catalog not found: {standard_path}")
    standard_catalog = pd.read_csv(standard_path, parse_dates=["time"])
    standard_catalog["depth_km"] = standard_catalog["depth(m)"] / 1e3
    print(f"Standard catalog loaded: {len(standard_catalog)} events")

    # HypoDD CT catalog: fall back to an empty frame so callers can still
    # use len() and column lookups.
    hypodd_ct_path = "hypodd_ct_catalog.txt"
    if not os.path.exists(hypodd_ct_path) or os.path.getsize(hypodd_ct_path) == 0:
        print(f"WARNING: HypoDD CT catalog not found or empty: {hypodd_ct_path}")
        hypodd_ct_catalog = pd.DataFrame(columns=_HYPODD_COLUMNS + ["time", "magnitude"])
    else:
        hypodd_ct_catalog = _read_hypodd_catalog(hypodd_ct_path)

    print(f"HypoDD CT catalog loaded: {len(hypodd_ct_catalog)} events")

    # HypoDD CC catalog (optional). Availability is checked here directly
    # rather than through a notebook-level flag, so the function does not
    # depend on another cell having been run first.
    hypodd_cc_path = "hypodd_cc_catalog.txt"
    hypodd_cc_catalog = None
    if os.path.exists(hypodd_cc_path) and os.path.getsize(hypodd_cc_path) > 0:
        try:
            hypodd_cc_catalog = _read_hypodd_catalog(hypodd_cc_path)
            print(f"HypoDD CC catalog loaded: {len(hypodd_cc_catalog)} events")
        except Exception as err:
            # Best effort: the CC catalog is optional, so report and continue.
            print(f"Error loading HypoDD CC catalog: {err}")
            hypodd_cc_catalog = None

    return gamma_catalog, standard_catalog, hypodd_ct_catalog, hypodd_cc_catalog


In [None]:
# Load the catalog data
gamma_catalog, standard_catalog, hypodd_ct_catalog, hypodd_cc_catalog = load_and_process_catalogs()

# 3D plots for the two catalogs that are always present
for catalog, html_path in (
    (gamma_catalog, "visualize/gamma_catalog.html"),
    (standard_catalog, "visualize/standard_catalog.html"),
):
    plot3d(
        catalog["longitude"],
        catalog["latitude"],
        catalog["depth_km"],
        html_path,
        config,
    )

# 3D plots for the HypoDD catalogs. Both follow the same rules, so they share
# one loop: plot when there is data, otherwise write a placeholder figure so
# the output file always exists. Any failure also falls back to a placeholder.
hypodd_plot_columns = ["LON", "LAT", "DEPTH"]
for tag, catalog, html_path in (
    ("CT", hypodd_ct_catalog, "visualize/hypodd_ct_catalog.html"),
    ("CC", hypodd_cc_catalog, "visualize/hypodd_cc_catalog.html"),
):
    try:
        if catalog is None or len(catalog) == 0:
            print(f"HypoDD {tag} catalog is empty or None, creating empty plot")
            create_empty_plot(html_path, f"HypoDD {tag} Catalog - No Data", config)
        elif not all(col in catalog.columns for col in hypodd_plot_columns):
            print(f"HypoDD {tag} catalog is missing required columns (LON, LAT, DEPTH), creating empty plot")
            create_empty_plot(html_path, f"HypoDD {tag} Catalog - Missing Columns", config)
        else:
            plot3d(
                catalog["LON"],
                catalog["LAT"],
                catalog["DEPTH"],
                html_path,
                config,
            )
    except Exception as e:
        # Best effort: a failed HypoDD plot must not stop the notebook.
        print(f"Error processing HypoDD {tag} catalog: {e}")
        create_empty_plot(html_path, f"HypoDD {tag} Catalog - Error", config)


In [None]:
# Histogram and time-series plots
bins = 30

# Parse the time window into local variables instead of overwriting
# config["starttime"] / config["endtime"]. The in-place conversion made this
# cell fail with a TypeError when re-run, because fromisoformat() only
# accepts strings.
starttime = datetime.fromisoformat(config["starttime"])
endtime = datetime.fromisoformat(config["endtime"])
time_range = (starttime, endtime)

# The HypoDD CT catalog may be an empty placeholder frame; plot it only
# when it actually holds events.
has_ct = len(hypodd_ct_catalog) > 0
has_ct_magnitude = has_ct and "magnitude" in hypodd_ct_catalog.columns

# Event frequency over time
fig, ax = plt.subplots(figsize=(12, 6))
ax.hist(gamma_catalog["time"], range=time_range, bins=bins,
        edgecolor="k", alpha=1.0, linewidth=0.5, label=f"GaMMA: {len(gamma_catalog)}")

if has_ct:
    ax.hist(hypodd_ct_catalog["time"], range=time_range, bins=bins,
            edgecolor="k", alpha=0.8, linewidth=0.5, label=f"HypoDD CT: {len(hypodd_ct_catalog)}")

ax.hist(standard_catalog["time"], range=time_range, bins=bins,
        edgecolor="k", alpha=0.6, linewidth=0.5, label=f"Standard: {len(standard_catalog)}")

ax.set_ylabel("Frequency")
ax.autoscale(enable=True, axis='x', tight=True)
fig.autofmt_xdate()
ax.legend()
plt.title("Earthquake Frequency over Time")
fig.savefig("visualize/earthquake_frequency_time.png", bbox_inches="tight", dpi=300)
plt.show()

# Magnitude frequency distribution
fig, ax = plt.subplots(figsize=(10, 6))
magnitude_maxima = [standard_catalog["magnitude"].max(), gamma_catalog["magnitude"].max()]
if has_ct_magnitude:
    magnitude_maxima.append(hypodd_ct_catalog["magnitude"].max())
# np.nanmax ignores NaN entries (e.g. from a catalog with no magnitudes);
# the built-in max() returns an order-dependent result when NaN is present.
max_mag = np.nanmax(magnitude_maxima)

xrange = (-1., max_mag)
ax.hist(gamma_catalog["magnitude"], range=xrange, bins=bins, alpha=1.0,
        edgecolor="k", linewidth=0.5, label=f"GaMMA: {len(gamma_catalog['magnitude'])}")

if has_ct_magnitude:
    ax.hist(hypodd_ct_catalog["magnitude"], range=xrange, bins=bins, alpha=0.6,
            edgecolor="k", linewidth=0.5, label=f"HypoDD CT: {len(hypodd_ct_catalog['magnitude'])}")

ax.hist(standard_catalog["magnitude"], range=xrange, bins=bins, alpha=0.6,
        edgecolor="k", linewidth=0.5, label=f"Standard: {len(standard_catalog['magnitude'])}")

ax.legend()
ax.autoscale(enable=True, axis='x', tight=True)
ax.set_xlabel("Magnitude")
ax.set_ylabel("Frequency")
ax.set_yscale('log')
plt.title("Magnitude Frequency Distribution")
fig.savefig("visualize/earthquake_magnitude_frequency.png", bbox_inches="tight", dpi=300)
plt.show()

# Magnitude versus time
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(gamma_catalog["time"], gamma_catalog["magnitude"], '.', markersize=5.0, alpha=1.0,
        rasterized=True, label=f"GaMMA: {len(gamma_catalog['magnitude'])}")

if has_ct_magnitude:
    ax.plot(hypodd_ct_catalog["time"], hypodd_ct_catalog["magnitude"], '.', markersize=5.0, alpha=1.0,
            rasterized=True, label=f"HypoDD CT: {len(hypodd_ct_catalog['magnitude'])}")

ax.plot(standard_catalog["time"], standard_catalog["magnitude"], '.', markersize=5.0, alpha=1.0,
        rasterized=True, label=f"Standard: {len(standard_catalog['magnitude'])}")

ax.set_xlim(starttime, endtime)
ax.set_ylabel("Magnitude")
ax.set_ylim(bottom=-1)
ax.legend(markerscale=2)
ax.grid()
fig.autofmt_xdate()
plt.title("Magnitude vs Time")
fig.savefig("visualize/earthquake_magnitude_time.png", bbox_inches="tight", dpi=300)
plt.show()

print("All plots generated successfully!")


In [None]:
# Upload results to MinIO
# No MinIO client is available here, so the results are only saved locally
# print("Generated visualization files:")
# viz_files = [
#     "visualize/gamma_catalog.html",
#     "visualize/standard_catalog.html", 
#     "visualize/earthquake_frequency_time.png",
#     "visualize/earthquake_magnitude_frequency.png",
#     "visualize/earthquake_magnitude_time.png"
# ]

# # HypoDD CTがある場合のみファイルリストに追加
# if len(hypodd_ct_catalog) > 0:
#     viz_files.append("visualize/hypodd_ct_catalog.html")

# # HypoDD CCがある場合のみファイルリストに追加
# if hypodd_cc_catalog is not None and os.path.exists("visualize/hypodd_cc_catalog.html"):
#     viz_files.append("visualize/hypodd_cc_catalog.html")

# for filename in viz_files:
#     if os.path.exists(filename):
#         file_size = os.path.getsize(filename)
#         print(f"✓ {filename} ({file_size} bytes)")
#     else:
#         print(f"✗ {filename} (not found)")

# print(f"\nVisualization completed! Generated {len([f for f in viz_files if os.path.exists(f)])} files.")


In [None]:
# Emit metadata for the Kubeflow Pipelines UI
if os.environ.get('ELYRA_RUNTIME_ENV') == 'kfp':
    # For information about Elyra environment variables refer to
    # https://elyra.readthedocs.io/en/stable/user_guide/best-practices-file-based-nodes.html#proprietary-environment-variables

    cc_status = "found" if hypodd_cc_catalog is not None and len(hypodd_cc_catalog) > 0 else "not found/empty"
    ct_status = "found" if len(hypodd_ct_catalog) > 0 else "not found/empty"

    # Build the markdown with real line breaks. The previous '\\n' sequences
    # put a literal backslash-n into the text, which collapsed the heading
    # and the bullet list into a single line. Blank lines separate the
    # heading, the intro sentence and the list.
    summary_lines = [
        '# Visualization Complete',
        '',
        f'Successfully generated visualizations for {region}:',
        '',
        f'- GaMMA catalog: {len(gamma_catalog)} events',
        f'- Standard catalog: {len(standard_catalog)} events',
        f'- HypoDD CT catalog: {len(hypodd_ct_catalog)} events ({ct_status})',
        f'- HypoDD CC catalog: {cc_status}',
        '- Generated 3D plots (HTML)',
        '- Generated frequency histograms (PNG)',
        '- Files saved to visualize/ directory',
    ]

    metadata = {
        'outputs': [
            {
                'storage': 'inline',
                'source': '\n'.join(summary_lines),
                'type': 'markdown',
            }
        ]
    }

    with open('mlpipeline-ui-metadata.json', 'w') as f:
        json.dump(metadata, f)

print("Visualization workflow completed successfully!")
