<span style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">An Exception was encountered at '<a href="#papermill-error-cell">In [1]</a>'.</span>

<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [None]:
import json
import os
import pickle
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from gamma.utils import association, convert_picks_csv, from_seconds
from pyproj import Proj
from tqdm import tqdm

In [None]:
# Pipeline parameters: the index entry handled by this run and the input file locations.
node_i = 0
index_json = "config/index.json"
config_json = "config/config.json"
pick_csv = "phasenet/picks.csv"
station_json = "stations/stations.json"

# Output directory for the per-node GaMMA results. `exist_ok=True` replaces the
# separate exists()-then-makedirs() check, which fails with FileExistsError if
# another process creates the directory between the two calls.
gamma_dir = "gamma"
os.makedirs(gamma_dir, exist_ok=True)

# Per-node output files, numbered with the zero-padded node index.
gamma_catalog_csv = f"{gamma_dir}/catalog_{node_i:03d}.csv"
gamma_pick_csv = f"{gamma_dir}/picks_{node_i:03d}.csv"

bucket_name = "catalogs"

In [None]:
# Scratch directory for this bucket under /tmp. `exist_ok=True` avoids the
# check-then-create race of a separate os.path.exists() test.
catalog_dir = os.path.join("/tmp/", bucket_name)
os.makedirs(catalog_dir, exist_ok=True)

## read config
# `idx` is the index-file entry selected by `node_i`.
with open(index_json, "r") as fp:
    index = json.load(fp)
idx = index[node_i]

# Settings loaded from JSON; later statements in this cell add more keys to this dict.
with open(config_json, "r") as fp:
    config = json.load(fp)

## read picks
picks = pd.read_csv(pick_csv, parse_dates=["phase_time"])
# Mirror the input columns under additional short names (alias, source column);
# the original columns are kept as well.
for alias, source in (
    ("id", "station_id"),
    ("timestamp", "phase_time"),
    ("amp", "phase_amp"),
    ("type", "phase_type"),
    ("prob", "phase_score"),
):
    picks[alias] = picks[source]

## read stations
# The JSON maps each station id to its attributes; ids become the frame index.
with open(station_json, "r") as fp:
    stations = pd.DataFrame.from_dict(json.load(fp), orient="index")
stations["id"] = stations.index

# Map projection (`+proj=sterea`) centred on config["center"], with units of km.
proj = Proj(f"+proj=sterea +lon_0={config['center'][0]} +lat_0={config['center'][1]} +units=km")
stations[["x(km)", "y(km)"]] = stations.apply(
    lambda row: pd.Series(proj(longitude=row["longitude"], latitude=row["latitude"])),
    axis=1,
)
# z is the elevation in metres divided by 1000 with the sign flipped.
stations["z(km)"] = stations["elevation(m)"].map(lambda elevation_m: -elevation_m / 1e3)

## GaMMA association settings
config.update({"use_dbscan": True, "use_amplitude": True, "method": "BGMM"})
# The oversampling factor depends on the mixture model selected above.
if config["method"] == "BGMM":  ## BayesianGaussianMixture
    config["oversample_factor"] = 4
elif config["method"] == "GMM":  ## GaussianMixture
    config["oversample_factor"] = 1

# Earthquake location
config["dims"] = ["x(km)", "y(km)", "z(km)"]
config["vel"] = {"p": 6.0, "s": 6.0 / 1.73}
# Horizontal limits: degree offsets from the centre scaled by config["degree2km"].
for axis, limit_key, center_pos in (("x(km)", "xlim_degree", 0), ("y(km)", "ylim_degree", 1)):
    config[axis] = (np.array(config[limit_key]) - np.array(config["center"][center_pos])) * config["degree2km"]
config["z(km)"] = (0, 60)
# Bounds: horizontal limits padded by 1 on each side, z from 0 to its upper
# limit + 1, and no bound on t.
config["bfgs_bounds"] = tuple(
    (config[axis_name][0] - 1, config[axis_name][1] + 1) for axis_name in ("x(km)", "y(km)")  # x, y
) + (
    (0, config["z(km)"][1] + 1),  # z
    (None, None),  # t
)

# DBSCAN
config.update(
    {
        "dbscan_eps": 10,  # second
        "dbscan_min_samples": 3,  ## see DBSCAN
    }
)

# Filtering
config.update(
    {
        "min_picks_per_eq": min(10, len(stations) // 2),
        "min_p_picks_per_eq": 0,
        "min_s_picks_per_eq": 0,
        "max_sigma11": 2.0,  # s
        "max_sigma22": 2.0,  # m/s
        "max_sigma12": 1.0,  # covariance
    }
)

# When amplitudes are used, drop picks whose amplitude equals -1.
if config["use_amplitude"]:
    picks = picks.loc[picks["amp"] != -1]

# Show the final configuration, one "key: value" line per entry.
for key, value in config.items():
    print(f"{key}: {value}")

## run GaMMA association
# `event_idx0` is handed to association() and then advanced by the number of
# events returned.
event_idx0 = 1
catalogs, assignments = association(picks, stations, config, event_idx0, method=config["method"])
event_idx0 += len(catalogs)

## create catalog
catalogs = pd.DataFrame(
    catalogs,
    columns=["time"]
    + config["dims"]
    + [
        "magnitude",
        "sigma_time",
        "sigma_amp",
        "cov_time_amp",
        "event_index",
        "gamma_score",
    ],
)

# Convert the projected x/y (km) back to longitude/latitude with one vectorized
# inverse projection. The empty case is handled explicitly: a row-wise
# `apply(..., axis=1)` on an empty frame returns an empty DataFrame, which
# cannot be assigned to the two new columns, so a run without any associated
# events would crash instead of writing a header-only catalog.
if len(catalogs) > 0:
    lon, lat = proj(
        catalogs["x(km)"].to_numpy(dtype=float),
        catalogs["y(km)"].to_numpy(dtype=float),
        inverse=True,
    )
    catalogs["longitude"] = lon
    catalogs["latitude"] = lat
else:
    catalogs["longitude"] = np.array([], dtype=float)
    catalogs["latitude"] = np.array([], dtype=float)
catalogs["depth(m)"] = catalogs["z(km)"] * 1e3

# Write the catalog in chronological order with a fixed column layout.
catalogs = catalogs.sort_values(by=["time"])
with open(gamma_catalog_csv, "w") as fp:
    catalogs.to_csv(
        fp,
        index=False,
        float_format="%.3f",
        date_format="%Y-%m-%dT%H:%M:%S.%f",
        columns=[
            "time",
            "magnitude",
            "longitude",
            "latitude",
            "depth(m)",
            "sigma_time",
            "sigma_amp",
            "cov_time_amp",
            "gamma_score",
            "event_index",
        ],
    )

## add assignment to picks
assignments = pd.DataFrame(assignments, columns=["pick_index", "event_index", "gamma_score"])
# Align assignments to picks on the pick index; any missing value in the joined
# frame is filled with -1 before `event_index` is cast to int.
picks = picks.join(assignments.set_index("pick_index"))
picks = picks.fillna(-1)
picks = picks.astype({"event_index": int})
picks = picks.sort_values(by=["timestamp"])
with open(gamma_pick_csv, "w") as fp:
    picks.to_csv(
        fp,
        index=False,
        date_format="%Y-%m-%dT%H:%M:%S.%f",
        columns=[
            "station_id",
            "phase_time",
            "phase_type",
            "phase_score",
            "phase_amp",
            "gamma_score",
            "event_index",
        ],
    )

# Verify that both output files exist; fail the run otherwise.
if not all(os.path.exists(path) for path in (gamma_catalog_csv, gamma_pick_csv)):
    raise Exception(f"Failed to create output files for node {node_i}")
print(f"Successfully created output files for node {node_i}")

In [None]:
# Merge the per-node catalog and pick files into single files with globally
# renumbered event indices.
picks_csv = "gamma/gamma_picks.csv"
catalog_csv = "gamma/gamma_catalog.csv"

with open(index_json, "r") as fp:
    index = json.load(fp)

# NOTE(review): `config` is reloaded here but not read by the merge below; the
# reload is kept so the notebook state after this cell is unchanged.
with open(config_json, "r") as fp:
    config = json.load(fp)


def _read_node_csv(path):
    """Read one per-node CSV with every column as str and tag rows with the file's node number.

    The node number is the text after the last underscore of the file stem,
    e.g. "000" for "gamma/catalog_000.csv".
    """
    frame = pd.read_csv(path, dtype=str)
    # splitext removes exactly the extension; str.rstrip(".csv") would instead
    # strip any trailing run of the characters '.', 'c', 's' and 'v'.
    stem = os.path.splitext(os.path.basename(path))[0]
    frame["file_index"] = stem.split("_")[-1]
    return frame


def _match_ids(frame):
    """Return one "<event_index>_<file_index>" key per row of `frame`.

    Built with zip rather than `apply(axis=1)` so an empty frame yields an
    empty list instead of an un-assignable empty DataFrame.
    """
    return [f"{event}_{node}" for event, node in zip(frame["event_index"], frame["file_index"])]


# One catalog file and one picks file is expected per index entry.
files_catalog = [f"gamma/catalog_{node:03d}.csv" for node in range(len(index))]
files_picks = [f"gamma/picks_{node:03d}.csv" for node in range(len(index))]
print(f"Merge catalog: {files_catalog}")
print(f"Merge picks: {files_picks}")

if len(files_catalog) > 0:
    # Catalog: order all events by time (stable sort, so ties keep file order)
    # and number them 1..N.
    merged_catalog = pd.concat([_read_node_csv(path) for path in files_catalog])
    merged_catalog = merged_catalog.sort_values(by="time", kind="mergesort", ignore_index=True)
    catalog_keys = _match_ids(merged_catalog)
    merged_catalog = merged_catalog.drop(columns=["event_index", "file_index"])
    merged_catalog["event_index"] = merged_catalog.index.values + 1
    # Per-node (event_index, file_index) key -> new global event index.
    mapping = dict(zip(catalog_keys, merged_catalog["event_index"]))
    merged_catalog.to_csv(catalog_csv, index=False)
    del merged_catalog

    # Picks: translate each pick's per-node event index to the global one;
    # picks whose key is not in the catalog get -1.
    merged_picks = pd.concat([_read_node_csv(path) for path in files_picks])
    merged_picks = merged_picks.sort_values(by="phase_time", kind="mergesort")
    pick_keys = _match_ids(merged_picks)
    merged_picks = merged_picks.drop(columns=["event_index", "file_index"])
    merged_picks["event_index"] = [mapping.get(key, -1) for key in pick_keys]
    merged_picks.to_csv(picks_csv, index=False)
    del merged_picks

In [None]:
# Emit metadata for the Kubeflow Pipelines UI
if os.environ.get('ELYRA_RUNTIME_ENV') == 'kfp':
    # For information about Elyra environment variables refer to
    # https://elyra.readthedocs.io/en/stable/user_guide/best-practices-file-based-nodes.html#proprietary-environment-variables

    # A single inline markdown output announcing completion.
    metadata = {
        'outputs': [
            dict(
                [
                    ('storage', 'inline'),
                    ('source', '# Run GaMMA Complete\n'),
                    ('type', 'markdown'),
                ]
            )
        ]
    }

    with open('mlpipeline-ui-metadata.json', 'w') as fp:
        fp.write(json.dumps(metadata))