In [None]:
import json
import os
import pickle
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from gamma.utils import association, convert_picks_csv, from_seconds
from pyproj import Proj
from tqdm import tqdm

In [None]:
# Parameters for this pipeline node. All paths are relative to the working directory.
node_i = 0
index_json = 'config/index.json'
config_json = 'config/config.json'
# NOTE(review): "piks" looks like a misspelling of "picks" — confirm against the file
# actually produced upstream before renaming.
pick_csv = 'phasenet/piks.csv'
# Must be a plain string: a trailing comma after the literal turns this into a 1-tuple,
# and open(station_json) then raises TypeError.
# NOTE(review): "staions" looks like a misspelling of "stations" — confirm before renaming.
station_json = 'staions/stations.json'
gamma_catalog_csv = f"gamma/catalog_{node_i:03d}.csv"
# NOTE(review): "pics" looks like a misspelling of "picks" — confirm with downstream
# consumers of this file before renaming.
gamma_pick_csv = f"gamma/pics_{node_i:03d}.csv"
bucket_name = "catalogs"

In [None]:
## prepare a scratch directory named after the bucket
# NOTE(review): `catalog_dir` is created but not referenced again in the visible notebook.
catalog_dir = os.path.join("/tmp/", bucket_name)
# exist_ok=True replaces the separate exists() check, which could race with another
# process creating the same directory between the check and the makedirs call.
os.makedirs(catalog_dir, exist_ok=True)

## read config
# index.json is indexed by node number; `idx` is the entry for this node.
# NOTE(review): `idx` is not used anywhere else in the visible notebook.
with open(index_json, "r") as fp:
    index = json.load(fp)
idx = index[node_i]

# config.json supplies the settings read further down ("center", "xlim_degree",
# "ylim_degree", "degree2km"); more keys are added to this dict before association.
with open(config_json, "r") as fp:
    config = json.load(fp)

## read picks
# picks = pd.read_json(pick_json)
picks = pd.read_csv(pick_csv, parse_dates=["phase_time"])

# Duplicate each phase_* column under a shorter alias; the original columns are kept
# because they are the ones written back out to CSV at the end.
# Presumably the aliases are the column names gamma's `association` expects — TODO confirm.
for alias, source_column in (
    ("id", "station_id"),
    ("timestamp", "phase_time"),
    ("amp", "phase_amp"),
    ("type", "phase_type"),
    ("prob", "phase_score"),
):
    picks[alias] = picks[source_column]

## read stations
# stations = pd.read_csv(station_csv, delimiter="\t")
with open(station_json, "r") as fp:
    stations = json.load(fp)
# Each top-level JSON key becomes a row label; its value supplies that row's columns.
stations = pd.DataFrame.from_dict(stations, orient="index")
# stations = stations.rename(columns={"station": "id"})
stations["id"] = stations.index

# Projection built from config["center"]: element 0 is used as lon_0, element 1 as lat_0;
# output units are km.
proj = Proj(f"+proj=sterea +lon_0={config['center'][0]} +lat_0={config['center'][1]} +units=km")

# Project every station in one vectorized call instead of a row-wise DataFrame.apply
# that builds a pd.Series per row.
station_x, station_y = proj(
    longitude=stations["longitude"].to_numpy(dtype=float),
    latitude=stations["latitude"].to_numpy(dtype=float),
)
stations["x(km)"] = station_x
stations["y(km)"] = station_y
# Sign flip: elevation in metres becomes a negated value in km (z grows downward).
stations["z(km)"] = -stations["elevation(m)"] / 1e3

## GaMMA association settings
config.update(
    {
        "use_dbscan": True,
        "use_amplitude": True,
        "method": "BGMM",
    }
)
# Oversampling factor per mixture model; left unset for any other method name.
oversample_by_method = {
    "BGMM": 4,  ## BayesianGaussianMixture
    "GMM": 1,  ## GaussianMixture
}
if config["method"] in oversample_by_method:
    config["oversample_factor"] = oversample_by_method[config["method"]]

# Earthquake location
config["dims"] = ["x(km)", "y(km)", "z(km)"]
config["vel"] = {"p": 6.0, "s": 6.0 / 1.73}
# Horizontal extents: degree limits shifted by the region centre, then scaled to km.
deg2km = config["degree2km"]
center_lon = np.array(config["center"][0])
center_lat = np.array(config["center"][1])
config["x(km)"] = (np.array(config["xlim_degree"]) - center_lon) * deg2km
config["y(km)"] = (np.array(config["ylim_degree"]) - center_lat) * deg2km
config["z(km)"] = (0, 60)
# Optimizer bounds: the search box padded by 1 on each side (z lower bound stays at 0);
# time is unbounded.
x_range, y_range, z_range = (config[dim] for dim in config["dims"])
pad = 1
config["bfgs_bounds"] = (
    (x_range[0] - pad, x_range[1] + pad),  # x
    (y_range[0] - pad, y_range[1] + pad),  # y
    (0, z_range[1] + pad),  # z
    (None, None),  # t
)

config.update(
    {
        # DBSCAN
        "dbscan_eps": 10,  # second
        "dbscan_min_samples": 3,  ## see DBSCAN
        # Filtering
        "min_picks_per_eq": min(10, len(stations) // 2),
        "min_p_picks_per_eq": 0,
        "min_s_picks_per_eq": 0,
        "max_sigma11": 2.0,  # s
        "max_sigma22": 2.0,  # m/s
        "max_sigma12": 1.0,  # covariance
    }
)

# When amplitudes are used, drop picks whose amplitude is the -1 placeholder.
if config["use_amplitude"]:
    picks = picks.loc[picks["amp"].ne(-1)]

# Echo the final configuration, one "key: value" pair per line.
for name, value in config.items():
    print(f"{name}: {value}")

## run GaMMA association
event_idx0 = 1  # presumably the index given to the first event — TODO confirm
assignments = []
catalogs, assignments = association(
    picks,
    stations,
    config,
    event_idx0,
    method=config["method"],
)
event_idx0 = event_idx0 + len(catalogs)

## create catalog
# Column layout: origin time, the location dimensions, then the per-event fields.
catalog_columns = [
    "time",
    *config["dims"],
    "magnitude",
    "sigma_time",
    "sigma_amp",
    "cov_time_amp",
    "event_index",
    "gamma_score",
]
catalogs = pd.DataFrame(catalogs, columns=catalog_columns)

## convert event locations back to longitude/latitude and write the catalog
# The inverse projection is done in one vectorized call. The zero-event case is handled
# explicitly: a row-wise DataFrame.apply over an empty frame does not yield the two
# expected columns, so assigning its result to ["longitude", "latitude"] fails.
if len(catalogs) > 0:
    event_lon, event_lat = proj(
        longitude=catalogs["x(km)"].to_numpy(dtype=float),
        latitude=catalogs["y(km)"].to_numpy(dtype=float),
        inverse=True,
    )
else:
    event_lon = event_lat = np.empty(0, dtype=float)
catalogs["longitude"] = event_lon
catalogs["latitude"] = event_lat
catalogs["depth(m)"] = catalogs["z(km)"] * 1e3

catalogs = catalogs.sort_values(by=["time"])

# Make sure the output directory exists; nothing earlier in the notebook creates it.
catalog_out_dir = os.path.dirname(gamma_catalog_csv)
if catalog_out_dir:
    os.makedirs(catalog_out_dir, exist_ok=True)

with open(gamma_catalog_csv, "w") as fp:
    catalogs.to_csv(
        fp,
        # sep="\t",
        index=False,
        float_format="%.3f",
        date_format="%Y-%m-%dT%H:%M:%S.%f",
        # Only these columns are exported; the projected x/y/z columns are left out.
        columns=[
            "time",
            "magnitude",
            "longitude",
            "latitude",
            "depth(m)",
            "sigma_time",
            "sigma_amp",
            "cov_time_amp",
            "gamma_score",
            "event_index",
        ],
    )
# catalogs = catalogs[
#     ['time', 'magnitude', 'longitude', 'latitude', 'depth(m)', 'sigma_time', 'sigma_amp']
# ]

## add assignment to picks
assignments = pd.DataFrame(assignments, columns=["pick_index", "event_index", "gamma_score"])
# Left-join on the pick index. Picks with no assignment get NaN from the join, which
# fillna(-1) turns into -1. Note that fillna is applied to every column of the frame,
# not only the two joined ones.
picks = (
    picks.join(assignments.set_index("pick_index"))
    .fillna(-1)
    .astype({"event_index": int})
    .sort_values(by=["timestamp"])
)

# Make sure the output directory exists; this path is configured independently of the
# catalog path, so it may point somewhere else.
pick_out_dir = os.path.dirname(gamma_pick_csv)
if pick_out_dir:
    os.makedirs(pick_out_dir, exist_ok=True)

with open(gamma_pick_csv, "w") as fp:
    picks.to_csv(
        fp,
        # sep="\t",
        index=False,
        date_format="%Y-%m-%dT%H:%M:%S.%f",
        # Export the original phase_* columns plus the association result; the short
        # alias columns (id, timestamp, amp, type, prob) are not written.
        columns=[
            "station_id",
            "phase_time",
            "phase_type",
            "phase_score",
            "phase_amp",
            "gamma_score",
            "event_index",
        ],
    )

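## upload to s3 bucket (currently disabled)
# NOTE(review): `s3_url` and `secure` are not defined anywhere in the visible notebook, and
# the access/secret keys below are hard-coded. Define the former and load the latter from
# the environment before re-enabling this block.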
# try:
#     from minio import Minio

#     minioClient = Minio(s3_url, access_key="minio", secret_key="minio123", secure=secure)
#     if not minioClient.bucket_exists(bucket_name):
#         minioClient.make_bucket(bucket_name)

#     minioClient.fput_object(
#         bucket_name,
#         f"{config['region']}/gamma/catalog_{node_i:03d}.csv",
#         gamma_catalog_csv,
#     )

#     minioClient.fput_object(
#         bucket_name,
#         f"{config['region']}/gamma/picks_{node_i:03d}.csv",
#         gamma_pick_csv,
#     )

# except Exception as err:
#     print(f"ERROR: can not access minio service! \n{err}")
#     pass

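## upload to mongodb (currently disabled)
# NOTE(review): the username/password below are hard-coded; load them from the environment
# before re-enabling this block. `Collection.update` was removed in PyMongo 4 — use
# `update_one` if this is revived (confirm against the installed PyMongo version).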
# try:
#     from pymongo import MongoClient

#     username = "root"
#     password = "quakeflow123"
#     client = MongoClient(f"mongodb://{username}:{password}@127.0.0.1:27017")
#     db = client["quakeflow"]
#     collection = db["waveform"]
#     for i, p in tqdm(picks.iterrows(), desc="Uploading to mongodb"):
#         collection.update(
#             {"_id": f"{p['station_id']}_{p['timestamp'].isoformat(timespec='milliseconds')}_{p['type']}"},
#             {"$set": {"event_index": p["event_index"]}},
#         )
# except Exception as err:
#     print(f"ERROR: can not access mongodb service! \n{err}")
#     pass

# return f"catalog_{node_i:03d}.csv"

In [None]:
# Emit metadata for the Kubeflow Pipelines UI.
# Only runs when Elyra reports the 'kfp' runtime; in any other environment nothing is written.
if os.environ.get('ELYRA_RUNTIME_ENV') == 'kfp':
    # For information about Elyra environment variables refer to
    # https://elyra.readthedocs.io/en/stable/user_guide/best-practices-file-based-nodes.html#proprietary-environment-variables

    # Single inline markdown output announcing that the GaMMA step finished.
    completion_note = {
        'storage': 'inline',
        'source': '# Run GaMMA Complete\n',
        'type': 'markdown',
    }
    metadata = {'outputs': [completion_note]}

    with open('mlpipeline-ui-metadata.json', 'w') as metadata_file:
        json.dump(metadata, metadata_file)