In [1]:
# Associator
# Find an association of the waveforms

In [1]:
import sys
import numpy as np
import pandas as pd
import os
import json
import copy
import requests
import tensorflow as tf
# Enable on-demand GPU memory growth for the first visible GPU.
# The device list is empty on a CPU-only machine; indexing it unconditionally
# would raise IndexError before any other cell could run, so guard the access.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)
from collections import deque
from math import ceil
from tensorflow.keras.models import load_model
from scipy.cluster.hierarchy import ward, fcluster
from scipy.spatial.distance import squareform
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
from tensorflow import keras as ks
from obspy import UTCDateTime
from dateutil import parser

from flask import Flask, request, jsonify
# Import configuration parameters for pipeline
import pconf

In [2]:
from tensorflow.keras.losses import binary_crossentropy as BCE
from tensorflow.keras import backend as K
def nzHaversine(ytrue, ypred):
    """Mean haversine (great-circle) distance between true and predicted locations.

    Channel 0 of both tensors is de-normalized with ``latRange``/``extents[0]``
    (latitude) and channel 1 with ``lonRange``/``extents[2]`` (longitude); both
    are then converted from degrees to radians.

    Args:
        ytrue: target tensor indexed as [sample, arrival, (lat, lon)].
        ypred: predicted tensor with the same layout.

    Returns:
        Scalar tensor: for each sample the summed distance is divided by the
        number of used arrivals, and the result is averaged over the batch.
    """
    deg2rad = 0.017453292519943295  # pi / 180
    observation = tf.stack([ytrue[:,:,0]*latRange + extents[0], ytrue[:,:,1]*lonRange + extents[2]],axis=2)*deg2rad
    prediction = tf.stack([ypred[:,:,0]*latRange + extents[0], ypred[:,:,1]*lonRange + extents[2]],axis=2)*deg2rad
    # An arrival slot counts as used when its target coordinates sum to > 0.
    used = tf.reduce_sum(tf.cast(tf.greater(tf.reduce_sum(ytrue, axis=2),0), dtype=tf.float32), axis=1)
    # Same guard as the sibling metrics: avoid dividing by zero for all-padding samples.
    used = tf.where(tf.equal(used, 0.), 1., used)
    # Index 0 is the latitude difference, index 1 the longitude difference.
    dlat_dlon = observation - prediction
    v = tf.sin(dlat_dlon / 2)**2
    # Haversine: sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2).
    # The cosines must be taken on latitude (channel 0); the previous version
    # used channel 1 (longitude) there and swapped the two squared-sine terms.
    a = v[:,:,0] + tf.cos(observation[:,:,0]) * tf.cos(prediction[:,:,0]) * v[:,:,1]
    c = 2*tf.math.asin(tf.sqrt(a))
    c = c*6378.1  # scale the central angle by the Earth's radius in km
    final = tf.reduce_sum((tf.reduce_sum(c, axis=1))/used)
    final = final/tf.dtypes.cast(tf.shape(observation)[0], dtype= tf.float32)
    return final

def nzDepth(ytrue, ypred):
    """Mean absolute depth error, rescaled by ``extents[4]``.

    Args:
        ytrue: target depths; assumed to be [sample, maxArrivals] -- TODO confirm.
        ypred: predicted depths; singleton dimensions are squeezed away before
            the comparison.

    Returns:
        Scalar tensor: each sample's summed absolute error divided by its
        count of non-zero targets, averaged over the batch.
    """
    zero_targets = tf.cast(tf.equal(ytrue, 0), dtype=tf.float32)
    # Non-zero targets per sample; zero targets are treated as padding.
    n_used = maxArrivals - tf.reduce_sum(zero_targets, axis=1)
    # Avoid dividing by zero when a sample has no non-zero target.
    n_used = tf.where(tf.equal(n_used, 0.), 1., n_used)
    abs_err = abs(tf.squeeze(ypred) - ytrue) * extents[4]
    per_sample = tf.reduce_sum(abs_err, axis=1) / n_used
    batch_size = tf.dtypes.cast(tf.shape(ytrue)[0], dtype=tf.float32)
    return tf.reduce_sum(per_sample) / batch_size

def nzTime(ytrue, ypred):
    """Mean absolute time error, rescaled by ``timeNormalize``.

    Args:
        ytrue: target times; assumed to be [sample, maxArrivals] -- TODO confirm.
        ypred: predicted times; singleton dimensions are squeezed away before
            the comparison.

    Returns:
        Scalar tensor: each sample's summed absolute error divided by its
        count of non-zero targets, averaged over the batch.
    """
    zero_targets = tf.cast(tf.equal(ytrue, 0), dtype=tf.float32)
    # Non-zero targets per sample; zero targets are treated as padding.
    n_used = maxArrivals - tf.reduce_sum(zero_targets, axis=1)
    # Avoid dividing by zero when a sample has no non-zero target.
    n_used = tf.where(tf.equal(n_used, 0.), 1., n_used)
    abs_err = abs(tf.squeeze(ypred) - ytrue) * timeNormalize
    per_sample = tf.reduce_sum(abs_err, axis=1) / n_used
    batch_size = tf.dtypes.cast(tf.shape(ytrue)[0], dtype=tf.float32)
    return tf.reduce_sum(per_sample) / batch_size
    
def nzMSE1(ytrue, ypred):
    """Squared-error loss normalized by the number of non-zero targets.

    Args:
        ytrue: targets; assumed to be [sample, maxArrivals] -- TODO confirm.
        ypred: predictions; singleton dimensions are squeezed away first.

    Returns:
        Scalar tensor: mean over samples of (summed squared error / used count).
    """
    zero_targets = tf.cast(tf.equal(ytrue, 0), dtype=tf.float32)
    n_used = maxArrivals - tf.reduce_sum(zero_targets, axis=1)
    # Avoid dividing by zero when a sample has no non-zero target.
    n_used = tf.where(tf.equal(n_used, 0.), 1., n_used)
    sq_err = K.square(tf.squeeze(ypred) - ytrue)
    return K.mean(tf.reduce_sum(sq_err, axis=1) / n_used)

def nzMSE2(ytrue, ypred):
    """Squared-error loss for rank-3 targets, normalized by used rows.

    A row (axis 1 entry) counts as used when the sum of its last-axis values in
    ``ytrue`` is greater than zero.

    Returns:
        Scalar tensor: mean over samples of (squared error summed over axes 1
        and 2 / used-row count).
    """
    row_is_used = tf.greater(tf.reduce_sum(ytrue, axis=-1), 0)
    n_used = tf.reduce_sum(tf.cast(row_is_used, dtype=tf.float32), axis=1)
    # Avoid dividing by zero when a sample has no used row.
    n_used = tf.where(tf.equal(n_used, 0.), 1., n_used)
    sq_err = K.square(ypred - ytrue)
    return K.mean(tf.reduce_sum(sq_err, axis=[1,2]) / n_used)

def nzBCE(ytrue, ypred):
    """Binary cross-entropy normalized by the number of used columns.

    A column counts as used when ``ytrue`` summed over axis 1 is greater than
    zero for it.

    Returns:
        Scalar tensor: mean over samples of (cross-entropy summed over axis 1 /
        used count).
    """
    col_is_used = tf.greater(tf.reduce_sum(ytrue, axis=1), 0)
    n_used = tf.reduce_sum(tf.cast(col_is_used, dtype=tf.float32), axis=1)
    # Avoid dividing by zero when a sample has no used column.
    n_used = tf.where(tf.equal(n_used, 0.), 1., n_used)
    per_row = BCE(ytrue, ypred)
    return K.mean(tf.reduce_sum(per_row, axis=1) / n_used)

def nzAccuracy(ytrue, ypred):
    """Accuracy of the rounded predictions, rescaled to the used sub-matrix.

    With ``acc`` the fraction of matching cells and ``m = cells / used**2``,
    the returned value ``acc*m - m + 1`` equals
    ``1 - (mismatched cells) / used**2`` per sample.

    Args:
        ytrue: target tensor indexed as [sample, row, column].
        ypred: predicted tensor with the same layout; rounded before comparing.

    Returns:
        Scalar tensor: the rescaled accuracy averaged over the batch.
    """
    # Derive the number of cells per sample from the tensor itself instead of
    # relying on a module-level ``matrixSize`` constant.
    dims = tf.shape(ytrue)
    matrix_size = tf.cast(dims[1] * dims[2], dtype=tf.float32)
    used = tf.reduce_sum(tf.cast(tf.greater(tf.reduce_sum(ytrue, axis=1),0), dtype=tf.float32), axis=1)
    # Same guard as the sibling metrics: avoid dividing by zero for empty samples.
    used = tf.where(tf.equal(used, 0.), 1., used)
    m = matrix_size/(used**2)
    acc = tf.reduce_sum(tf.cast(ytrue==tf.round(ypred), dtype=tf.float32),axis=(1,2))/matrix_size
    return K.mean(acc*m - m + 1)

In [3]:
# Turn on memory growth for every visible GPU; a RuntimeError from TensorFlow
# is printed rather than raised so the notebook keeps running.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # Iterating an empty device list is a no-op, so no emptiness check is needed.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    print(err)

In [8]:
# Load the trained association model from the folder/file named in pconf.
# custom_objects maps the custom loss/metric names to the functions defined
# in this notebook so Keras can resolve them while loading.
model_file = os.path.join(pconf.model_folder, pconf.assoc_model)
model = load_model(
    model_file,
    custom_objects={
        'nzBCE': nzBCE,
        'nzMSE1': nzMSE1,
        'nzMSE2': nzMSE2,
        'nzAccuracy': nzAccuracy,
        'nzHaversine': nzHaversine,
        'nzDepth': nzDepth,
        'nzTime': nzTime,
    },
    compile=True,
)

In [9]:
# Flask application object that serves this notebook's endpoints.
app = Flask(__name__)
# Key of this service: used as the POST route path and to look up its port in pconf.apps.
app_name = 'assoc'
# Key of the downstream service that forward() posts to (route path and pconf.apps lookup).
napp = 'next'

In [10]:
# Health-check endpoint: confirms the service is up
@app.route('/')
def apitest():
    """Return a short plain-text message showing that the service responds."""
    message = f'{app_name} is working'
    return message

In [11]:
# Normalization constants for the model -- do not change.
# Order: latMin, latMax, lonMin, lonMax, depth scale (extents[4] multiplies the
# normalized depth predictions; units are presumably km -- TODO confirm).
extents = [22.0, 40.0, 33.0, 62.0, 50.0] # Normalizing factors for model -- do not change
# Spans used to map latitude/longitude to and from the normalized range.
latRange = abs(extents[1] - extents[0])
lonRange = abs(extents[3] - extents[2])
# NOTE(review): outFile is not referenced anywhere else in the visible notebook.
outFile = "assocOut.csv"
# Number of arrival slots per window fed to the model; longer windows are truncated.
maxArrivals = 50
# Minimum number of arrivals for a window to be evaluated and for an event to be kept.
minArrivals = 3
timeNormalize = 120 # Normalizing factor for model; do not change
# Length of one association window, in the same units as the arrival times (seconds).
associationWindow = 300
# Distance threshold passed to fcluster when cutting the Ward linkage.
clusterStrength = 0.9
# Maps phase labels (both capitalizations) to the integer phase class given to the model.
phases = {'P': 0, 'Pg': 0, 'PcP': 0, 'Pb': 0, 'p': 0, 'pg': 0, 'pcp': 0, 'pb': 0,
          'Pn': 1, 'pn': 1,
          'S': 2, 'Sg': 2, 'ScP': 2, 'Lg': 2, 'Sb': 2, 's': 2, 'sg': 2, 'scp': 2, 'lg': 2,
          'Sn': 3, 'sn': 3}

# Build permutation lists and matrices to predict on
def permute(X=None):
    """Slice the arrival matrix into overlapping time windows for prediction.

    Windows are ``associationWindow`` long and advance by a fifth of that
    length, starting one step before time zero. A window is kept only when it
    holds at least ``minArrivals`` arrivals; arrivals beyond ``maxArrivals`` in
    a window are dropped.

    Args:
        X: arrival matrix whose columns are (lat, lon, time, phase, valid flag),
            sorted by time as returned by ``processInput``. When omitted, the
            module-level ``X`` is used (the original calling convention).

    Returns:
        X_perm: deque of index arrays into ``X``, one per kept window.
        X_test: array of shape (windows, maxArrivals, 5), zero padded, with
            times made relative to each window's first arrival and divided by
            ``timeNormalize``.
        innerWindows: deque with the inner-window start time of each kept
            window (window start plus one step).
    """
    if X is None:
        # Legacy call style: fall back to the notebook-level arrival matrix.
        X = globals()['X']
    X_perm = deque()
    innerWindows = deque()
    if len(X) == 0:
        # Nothing to window; X[:,2].max() would fail on an empty matrix.
        return X_perm, np.zeros((0, maxArrivals, 5)), innerWindows
    outerWindow = associationWindow
    edgeWindow = outerWindow/5
    numWindows = ceil((X[:,2].max() + edgeWindow*2) / edgeWindow)
    start = -edgeWindow
    for _ in range(numWindows):
        end = start+outerWindow
        windowArrivals = np.where((X[:,2] >= start) & (X[:,2] < end))[0]
        start += edgeWindow
        if len(windowArrivals) >= minArrivals:
            X_perm.append(windowArrivals[:maxArrivals])
            # start has already been advanced, so this is the inner-window start.
            innerWindows.append(start)
    X_test = np.zeros((len(X_perm),maxArrivals,5))
    for i in range(len(X_perm)):
        X_test[i,:len(X_perm[i])] = X[X_perm[i]]
        X_test[i,:len(X_perm[i]),2] -= X_test[i,0,2]
    X_test[:,:,2] /= timeNormalize
    return X_perm, X_test, innerWindows

def buildEvents(X_perm, X_test, Y_pred, innerWindows, X=None, labels=None):
    """Cluster each window's predictions and assemble an event catalogue.

    Args:
        X_perm, X_test, innerWindows: outputs of ``permute``.
        Y_pred: model outputs, indexed as used below: [0] pairwise association
            matrix, [1] normalized (lat, lon), [2] normalized depth,
            [3] normalized origin-time offset.
        X: arrival matrix the windows index into. When omitted, the
            module-level ``X`` is used (the original calling convention).
        labels: DataFrame with one row per row of ``X`` (needs ARID, TIME and
            EVID columns). When omitted, the module-level ``labels`` is used.

    Returns:
        events: deque of arrival-index arrays for events promoted without any
            overlap with the catalogue (replacement events are not recorded here).
        catalogue: DataFrame of the arrivals of all retained events with EVID,
            LAT, LON, DEPTH and ETIME filled in; events left with fewer than
            ``minArrivals`` rows are removed.
    """
    if X is None:
        X = globals()['X']
    if labels is None:
        labels = globals()['labels']

    # Get clusters for predicted matrix at index i
    def cluster(i):
        valids = np.where(X_test[i][:,4])[0]
        validPreds = Y_pred[0][i][valids,:len(valids)]
        # Symmetrize the association scores and turn them into distances.
        L = 1-((validPreds.T + validPreds)/2)
        np.fill_diagonal(L,0)
        return fcluster(ward(squareform(L)), clusterStrength, criterion='distance')

    # DataFrame.append was removed in pandas 2.0; concatenate instead, and skip
    # the concat while the catalogue is still empty so dtypes come from the data.
    def extend(catalogue, candidate):
        if catalogue.empty:
            return candidate
        return pd.concat([catalogue, candidate])

    innerWindow = associationWindow * (3/5)
    catalogue = pd.DataFrame(columns=labels.columns)
    events = deque()
    evid = 1
    for window in range(len(X_perm)):
        clusters = cluster(window)
        for c in np.unique(clusters):
            pseudoEventIdx = np.where(clusters == c)[0]
            pseudoEvent = X_perm[window][pseudoEventIdx]
            if len(pseudoEvent) >= minArrivals: #TODO remove this
                event = X[pseudoEvent]
                # check for containment within inner window
                contained = (event[0,2] >= innerWindows[window]) & (event[-1,2] <= (innerWindows[window]+innerWindow))
                if contained:
                    candidate = labels.iloc[pseudoEvent].copy()
                    candidate['LAT'] = np.median(Y_pred[1][window][pseudoEventIdx][:,0])*latRange+extents[0]
                    candidate['LON'] = np.median(Y_pred[1][window][pseudoEventIdx][:,1])*lonRange+extents[2]
                    candidate['DEPTH'] = np.median(Y_pred[2][window][pseudoEventIdx])*extents[4]
                    candidate['ETIME'] = candidate.TIME.iloc[0]+(np.median(Y_pred[3][window][pseudoEventIdx])*timeNormalize)
                    # check for existence in catalogue
                    overlap = candidate.ARID.isin(catalogue.ARID).sum()
                    if overlap == 0:
                        print("\rPromoting event " + str(evid), end='')
                        events.append(pseudoEvent)
                        # Item assignment always writes the column; attribute
                        # assignment would not create it if it were missing.
                        candidate['EVID'] = evid
                        catalogue = extend(catalogue, candidate)
                        evid += 1
                    elif len(pseudoEvent) > overlap:
                        # The candidate brings new arrivals: take the shared
                        # arrivals away from the older event(s) and add it.
                        catalogue = catalogue[~catalogue.ARID.isin(candidate.ARID)]
                        candidate['EVID'] = evid
                        catalogue = extend(catalogue, candidate)
                        evid += 1
    catalogue = catalogue.groupby('EVID').filter(lambda x: len(x) >= minArrivals)
    print()
    return events, catalogue

def matrixLink(X, labels):
    """Run windowing, model prediction and event building for one arrival set.

    Args:
        X: arrival matrix from ``processInput``.
        labels: matching label DataFrame from ``processInput``.

    Returns:
        The event catalogue DataFrame produced by ``buildEvents``; an empty
        frame with the columns of ``labels`` when no window has enough arrivals.
    """
    print("Creating permutations... ", end='')
    # Pass the arguments through explicitly so the result never depends on
    # whatever X/labels happen to exist at notebook level.
    X_perm, X_test, innerWindows = permute(X)
    if len(X_perm) == 0:
        print("no windows with at least %d arrivals" % minArrivals)
        return pd.DataFrame(columns=labels.columns)
    print("predicting... ", end='')
    Y_pred = model.predict({"phase": X_test[:,:,3], "numerical_features": X_test[:,:,[0,1,2,4]]})
    print("clustering and building events...")
    events, catalogue = buildEvents(X_perm, X_test, Y_pred, innerWindows, X, labels)
    return catalogue

def processInput(inputs):
    """Convert a table of picks into the model's arrival matrix.

    Args:
        inputs: DataFrame with at least PHASE, TIME, ST_LAT and ST_LON columns.
            TIME may be anything ``UTCDateTime`` accepts.

    Returns:
        X: float array with one row per accepted arrival and columns
            (normalized lat, normalized lon, seconds since the earliest
            arrival, phase class, valid flag = 1), sorted by time. Shape (0, 5)
            when no arrival is accepted.
        labels: DataFrame holding the corresponding input rows in the same
            order (empty, with the input columns, when nothing is accepted).
    """
    print("Reading input file... ", end='')
    rows = []
    kept = []
    epoch = UTCDateTime(0)
    for _, r in inputs.iterrows(): # I can do this better
        phaseClass = phases.get(r.PHASE)
        if phaseClass is None:
            # Arrivals whose phase label is not in `phases` cannot be encoded.
            print("skipping arrival with unknown phase %r" % (r.PHASE,))
            continue
        # NOTE(review): abs() folds stations outside the extents back into
        # positive values instead of rejecting them -- confirm this is intended.
        lat = abs((r.ST_LAT - extents[0]) / latRange)
        lon = abs((r.ST_LON - extents[2]) / lonRange)
        otime = UTCDateTime(r.TIME) - epoch
        rows.append([lat, lon, otime, phaseClass, 1])
        kept.append(r)
    if not rows:
        # Without this guard the column slicing below raises IndexError.
        print("0 arrivals found")
        return np.empty((0, 5)), pd.DataFrame(columns=inputs.columns)
    X = np.array(rows)
    idx = np.argsort(X[:,2])
    X = X[idx,:]
    # Make times relative to the earliest arrival.
    X[:,2] -= X[0,2]
    labels = pd.DataFrame([kept[i] for i in idx])
    print("%d arrivals found" % len(labels))
    return X, labels

In [12]:
# This is the URI that processes the received requests
@app.route(f'/{app_name}', methods=['POST'])
def process_request():
    """Associate the picks named in the request and forward the request on.

    Expects a JSON object with two keys: ``cat`` (path of the input pick CSV)
    and ``events`` (path the event catalogue CSV is written to). The same
    JSON object is then posted to the next service via ``forward``.

    Returns:
        An empty 204 response on success, or a 400 JSON error when the body is
        not a JSON object containing both keys.
    """
    # silent=True yields None for a missing or malformed body instead of an
    # exception, so the client gets an explicit 400 with an explanation.
    pdict = request.get_json(silent=True)
    if not isinstance(pdict, dict) or 'cat' not in pdict or 'events' not in pdict:
        return jsonify(error="request body must be a JSON object with 'cat' and 'events' paths"), 400
    # NOTE(review): both paths are used exactly as received; restrict them to a
    # trusted directory if this endpoint can be reached by untrusted clients.
    inFile = pdict['cat']
    # NOTE(review): unlike the notebook's offline test cell, no leading row is
    # dropped here -- confirm the service input has no header line.
    inputs = (
        pd.read_csv(inFile, names=['ARID', 'PHASE', 'TIME', 'STA', 'ST_LAT', 'ST_LON'])
        .drop_duplicates(['PHASE','TIME','STA'])
        .sort_values(by=['TIME'])
    )
    # Assign whole columns (not .loc[:, col]) so each column takes the dtype of
    # the converted values instead of keeping the dtype it was read with.
    inputs['TIME'] = inputs.TIME.apply(lambda x: UTCDateTime(parser.parse(x))-UTCDateTime(0))
    inputs['ST_LAT'] = inputs.ST_LAT.astype(float)
    inputs['ST_LON'] = inputs.ST_LON.astype(float)
    # Placeholder event id; real ids are assigned while events are built.
    inputs['EVID'] = -1

    X, labels = processInput(inputs)
    outputs = matrixLink(X, labels)
    outputs.to_csv(pdict['events'])
    print("{} events created".format(outputs.EVID.nunique()))
    print(outputs)
    forward(pdict)
    return ('', 204)

In [13]:
# Offline smoke test: run the associator end to end on a local sample catalogue.
# [1:] discards the first parsed row (presumably the file's header line -- TODO confirm).
test = pd.read_csv('clean_catalog.csv', names=['ARID', 'PHASE', 'TIME', 'STA', 'ST_LAT', 'ST_LON'])[1:].drop_duplicates(['PHASE','TIME','STA']).sort_values(by=['TIME'])
# Convert timestamp strings to an offset from UTCDateTime(0).
test.loc[:,'TIME'] = test.TIME.apply(lambda x: UTCDateTime(parser.parse(x))-UTCDateTime(0))
# Station coordinates are converted to floats.
test.loc[:,'ST_LAT'] = test.ST_LAT.apply(lambda x: float(x))
test.loc[:,'ST_LON'] = test.ST_LON.apply(lambda x: float(x))
# Placeholder event id; real ids are assigned while events are built.
test['EVID'] = -1
# Note: X and labels are assigned at notebook (module) level here.
X, labels = processInput(test)
outputs = matrixLink(X, labels)
# Display the resulting catalogue as the cell output.
outputs

Reading input file... 263 arrivals found
Creating permutations... predicting... clustering and building events...
Promoting event 49


Unnamed: 0,ARID,PHASE,TIME,STA,ST_LAT,ST_LON,EVID,LAT,LON,DEPTH,ETIME
64,346,sn,1.590970e+09,HLID,43.5625,-114.4138,1,33.655364,50.977186,14.255042,1.590970e+09
41,321,sn,1.590970e+09,DUG,40.1950,-112.8133,1,33.655364,50.977186,14.255042,1.590970e+09
66,338,pg,1.590970e+09,HLID,43.5625,-114.4138,1,33.655364,50.977186,14.255042,1.590970e+09
1,297,pn,1.590970e+09,BMO,44.8525,-117.3060,3,33.820355,50.923999,15.874024,1.590970e+09
2,299,pg,1.590970e+09,BMO,44.8525,-117.3060,3,33.820355,50.923999,15.874024,1.590970e+09
...,...,...,...,...,...,...,...,...,...,...,...
151,139,pn,1.591146e+09,HLID,43.5625,-114.4138,48,33.906388,50.468015,16.751945,1.591146e+09
248,217,sn,1.591146e+09,TPNV,36.9488,-116.2495,48,33.906388,50.468015,16.751945,1.591146e+09
63,128,sn,1.591146e+09,HAWA,46.3925,-119.5326,49,33.909687,50.585472,18.900292,1.591146e+09
250,205,pn,1.591146e+09,TPNV,36.9488,-116.2495,49,33.909687,50.585472,18.900292,1.591146e+09


In [14]:
def forward(json_data):
    """POST ``json_data`` to the next service and return its response.

    The target host, port and headers come from ``pconf``; the service is
    selected by the module-level ``napp`` key.
    """
    target = f'http://{pconf.host}:{pconf.apps[napp]}/{napp}'
    return requests.post(target, json = json_data, headers = pconf.head)

In [None]:
if __name__ == '__main__':
    # Start Flask's built-in server on the host and port configured for this service.
    app.run(host=pconf.host, port=pconf.apps[app_name], debug=False)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:6005/ (Press CTRL+C to quit)
