## Classify emergent detections from STA/LTA on the basis of waveform characteristics
- runs in parallel over the list of detections

In [1]:
import os
import pickle
import glob
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import numpy as np
import obspy
from obspy.clients.fdsn.client import Client 
import obspy
import pandas as pd
import scipy.ndimage
import geopy.distance
import random
client = Client('IRIS')
import scipy
import seaborn as sn
import dask
from dask.diagnostics import ProgressBar

## Pull in emergent detections from STA/LTA triggering

In [2]:
network = 'OO'
station = 'HYS14'
channel = 'HHN'
samp_rate = 200

In [3]:
# For all:
file_name='results/HYSB1_HHN_3-10Hz_triggering.pickle'
with open(file_name,'rb') as handle:
    detections = pickle.load(handle)

In [4]:
# Filter the detections based on time, if desired!
t1 = obspy.UTCDateTime(2017,9,1)
t2 = obspy.UTCDateTime(2017,9,8)

t_keep = [i for i,e in enumerate(detections) if (e[0]>t1) & (e[0]<t2)]
detections = [detections[i] for i in t_keep]

## Define classification functions

In [5]:
def pick_peaks_welch(trace,sampling_rate,nperseg_multiple,microseism_cutoff=True):
    """
    Estimate power spectra of a trace using Welch's method
    Pick peaks within the spectra!
    
    INPUTS:
    trace = obspy object, waveform
    sampling_rate = sampling rate of trace
    nperseg_multiple = length of each segment used to construct the Welch spectrum
    microseism_cutoff = Bool, whether or not to cut off the lower end of the spectrum to avoid the microseism
    
    OUTPUTS:
    f = frequencies of the spectra
    Pxx_den = associated power at each frequency, in decibels
    peak_ind = index of peaks within the spectra (f and Pxx_den), if found
    peaks = picked peak object from scipy
    median_power = median power of spectra from 20-80 Hz in decibels
    """
    
    
    fs = sampling_rate
    x = trace.data
    nperseg = fs * nperseg_multiple
    
    f,Pxx_den = scipy.signal.welch(x,fs,nperseg=nperseg)
    if microseism_cutoff is True:
        f = f[4:]
        Pxx_den = Pxx_den[4:]
        
    Pxx_den = [10*np.log10(d) for d in Pxx_den]
    median_power = np.median(Pxx_den[20:80])
    
    peaks = scipy.signal.find_peaks(Pxx_den,threshold =median_power*5,prominence=10) 
    peak_ind = peaks[0]
    
    return(f,Pxx_den,peak_ind,peaks,median_power)

In [6]:
def apply_gaussian(filtered_data,samp_rate,gaussian_width=5):
    """
    Smooth waveform using a gaussian window
    
    INPUTS
    filtered_data = filtered numpy array of seismic data (from an obspy trace)
    samp_rate = sampling rate of data
    gaussian width = width of Gaussian window in seconds
    
    OUTPUTS
    smoothed_window = smoothed numpy array of seismic data
    """
    
    # Square data
    data = filtered_data**2
    
    gaussian_radius = int((gaussian_width * samp_rate)/2)
    smoothed_window=scipy.ndimage.gaussian_filter1d(data,sigma=gaussian_radius/4,radius=gaussian_radius)
    
    return smoothed_window

In [7]:
def ship_noise_classifier(trace,sampling_rate):
    """
    Check whether detection likely includes ship noise in the form of a spectral peak
    
    INPUTS
    trace = obspy trace object
    sampling_rate = sample rate of trace
    
    OUTPUTS
    ship_classifier = number of peaks in the spectra. If any exist, ship noise is likely!
    """
    
    
    # Pick peaks on the smoothed spectrum of the trace (nperseg multiple = 1)
    f,Pxx_den,peak_ind,peak_details,median_power = pick_peaks_welch(trace,sampling_rate,1,microseism_cutoff=True)
    
    if len(peak_ind)==0:
        ship_classifier = 0
    else:
        ship_classifier = len(peak_ind)

    
    return ship_classifier

1. Calculate whether the detection likely includes ship noise (whether there are peaks in the Welch spectra)
2. Perform 15 s Gaussian smoothing on filtered (4-15 Hz) waveform, calculate number of peaks with prominence > 0.1
- 1 peak indicates T-phase, > 1 peak is consistent with tremor
3. Calculate frequency ratio (ratio between power in 5-10 and 10-15 Hz) with 30 s padding on either side
- Frequency ratio > 100 is consistent with tectonic tremor

In [8]:
def classify_detection(t,network,station,channel,samp_rate,filepath):
    """
    INPUTS 
    t = tuple of UTCDatetime, with on and off trigger time of detection
    network = string
    station = string
    channel = string
    samp_rate = sampling rate of channel
    filepath = directory to save classification information to
    
    
    OUTPUTS
    Writes to file!
    t = on and off times of detection
    num_waveform_peaks = number of peaks with prominence > 0 in gaussian smoothed 4-15 Hz filtered waveform; more than 1 indicates a T-phase
    ship_classifier = number of peaks in the Welch spectra, existence of peaks indicates ship noise
    freq_ratio_welch = ratio between normalized decibels of Welch spectrum for 5-10 and 10-15 Hz. Values > 100 indicate tectonic tremor
    max_amplitude = maximum amplitude of filtered waveform
    """
    file_name = filepath +  station + '_' + str(t[0]).split('.')[0] + '.pickle'
    if os.path.isfile(file_name)==True:
        return
    
    # try:
    pad = 0
    t1 = t[0]-pad
    t2 = t[1]+pad

    # Check whether there are peaks in the Welch spectra
    # If ship_classifier > 0, indicates presence of ship noise
    st1 = client.get_waveforms(network,station, "*",channel, t1-5, t2+5,attach_response=True);
    st1.resample(samp_rate).merge(fill_value='interpolate')
    st1[0].data = st1[0].data / st1[0].stats.response.instrument_sensitivity.value # Convert to m/s
    st1.trim(starttime=t1,endtime=t2)
    ship_classifier = ship_noise_classifier(st1[0],samp_rate)

    # Get number of peaks in Gaussian-smoothed waveform
    # If number of peaks = 1, indicates T-phase
    st1 = client.get_waveforms(network,station, "*",channel, t1-5, t2+5,attach_response=True);
    st1.resample(samp_rate).merge(fill_value='interpolate')
    st1.filter('bandpass',freqmin=3,freqmax=10)
    st1.remove_response()
    st1.trim(starttime=t1,endtime=t2)
    max_amplitude = np.max(np.abs(st1[0].data))
    smoothed_window = apply_gaussian(st1[0].data,samp_rate,gaussian_width=15)
    window_max = np.max(smoothed_window) # normalize window by its maximum
    smoothed_window = [i/window_max for i in smoothed_window]
    peaks = scipy.signal.find_peaks(smoothed_window,prominence=.1)
    num_waveform_peaks=len(peaks[0])

    # Calculate frequency ratio with 30 s padding on either side - using Welch
    pad = 30
    t1 = t[0]-pad
    t2 = t[1]+pad
    st2 = client.get_waveforms(network,station, "*",channel, t1-5, t2+5,attach_response=True);
    st2.resample(samp_rate).merge(fill_value='interpolate')
    st2[0].data = st2[0].data / st2[0].stats.response.instrument_sensitivity.value # Convert to m/s
    st2.trim(starttime=t1,endtime=t2)
    f,Pxx_den,peak_ind,peak_details,median_power = pick_peaks_welch(st2[0],samp_rate,5,microseism_cutoff=False)
    normalized_power = Pxx_den
    freq_ratio = 10**(np.median(normalized_power[25:50])/10)/10**(np.median(normalized_power[50:75])/10)
    freq_ratio_welch=freq_ratio

    # Write results to file
    file_name = filepath +  station + '_' + str(t[0]).split('.')[0] + '.pickle'
    with open(file_name, 'wb') as handle:
            pickle.dump([t,station,num_waveform_peaks,ship_classifier,freq_ratio_welch,max_amplitude],handle)
    '''
    except:
        didntwork = 1
    
    '''
    return

## Loop in parallel
Note: simply overwrites files if they already exist

In [9]:
filepath = 'classifications/'

In [10]:
@dask.delayed
def loop_detections(t,network,station,channel,samp_rate,filepath):
    return classify_detection(t,network,station,channel,samp_rate,filepath)



In [11]:
lazy_results = [loop_detections(t,network,station,channel,samp_rate,filepath) for t in detections]

In [12]:
with ProgressBar():
    results = dask.compute(lazy_results,num_workers=5)

[################                        ] | 42% Completed | 25.78 ss


FDSNNoDataException: No data available for request.
HTTP Status code: 204
Detailed response of server:



In [14]:
len(detections)

NameError: name 'triggers' is not defined

## Pull in all saved files of classifications and save to one pickle file

In [34]:
files = glob.glob(filepath+station+'*')

classifications = []
for f in files:
    with open(f,'rb') as handle:
        classi = pickle.load(handle)
        classifications.append(classi)
        
# Sort!
times = [c[0][0] for c in classifications]
sort_ind = np.argsort(times)
classifications = [classifications[i] for i in sort_ind]

In [35]:
## Write all to pickle
file_name = 'EBS3_EH1_3-10Hz_classifications_new.pickle'
pickle.dump(classifications,open(file_name,'wb'))

In [36]:
len(classifications)

316