In [None]:
# Notebook to run all the seismic analysis and processing in Olinger et al. (2021)

In [None]:
import numpy as np
import pathlib
import time
import h5py
import obspy
import types
import pyasdf
from datetime import datetime
import matplotlib.pyplot as plt
from detection.stalta_detector import stalta_detector
from detection.master_event_correlation import correlate_master
from detection.master_event_correlation import threshold_detections
from detection.master_event_correlation import plot_catalog
from detection.template_match import make_templates
from detection.template_match import template_match
from detection.template_match import detection_timeseries
from clustering.clustering import get_input_waveforms
from clustering.clustering import load_waveform
from clustering.clustering import cluster_events
from clustering.clustering import plot_clusters

In [None]:
'''

Set the data and metadata locations used by every later cell

The download and response-removal steps are placeholders in this cell
(no code runs for them); only the paths are configured.

'''

# path to the metadata
xml_path = "/media/Data/Data/PIG/XML/HH/"

# path to save data- folders will be created in this directory
data_path = "/media/Data/Data/PIG/MSEED/"

# pull the data from IRIS servers

# remove instrumental response

# from here on, point at the subfolder holding data with the instrumental response removed
data_path = "".join((data_path, "noIR/"))

In [None]:
'''

Find template events with the 2-band sta-lta detector

All settings are gathered on a single parameter object that is handed
to stalta_detector.

'''

# build the detection parameter object in one go
d = types.SimpleNamespace(
    # the two frequency bands
    low_freq=[0.01, 1],
    high_freq=[1, 10],
    tolerance=120,
    # on/off thresholds for each band
    low_thresh_on=8,
    low_thresh_off=2,
    high_thresh_on=20,
    high_thresh_off=2,
    # sta and lta lengths
    sta_len=10,
    lta_len=300,
    num_stations=3,
    # window to pull template around detection in seconds
    buffer=[2 * 60, 3 * 60],
    # paths to data and metadata
    data_path=data_path,
    xml_path=xml_path,
    # number of processors
    n_procs=10,
)

# run the detector and save ASDF dataset with waveforms and metadata
stalta_detector(d)

In [None]:
'''

Cross correlate master event with the rest of the detections to identify best templates

The master event is looked up in the sta-lta catalog by start time and
station. The cell stops with an explicit error if that lookup finds
nothing, instead of carrying on with an undefined or stale master event.

'''

# set normalized cross correlation threshold for making templates
threshold = 0.9

# load catalog in read-only mode
ds = pyasdf.ASDFDataSet("outputs/detections/stalta_catalog.h5",mode='r')

# get waveforms for a single station
waveforms = ds.waveforms.XC_PIG2.stream

# filter waveforms (taper and filter both modify the stream in place)
freq = [0.05,1]
waveforms.taper(max_percentage=0.1, max_length=30.)
waveforms.filter("bandpass",freqmin=freq[0],freqmax=freq[1])

# reset first: master_event is only assigned inside the loop, so without this
# a value left in the kernel by another cell or an earlier run could be reused
master_event = None

# set master event for correlation after plotting to see if it looks dispersive and has high SNR
for station in ds.ifilter(ds.q.starttime == "2013-07-10T00:33:54.290001Z",ds.q.station=="PIG2"):
    master_event = station.stream
    master_event.taper(max_percentage=0.1, max_length=30.)
    master_event.filter("bandpass",freqmin=freq[0],freqmax=freq[1])
    master_event.plot()

# fail loudly (and close the catalog) when the query matched nothing
if master_event is None:
    del ds
    raise ValueError("master event not found in outputs/detections/stalta_catalog.h5 - check start time and station")

# correlate master with all other waveforms
correlation_coefficients, shifts = correlate_master(master_event,waveforms,"detection/stalta")

# apply threshold to choose best templates and make plots
threshold_detections(waveforms,correlation_coefficients,shifts,threshold)

# close ASDF dataset
del ds

In [None]:
'''

Make ASDF dataset of templates for use in multiband template matching procedure

Reads the list of template times from template_times.h5 and passes it,
together with the sta-lta catalog and both frequency bands, to
make_templates.

'''

# set frequency limits for low and high frequency templates
low_freq = [0.05,1]
high_freq = [1,10]

# load catalog in read-only mode
ds = pyasdf.ASDFDataSet("outputs/detections/stalta_catalog.h5",mode='r')

# load list of template times; the context manager closes the file even if the read fails
with h5py.File('outputs/detections/template_times.h5', 'r') as template_file:
    template_times = list(template_file['timestamps'])

# make all the templates
make_templates(ds,template_times,high_freq,low_freq,xml_path)

In [None]:
'''

Plot each template to verify quality

For every event in the template dataset, each matching station record
is plotted twice: first its "low" waveforms, then its "high" waveforms.

'''

# open ASDF dataset (read-only)
ds = pyasdf.ASDFDataSet("outputs/detections/templates.h5",mode='r')

# make plot of both frequency bands, low band first
for event in ds.events:
    records = ds.ifilter(ds.q.event == event)
    for station in records:
        for tag in ("low", "high"):
            getattr(station, tag).plot()

In [None]:
'''

Delete a few templates that are unsuitable for use in template matching

The deletions modify templates.h5, so the dataset has to be open in a
writable mode for this cell.

'''

# the plotting cell leaves templates.h5 open read-only, which cannot accept
# deletions; release that handle (if there is one) before reopening the file
try:
    del ds
except NameError:
    pass

# reopen in append mode so waveforms can be removed
ds = pyasdf.ASDFDataSet("outputs/detections/templates.h5",mode='a')

# delete a specific station's record of a particular event
for station in ds.ifilter(ds.q.starttime == "2013-03-23T05:46:55.24",ds.q.station == "PIG4"):
    del station.low
    del station.high

# close the dataset
del ds

In [None]:
'''

Run the 2-band template matching detection procedure

All settings are gathered on a single parameter object that is handed
to template_match.

'''

# build the template matching parameter object in one go
d = types.SimpleNamespace(
    # the two frequency bands
    low_freq=[0.05, 1],
    high_freq=[1, 10],
    tolerance=120,
    # on/off thresholds for each band
    low_thresh_on=0.3,
    low_thresh_off=0.1,
    high_thresh_on=0.2,
    high_thresh_off=0.1,
    num_stations=3,
    # window to pull template around detection in seconds
    buffer=[2 * 60, 3 * 60],
    # paths to data and metadata
    data_path=data_path,
    xml_path=xml_path,
    # number of processors
    n_procs=18,
)

# run the parallel template matching code
template_match(d)

In [None]:
'''

Plot the results of template matching

Makes a histogram of detection times, then correlates one master event
with every waveform from a single station and plots the catalog. The
cell stops with an explicit error if the master event lookup finds
nothing, so a master event left over from another cell is never reused.

'''

# load catalog
ds = pyasdf.ASDFDataSet("outputs/detections/template_matching_catalog.h5",mode='a')

# plot waveforms of each detection using obspy (slow and not recommended)
#plot_detections(ds)

# make basic histogram of detection times
detection_timeseries(ds,"outputs/detections/template_matching_timeseries.png")

# get waveforms for a single station, raising the per-item read limit first
ds.single_item_read_limit_in_mb=6000
waveforms = ds.waveforms.XC_PIG2.stream

# filter waveforms (taper and filter both modify the stream in place)
freq = [0.05,1]
waveforms.taper(max_percentage=0.1, max_length=30.)
waveforms.filter("bandpass",freqmin=freq[0],freqmax=freq[1])

# reset first: master_event is only assigned inside the loop, so without this
# the master event from the sta-lta correlation cell could be silently reused
master_event = None

# set master event for correlation after plotting to see if it looks dispersive and has high SNR
for station in ds.ifilter(ds.q.starttime == "2013-07-10T00:33:55.240001Z" ,ds.q.station=="PIG2"):
    master_event = station.stream
    master_event.taper(max_percentage=0.1, max_length=30.)
    master_event.filter("bandpass",freqmin=freq[0],freqmax=freq[1])
    master_event.plot()

# fail loudly (and close the catalog) when the query matched nothing
if master_event is None:
    del ds
    raise ValueError("master event not found in outputs/detections/template_matching_catalog.h5 - check start time and station")

# correlate master with all other waveforms
correlation_coefficients, shifts = correlate_master(master_event,waveforms,"detection/template_matching")

# plot the catalog using the correlation coefficients and shifts (no threshold is applied here)
plot_catalog(waveforms,correlation_coefficients,shifts)

# close ASDF dataset
del ds

In [None]:
'''

Cluster the catalog with k-shape

Builds the input waveform matrix for one station, writes it to disk,
reads it back and passes it to cluster_events.

'''

# load catalog in read-only mode
ds = pyasdf.ASDFDataSet("outputs/detections/template_matching_catalog.h5",mode='r')

# initialize  parameter object and set parameters for clustering
c = types.SimpleNamespace()
c.station = "PIG2"
c.component_order = ["Z","N","E"]
c.freq = [0.05,1]
c.num_clusters = 2
c.trace_length = 500
c.data_path = data_path

# get matrix of 3D waveforms from a single station
waveforms = get_input_waveforms(ds,c)

# save waveforms; the context manager closes the file even if the write fails
home_dir = str(pathlib.Path().absolute())
with h5py.File("/media/Data/Data/input_waveforms.h5",'w') as waveform_file:
    waveform_file.create_dataset("waveforms",data=waveforms)

# load waveforms back from disk as an in-memory array
with h5py.File("/media/Data/Data/input_waveforms.h5",'r') as waveform_file:
    waveforms = np.array(waveform_file['waveforms'])

# close ASDF dataset- the waveforms now come from the file written above
del ds

# run clustering
cluster_events(c,waveforms)

In [None]:
'''

Make plots of the clustered waveforms

For each cluster: correlate its member waveforms with the cluster
centroid, plot the members, and plot a histogram of detection times.
Uses `c` and `waveforms` from the clustering cell.

'''

# waveforms is reused from the clustering cell; uncomment to reload it from disk instead
# waveform_file = h5py.File("/media/Data/Data/input_waveforms.h5",'r')
# waveforms = np.array(waveform_file['waveforms'])
# waveform_file.close()

# load clustering results; the context manager closes the file even if a read fails
home_dir = str(pathlib.Path().absolute())
with h5py.File(home_dir + "/outputs/clustering/" + str(c.num_clusters) + "_cluster_results.h5","r") as cluster_file:
    predictions = np.array(list(cluster_file["cluster_index"]))
    centroids = list(cluster_file["centroids"])

# load catalog
ds = pyasdf.ASDFDataSet("outputs/detections/template_matching_catalog.h5",mode='r')

for cluster in range(len(centroids)):

    # flattened centroid and member waveforms of this cluster (selected once, used twice below)
    centroid = centroids[cluster].ravel()
    cluster_waveforms = waveforms[predictions == cluster]

    # get obspy stream with centroid
    master_event = obspy.Stream(obspy.Trace(centroid))

    # put 3-component waves used for clustering in obspy streams
    waveform_stream = obspy.Stream([])
    for waveform in cluster_waveforms:
        waveform_stream += obspy.Trace(waveform)

    # correlate waves in each cluster with their centroid
    correlation_coefficients, shifts = correlate_master(master_event,waveform_stream,"clustering/cluster_"+str(cluster))

    # read correlation results back from disk, replacing the values returned above
    # NOTE(review): presumably this file is written by correlate_master - confirm
    with h5py.File("outputs/clustering/cluster_" + str(cluster) + "_correlations.h5",'r') as correlation_file:
        correlation_coefficients = np.array(correlation_file['correlation_coefficients'])
        shifts = np.array(correlation_file['shifts'])

    # make plots of waveforms from each cluster
    plot_clusters(c,cluster,centroid,cluster_waveforms,correlation_coefficients,shifts)

    # make basic histogram of detection times for this cluster
    detection_timeseries(ds,"outputs/clustering/cluster_" + str(cluster) + "_timeseries.png",cluster,predictions)

# close ASDF dataset
del ds