# Gappy Data Preprocessing Tests  
:auth: Nathan T. Stevens  
:email: ntsteven at uw.edu  
:org: Pacific Northwest Seismic Network  


In [1]:
# Import modules
import os
import sys
import numpy as np
from tqdm import tqdm
from obspy import read, UTCDateTime, Stream
from glob import glob
import prediction_methods as ml
import matplotlib.pyplot as plt

In [None]:
# First call interactive matplotlib plotting utilities: enable the "widget" (ipympl)
# backend BEFORE pyrocko's obspy_compat is planted -- see the ordering note in the next cell
%matplotlib widget

In [None]:
# THEN do pyrocko.obspy_compat. Doing this second allows %matplotlib widget and snuffler
# to coexist. Calling obspy_compat.plant() first messes with the MacOSX backend needed by
# %matplotlib widget on Apple-based installations.
# NOTE: this import is deliberately kept out of the top import cell so that it runs after
# the %matplotlib magic. Later cells call .snuffle() on the streams they build, which
# presumably relies on plant() having been run -- confirm against the pyrocko docs.
from pyrocko import obspy_compat
obspy_compat.plant()

In [5]:
# Define path to MSEED day_volumes.
# The default is the original author's local external volume; set the PNW_DATA_DIR
# environment variable to run this notebook against a copy of the archive stored elsewhere.
DATA_DIR = os.environ.get('PNW_DATA_DIR',
                          os.path.join('/Volumes', 'TheWall', 'PNW_Store_Local'))
# Compose search string for glob.glob
# (the trailing components appear to be network / year / day-of-year; day 131 of 2017
# is 11 May, matching TS and TE below)
GLOB_STR = os.path.join(DATA_DIR, 'PNW2017', 'UW', '2017', '131', '*')
# Define time bounds of the 20-minute analysis window used when loading the waveforms
TS = UTCDateTime(2017, 5, 11, 13, 45)
TE = UTCDateTime(2017, 5, 11, 14, 5)

In [6]:
# Get data with gaps
# sorted() gives a reproducible file order regardless of how glob lists the directory
flist = sorted(glob(GLOB_STR))
stream = Stream()
for _f in tqdm(flist):
    # NOTE: the file-type keyword of obspy.read is `format` (not `fmt`). Passing the
    # time bounds to read() keeps only the window of interest from each day volume
    # instead of holding every full-day trace in memory until the trim below.
    stream += read(_f, format='MSEED', starttime=TS, endtime=TE)
# Final trim guarantees nothing outside [TS, TE] remains; as the last expression of the
# cell it also displays the stream summary
stream.trim(starttime=TS, endtime=TE)


100%|██████████| 27/27 [00:07<00:00,  3.52it/s]


2692 Trace(s) in Stream:

UW.ALKI..HNE | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:05:00.000000Z | 200.0 Hz, 240001 samples
...
(2690 other traces)
...
UW.VVHS..ENZ | 2017-05-11T13:56:32.990000Z - 2017-05-11T14:05:00.000000Z | 100.0 Hz, 50702 samples

[Use "print(Stream.__str__(extended=True))" to print all Traces]

In [7]:
# Get model
# ml.initialize_EQT_model() (from prediction_methods) returns the model together with a
# `device` value; both are passed to ml.run_batched_prediction in a later cell.
# NOTE(review): "EQT" presumably stands for EQTransformer -- confirm in prediction_methods
model, device = ml.initialize_EQT_model()

# Test 1: Do Nothing
Check whether gappy data produces valid (non-NaN, but perhaps still gappy) model output values.


In [11]:
# Prepare data
# Key-word-arguments handed to prepare_windows_from_stream as `merge_kwargs`
mkwargs = {'method': 1, 'interpolation_samples': 5}
# Second set of key-word-arguments (presumably for interpolation)
# NOTE(review): ikwargs is defined here but is not passed to anything in this cell
ikwargs = {'method': 'weighted_average_slopes', 'no_filter': False}
# Break into NSLBI-keyed sub-streams and build the windowed data array + window index
NSLBI_dict, windows, swindex = ml.prepare_windows_from_stream(
    stream,
    model,
    merge_kwargs=mkwargs,
    method_1C='ZP',
)

# Show the per-key streams first, then the window array
display(NSLBI_dict, windows)

  windows[_s, :, :] /= np.max(np.abs(windows[_s, :, :]), axis=-1, keepdims=True)


{'UW.ALKI..HN?': 3 Trace(s) in Stream:
UW.ALKI..HNE | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:04:59.990000Z | 100.0 Hz, 120000 samples
UW.ALKI..HNN | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:04:59.990000Z | 100.0 Hz, 120000 samples
UW.ALKI..HNZ | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:04:59.990000Z | 100.0 Hz, 120000 samples,
 'UW.BABE..EN?': 3 Trace(s) in Stream:
UW.BABE..ENE | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:05:00.000000Z | 100.0 Hz, 120001 samples (masked)
UW.BABE..ENN | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:05:00.000000Z | 100.0 Hz, 120001 samples (masked)
UW.BABE..ENZ | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:05:00.000000Z | 100.0 Hz, 120001 samples (masked),
 'UW.BRKS..EN?': 3 Trace(s) in Stream:
UW.BRKS..ENE | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:05:00.000000Z | 100.0 Hz, 120001 samples (masked)
UW.BRKS..ENN | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:05:00.000000Z | 100.0 Hz, 120001 samples (masked)
UW.BRKS..ENZ | 2017-05-11T13:45:00.0

array([[[ 0.0000000e+00,  9.5491506e-02,  2.0619424e-01, ...,
         -7.4355390e-05, -1.6663236e-05, -0.0000000e+00],
        [-0.0000000e+00, -9.5491506e-02, -2.0609820e-01, ...,
          1.2475927e-04,  6.6979948e-05,  0.0000000e+00],
        [-0.0000000e+00, -9.5491506e-02, -2.0612599e-01, ...,
          3.6667736e-04,  3.1475323e-05, -0.0000000e+00]],

       [[ 0.0000000e+00,  2.8453514e-03, -5.9957810e-02, ...,
         -1.3032970e-01,  2.7901430e-02,  0.0000000e+00],
        [ 0.0000000e+00, -3.4698267e-02, -9.7094283e-02, ...,
         -1.4385356e-01, -6.0818577e-04,  0.0000000e+00],
        [ 0.0000000e+00, -1.3022991e-02, -1.0274857e-01, ...,
         -2.3799948e-01, -2.7163042e-02,  0.0000000e+00]],

       [[-0.0000000e+00,  3.2391224e-02,  2.2549523e-02, ...,
          4.2825289e-02, -2.2765998e-02,  0.0000000e+00],
        [-0.0000000e+00,  2.3534514e-02,  8.6309053e-02, ...,
          1.5734751e-02,  2.1802621e-02,  0.0000000e+00],
        [-0.0000000e+00,  2.5563506e

Give the prepared windows a quick look by converting them back into traces

In [19]:
# Rebuild traces from the prepared windows so they can be inspected visually
windows_stream = ml.reassemble_multistation_preds(windows, swindex, model, NSLBI_dict)
# Open the snuffler viewer. The trailing semicolon keeps the call's return value (a tuple
# whose repr embeds a run-specific memory address) out of the saved cell output.
windows_stream.snuffle(ntracks=90);

  stack[:, _wi:_wi+mdata] = merge_method([stack[:, _wi:_wi+mdata], _data], axis=0)


(None, <pyrocko.pile.Sorted at 0x3d82fbbb0>)

In [12]:
# Run batched prediction
# Passes the prepared `windows` array to the model on `device` via prediction_methods.
# NOTE(review): batch_size=10 is presumably the number of windows per forward pass --
# confirm in prediction_methods
pred = ml.run_batched_prediction(windows, model, device, batch_size=10)

100%|██████████| 184/184 [00:10<00:00, 18.32it/s]


In [13]:
# Collect every sub-stream held in NSLBI_dict into one flat Stream
stream_dict_merged = Stream()
for _k, _st in NSLBI_dict.items():
    stream_dict_merged += _st
# Turn the windowed predictions back into a stream
# NOTE(review): the following cell makes this same call again and stores the result as
# `pred_st`; `stream_dict_merged` is not used by any cell visible in this section
pred_stream = ml.reassemble_multistation_preds(pred, swindex, model, NSLBI_dict)

  stack[:, _wi:_wi+mdata] = merge_method([stack[:, _wi:_wi+mdata], _data], axis=0)


In [15]:
# Convert the windowed predictions (`pred`) back to a stream
# NOTE(review): identical call to the one assigned to `pred_stream` in the previous cell
pred_st = ml.reassemble_multistation_preds(pred, swindex, model, NSLBI_dict)

  stack[:, _wi:_wi+mdata] = merge_method([stack[:, _wi:_wi+mdata], _data], axis=0)


In [17]:
# Check resampled data: view the reassembled predictions together with the prepared
# input traces. copy() is used so that appending the input traces leaves pred_st untouched.
st2 = pred_st.copy()
for _k in NSLBI_dict.keys():
    st2 += NSLBI_dict[_k]
# The trailing semicolon keeps snuffle's return value (a tuple whose repr embeds a
# run-specific memory address) out of the saved cell output.
st2.snuffle(ntracks=90);

  mi, ma = num.nanmin(trace.ydata), num.nanmax(trace.ydata)
  ranges[k] = num.nanmin(mins), num.nanmax(maxs)


(None, <pyrocko.pile.Sorted at 0x2e22d8a30>)