# Gappy Data Preprocessing Tests
:auth: Nathan T. Stevens
:email: ntsteven at uw.edu
:org: Pacific Northwest Seismic Network


In [1]:
import os
import sys
import numpy as np
from tqdm import tqdm
from obspy import read, UTCDateTime, Stream
from glob import glob
import prediction_methods as ml
import matplotlib.pyplot as plt
from pyrocko import obspy_compat
%matplotlib widget
obspy_compat.plant()

In [2]:
# Root directory of the local MSEED day-volume archive
DATA_DIR = os.path.join(
    '/Volumes',
    'LaCie',
    'PNW_Store_Local',
)
# glob.glob pattern matching every file under PNW2017/UW/2017/131
# (presumably network UW, year 2017, day-of-year 131 = 2017-05-11 — confirm layout)
GLOB_STR = os.path.join(
    DATA_DIR,
    'PNW2017',
    'UW',
    '2017',
    '131',
    '*',
)
# Start / end of the 20-minute analysis window used to trim the loaded data
TS = UTCDateTime('2017-05-11T13:45:00')
TE = UTCDateTime('2017-05-11T14:05:00')

In [3]:
# Get data with gaps
# Read every file matched by GLOB_STR into a single Stream, then cut it to
# the [TS, TE] window. Nothing is merged here, so gaps in the data remain.
flist = sorted(glob(GLOB_STR))
stream = Stream()
for _f in tqdm(flist):
    # obspy.read names this keyword `format`; `fmt` is not one of its
    # parameters, so `fmt='MSEED'` never forced the MiniSEED reader.
    stream += read(_f, format='MSEED')
# trim() modifies the Stream in place and returns it, so this last expression
# is also what the cell displays.
stream.trim(starttime=TS, endtime=TE)


  0%|          | 0/27 [00:00<?, ?it/s]

100%|██████████| 27/27 [00:08<00:00,  3.32it/s]


2692 Trace(s) in Stream:

UW.ALKI..HNE | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:05:00.000000Z | 200.0 Hz, 240001 samples
...
(2690 other traces)
...
UW.VVHS..ENZ | 2017-05-11T13:56:32.990000Z - 2017-05-11T14:05:00.000000Z | 100.0 Hz, 50702 samples

[Use "print(Stream.__str__(extended=True))" to print all Traces]

In [4]:
# Get model
# ml.initialize_EQT_model() returns a (model, device) pair; both are reused by the
# window-preparation and prediction cells below.
# NOTE(review): "EQT" presumably refers to EQTransformer — confirm in prediction_methods.
model, device = ml.initialize_EQT_model()

# Test 1: Do Nothing
Check if gappy data produces valid (non-NaN, but perhaps still gappy) model values


In [9]:
# Prepare data
# Keyword arguments handed to prepare_windows_from_stream as `merge_kwargs`;
# fill_value is NaN
mkwargs = dict(method=1, fill_value=np.nan, interpolation_samples=5)
# Presumably interpolation keyword arguments.
# NOTE(review): defined here but not passed to any call in this cell — confirm intended use.
ikwargs = dict(method='weighted_average_slopes', no_filter=False)
# Break the stream into NSLBI groups (keys look like 'UW.ALKI..HN?') and get
# the windowed array plus its window index from the helper
NSLBI_dict, windows, swindex = ml.prepare_windows_from_stream(
    stream,
    model,
    merge_kwargs=mkwargs,
    method_1C='ZP',
)

display(NSLBI_dict)

  windows[_s, :, :] /= np.max(np.abs(windows[_s, :, :]), axis=-1, keepdims=True)


{'UW.ALKI..HN?': 3 Trace(s) in Stream:
UW.ALKI..HNE | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:04:59.990000Z | 100.0 Hz, 120000 samples
UW.ALKI..HNN | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:04:59.990000Z | 100.0 Hz, 120000 samples
UW.ALKI..HNZ | 2017-05-11T13:45:00.000000Z - 2017-05-11T14:04:59.990000Z | 100.0 Hz, 120000 samples,
 'UW.BABE..EN?': 124 Trace(s) in Stream:

UW.BABE..ENE | 2017-05-11T13:45:00.000000Z - 2017-05-11T13:45:13.990000Z | 100.0 Hz, 1400 samples
...
(122 other traces)
...
UW.BABE..ENZ | 2017-05-11T13:56:32.000000Z - 2017-05-11T14:05:00.000000Z | 100.0 Hz, 50801 samples

[Use "print(Stream.__str__(extended=True))" to print all Traces],
 'UW.BRKS..EN?': 264 Trace(s) in Stream:

UW.BRKS..ENE | 2017-05-11T13:45:00.000000Z - 2017-05-11T13:45:04.760000Z | 100.0 Hz, 477 samples
...
(262 other traces)
...
UW.BRKS..ENZ | 2017-05-11T13:57:22.920000Z - 2017-05-11T14:05:00.000000Z | 100.0 Hz, 45709 samples

[Use "print(Stream.__str__(extended=True))" to print all Trac

In [6]:
# Run batched prediction
# Passes `windows`, `model` and `device` to ml.run_batched_prediction with
# batch_size=10; the result `pred` is what the reassembly cells below consume.
pred = ml.run_batched_prediction(windows, model, device, batch_size=10)

100%|██████████| 184/184 [00:12<00:00, 14.84it/s]


In [8]:
# Flatten the per-key Streams held in NSLBI_dict into one Stream
stream_dict_merged = Stream()
for _substream in NSLBI_dict.values():
    stream_dict_merged += _substream
# Reassemble the windowed predictions into a multi-station prediction stream.
# NOTE(review): the saved output of this cell ends with
# "ValueError: startime is larger than endtime", apparently raised inside this
# call — investigate in prediction_methods.
pred_stream = ml.reassemble_multistation_preds(pred, swindex, model, NSLBI_dict)

  stack[:, _wi:_wi+mdata] = merge_method([stack[:, _wi:_wi+mdata], _data], axis=0)


ValueError: startime is larger than endtime

In [7]:
# Convert windows back to stream
# Uses the imported `ml` module and `NSLBI_dict`: the names `pp` and `st_dict`
# are not imported or defined anywhere in this notebook, so a call through them
# raises NameError on a fresh kernel.
# NOTE(review): assumes ml.reassemble_multistation_preds is the current name of
# pp._reassemble_multistation (same argument layout) — confirm.
pred_st = ml.reassemble_multistation_preds(pred, swindex, model, NSLBI_dict)

  stack[:, _wi:_wi+mdata] = merge_method([stack[:, _wi:_wi+mdata], _data], axis=0)


ValueError: startime is larger than endtime

In [None]:
# Check resampled data
# Collect the per-key Streams from NSLBI_dict into one Stream for inspection.
# `st_dict` is never defined in this notebook, so iterating over it raises
# NameError on a fresh kernel; NSLBI_dict is the dict of Streams returned by
# ml.prepare_windows_from_stream.
st2 = Stream()
for _substream in NSLBI_dict.values():
    st2 += _substream
# Stream.snuffle is provided by pyrocko's obspy_compat.plant() (called in the import cell)
st2.snuffle(ntracks=90)