# PNSN_Coastal_Dataset_Inspection
**:auth:** Nathan T. Stevens  
**:email:** ntsteven (at) uw.edu  
**:org:** Pacific Northwest Seismic Network  
**:license:** Creative Commons 4.0-BY  
**:attribution:**  
This notebook is part of a repository building on the `E3WS` repository published by Pablo Lara (Lara et al., 2023) and uses metadata from the Pacific Northwest Seismic Network (Network Code UW). If referencing elements related to `E3WS`, please cite Lara et al. (2023).  

**:purpose:**  
This notebook documents PNSN catalog event sub-sampling to use for testing `E3WS`'s predictive capabilities on regional events, particularly those that bracket the current PNSN alert triggering threshold (M $\geq$ 2.95). 

**:references:**  
Pablo Lara, Quentin Bletery, Jean-Paul Ampuero, Adolfo Inza, Hernando Tavera (2023). Earthquake Early Warning Starting From 3 s of Records on a Single Station With Machine Learning. Journal of Geophysical Research: Solid Earth. https://doi.org/10.1029/2023JB026575

In [60]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from obspy import UTCDateTime
from obspy.clients.fdsn import Client

In [None]:
# # Define PNSN reporting boundary
# rlatll, rlonll = 41., -129.
# rlatur, rlonur = 51., -116.

# # Define PNSN authoritative boundary (plus a little slop)
# alatll, alonll = 42., -125.5
# alatur, alonur = 49., -117.

# Coastal WA/OR search box, one latitude/longitude pair per corner
# ("ll" / "ur" presumably mean lower-left / upper-right -- confirm).
latll = 42.0
lonll = -125.5
latur = 49.0
lonur = -122.0

In [None]:
# Initialize the ObsPy FDSN client, pointed at the "IRIS" data center
client = Client(base_url="IRIS")

In [43]:
# First, do some work to identify offshore events along Cascadia that may be of interest.
# The catalog is read from a CSV export addressed by a relative path; the first
# CSV column is used as the DataFrame index.
CSZ_events = os.path.join('..', 'data', 'sql_queries', 'updated_event_origin_query.csv')
df = pd.read_csv(CSZ_events, index_col=[0])
# Do some basic filtering for just earthquakes with positive magnitudes.
# The explicit .copy() makes the subset an independent frame, so the column
# assignment below does not raise pandas' SettingWithCopyWarning.
df = df[(df.magnitude > 0) & (df.etype == 'eq')].copy()
# Convert origin times from epoch seconds to datetimes in one vectorized call
# (rather than constructing a pd.Timestamp per row).
df['datetime'] = pd.to_datetime(df['datetime'], unit='s')
display(df)

Unnamed: 0_level_0,etype,prefor,prefmag,selectflag,version,magnitude,magtype,magalgo,nsta,nobs,...,ndef,nbs,nbfm,fdepth,fepi,rflag,crust_type,crust_model,gtype,auth
evid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
60403201,eq,1123038,866953,1,8,2.59,d,HypoinvMd,2,2.0,...,6.0,1.0,6.0,y,y,F,H,,r,UW
60404061,eq,1125433,869338,1,8,2.41,d,HypoinvMd,3,3.0,...,18.0,3.0,10.0,y,n,F,H,,r,UW
60404206,eq,1125708,2514938,1,9,1.61,l,RichterMl2,7,12.0,...,21.0,12.0,8.0,n,n,F,H,,l,UW
60741056,eq,1678453,1423203,1,6,2.41,d,HypoinvMd,5,5.0,...,6.0,1.0,4.0,y,n,F,H,,r,UW
60404291,eq,1125838,869748,1,8,0.91,d,HypoinvMd,3,3.0,...,10.0,6.0,4.0,n,n,F,H,,l,UW
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61962896,eq,3242108,3233108,1,5,0.54,d,HypoinvMd,5,5.0,...,5.0,0.0,0.0,y,n,F,H,C3,l,UW
61502808,eq,3242123,3233123,1,4,0.84,d,HypoinvMd,4,4.0,...,5.0,0.0,0.0,y,n,F,H,C3,l,UW
61962871,eq,3242128,3233128,1,5,0.55,d,HypoinvMd,5,5.0,...,5.0,0.0,0.0,n,n,F,H,C3,l,UW
61962866,eq,3242148,3233148,1,7,0.64,d,HypoinvMd,4,4.0,...,4.0,0.0,0.0,y,y,F,H,C3,l,UW


In [96]:
# Additional geographic filtering, used to look for temporal clusters of offshore events.
is_offshore = df.lon < -124.6  # chops out coastal events
is_m2_plus = df.magnitude >= 2
df = df[is_offshore & is_m2_plus]
# Map view of the retained events; note the plot uses a strict `> 2`, so
# events with magnitude exactly 2 stay in `df` but are not drawn.
px.scatter_geo(
    df[df.magnitude > 2],
    lat='lat',
    lon='lon',
    color='magnitude',
    hover_data=['datetime', 'magnitude', 'magtype'],
)

In [97]:
# Histogram of event origin times for the filtered catalog.
# 13 * 12 bins -- presumably ~one bin per month over 13 years; TODO confirm.
px.histogram(df, x='datetime', nbins=13 * 12)


In [113]:
# Restrict the filtered catalog to the time window of interest (both ends inclusive).
target_window = [pd.Timestamp("2018-08-01"), pd.Timestamp("2019-01-01")]
in_window = df.datetime.between(target_window[0], target_window[1])
# Filter first, then copy: only the selected rows are duplicated, and `df2`
# is an independent frame.
df2 = df[in_window].copy()
# A notebook cell only auto-renders its final expression, so the histogram
# has to be shown explicitly -- otherwise the figure is built and discarded.
px.histogram(df2, 'datetime', nbins=100).show()
px.scatter_geo(df2, lon='lon', lat='lat', color='magnitude')

In [117]:
# Get a list of stations along the coast that were active during the target window.
# First build the comma-separated channel filter: every combination of
# first letter (B/E/H), second letter (H/N) and third letter (Z/3/N/1/E/2),
# in that nesting order, with no trailing comma.
CHAN_STR = ",".join(
    first + second + third
    for first in "BEH"
    for second in "HN"
    for third in "Z3N1E2"
)

# Query channel-level metadata for UW stations between 125 W and 123.25 W
# whose operating epoch starts before the window opens and ends after it closes.
client = Client("IRIS")
window_open = UTCDateTime(target_window[0].isoformat())
window_close = UTCDateTime(target_window[1].isoformat())
inv = client.get_stations(
    network="UW",
    channel=CHAN_STR,
    level="channel",
    startbefore=window_open,
    endafter=window_close,
    minlongitude=-125,
    maxlongitude=-123.25,
)
display(inv)

Inventory created at 2023-11-16T22:59:35.425800Z
	Created by: IRIS WEB SERVICE: fdsnws-station | version: 1.1.52
		    http://service.iris.edu/fdsnws/station/1/query?network=UW&channel=B...
	Sending institution: IRIS-DMC (IRIS-DMC)
	Contains:
		Networks (1):
			UW
		Stations (46):
			UW.BABR (Baber Butte, OR, USA)
			UW.BAND (Bandon, OR, USA)
			UW.BILS (Queets, WA, USA)
			UW.BROK (Brookings, OR, USA)
			UW.CABL (Cape Blanco, OR, USA)
			UW.CHZZ (Cape Meares, OR, USA)
			UW.CNNB (Cannon Beach, OR, USA)
			UW.COOS (Coos Bay, OR, USA)
			UW.CORE (Aberdeen, WA, USA)
			UW.FISH (Jewel, OR, USA)
			UW.FLRE (Florence, OR, USA)
			UW.FORK (Forks, WA, USA)
			UW.HEBO (Mt. Hebo, OR CREST BB SMO)
			UW.HURR (Hurricane Ridge, WA, USA)
			UW.JEDS (Smith River, Reedsport, OR, USA)
			UW.KMO (Kings Mt., Oregon previous ISC code: KMOR)
			UW.LEBA (Lebam, WA, USA)
			UW.LRIV (Port Angeles, WA, USA)
			UW.LWCK (Lewis and Clark Interpretive Center, WA, USA)
			UW.MKAH (Makah Indian Reservation, WA, USA

In [123]:
# Convert inventory into dataframe: one row per (station, band+instrument code).


def _utc_to_timestamp(utc):
    """Convert an ObsPy ``UTCDateTime`` to a ``pd.Timestamp``.

    Anything that is not a ``UTCDateTime`` (e.g. ``None`` for a date the
    inventory does not provide) is mapped to ``pd.NaT`` instead of raising.
    The ISO-8601 string is parsed directly; ``unit`` is only meaningful for
    numeric epoch inputs, so it is not passed.
    """
    if isinstance(utc, UTCDateTime):
        return pd.Timestamp(utc.isoformat())
    return pd.NaT


holder = []
for _n in inv.networks:
    for _s in _n.stations:
        # Single pass over the channels: collect the last letter of each
        # channel code under its two-letter band+instrument prefix. Dicts keep
        # insertion order, so rows appear in first-seen order per station.
        components = {}
        for _c in _s.channels:
            components.setdefault(_c.code[:2], []).append(_c.code[-1])
        on_date = _utc_to_timestamp(_s.creation_date)
        off_date = _utc_to_timestamp(_s.termination_date)
        n_site_chan = len(_s.channels)
        for bi_code, comps in components.items():
            holder.append([
                _n.code, _s.code, bi_code, ''.join(comps), len(comps), n_site_chan,
                _s.latitude, _s.longitude, _s.elevation, on_date, off_date,
            ])
df_sta = pd.DataFrame(holder, columns=["Network", "Station", "BandInst", "BIChannels", "nBIChan", 'nSiteChan', "lat", "lon", "elev", 'on_date', 'off_date'])
display(df_sta.sort_values('on_date'))
# Map only HH / HN rows at sites reporting at least six channels in total.
px.scatter_geo(df_sta[(df_sta.BandInst.isin(['HH', 'HN'])) & (df_sta.nSiteChan >= 6)],
               lat='lat', lon='lon', color='on_date',
               hover_data=['Network', 'Station', 'BandInst', 'BIChannels', 'nBIChan', 'nSiteChan'])

Unnamed: 0,Network,Station,BandInst,BIChannels,nBIChan,nSiteChan,lat,lon,elev,on_date,off_date
48,UW,STW,EH,Z,1,4,48.150669,-123.671059,318.4,1973-06-01,NaT
49,UW,STW,EN,ENZ,3,4,48.150669,-123.671059,318.4,1973-06-01,NaT
47,UW,SMW,EH,Z,1,1,47.31945,-123.34445,877.0,1975-03-01,NaT
32,UW,OCP,EH,Z,1,4,48.29787,-124.62516,470.0,1980-05-19,NaT
33,UW,OCP,EN,ENZ,3,4,48.29787,-124.62516,470.0,1980-05-19,NaT
30,UW,OBC,EH,Z,1,1,48.03511,-124.078819,938.0,1980-05-29,NaT
20,UW,KMO,EH,Z,1,1,45.63533,-123.49073,975.0,1982-09-30,NaT
38,UW,OTR,EH,Z,1,1,48.08632,-124.34518,541.6,1984-06-01,NaT
36,UW,OSD,EH,Z,1,4,47.81642,-123.70568,2028.0,1984-10-04,NaT
37,UW,OSD,EN,ENZ,3,4,47.81642,-123.70568,2028.0,1984-10-04,NaT


# Notes on preferred reference stations
- `UW.JEDS..H[HN]?`
- `UW.BABR..H[HN]?`
- `UW.FISH..H[HN]?`
- `UW.LEBA..H[HN]?`

In [127]:
# Fetch response-level metadata for the four preferred reference stations,
# limited to the target time window.
reference_stations = ','.join(['JEDS', 'BABR', 'FISH', 'LEBA'])
inv_sub = client.get_stations(
    network='UW',
    station=reference_stations,
    channel=CHAN_STR,
    level='response',
    starttime=UTCDateTime(target_window[0].isoformat()),
    endtime=UTCDateTime(target_window[1].isoformat()),
)
display(inv_sub)

Inventory created at 2023-11-16T23:06:06.138700Z
	Created by: IRIS WEB SERVICE: fdsnws-station | version: 1.1.52
		    http://service.iris.edu/fdsnws/station/1/query?starttime=2018-08-01...
	Sending institution: IRIS-DMC (IRIS-DMC)
	Contains:
		Networks (1):
			UW
		Stations (4):
			UW.BABR (Baber Butte, OR, USA)
			UW.FISH (Jewel, OR, USA)
			UW.JEDS (Smith River, Reedsport, OR, USA)
			UW.LEBA (Lebam, WA, USA)
		Channels (24):
			UW.BABR..ENZ, UW.BABR..ENN, UW.BABR..ENE, UW.BABR..HHZ, 
			UW.BABR..HHN, UW.BABR..HHE, UW.FISH..ENZ, UW.FISH..ENN, 
			UW.FISH..ENE, UW.FISH..HHZ, UW.FISH..HHN, UW.FISH..HHE, 
			UW.JEDS..ENZ, UW.JEDS..ENN, UW.JEDS..ENE, UW.JEDS..HHZ, 
			UW.JEDS..HHN, UW.JEDS..HHE, UW.LEBA..ENZ, UW.LEBA..ENN, 
			UW.LEBA..ENE, UW.LEBA..HHZ, UW.LEBA..HHN, UW.LEBA..HHE

Possible lead on (re)training (incremental learning in XGBoost):  
https://shunya-vichar.medium.com/incremental-learning-in-xgboost-b3eac6135ce