In [1]:
# import standard-library and installed third-party modules
# --------------------------------------------------------------
import os
import sys
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Put the parent of the current working directory on the module search path
# so that the `src` package imported below can be resolved.
sys.path.append(os.path.abspath(".."))
# import local modules from src
# --------------------------------------------------------------
# NOTE(review): wildcard import -- clean_mag_table / clean_fit_table used later
# presumably come from here; consider importing them by name.
from src.data.utils import *
from src.data.load_data import Data
# NOTE(review): matsettings is not referenced by name in the cells shown;
# presumably imported for its plot-style side effects -- TODO confirm.
from src.matplotconf import matsettings

# 1. Data Prep and Exploration  
This notebook cleans the raw data and produces the datasets required to train and test multi-linear regression models.

In [None]:
# Load every raw input table through the project's Data loader.
# Each entry below is the (subfolder, filename) pair under the "raw" area;
# the files are read in the order listed.
eq_cat, spec_pars_1, spec_pars_2, mags = (
    Data.load_file("raw", subfolder, filename)
    for subfolder, filename in (
        ("catalogs", "UTeq.csv"),
        ("fitdata", "ModelFitDb-Sg-1-2km.csv"),
        ("fitdata", "ModelFitDb-Sg-2-2km.csv"),
        ("magnitudes", "HOLT19-MwDirect-Sg-2km.csv"),
    )
)

### Get some informative outlier caps for the magnitude and observational data tables.

In [None]:
# Thresholds handed to the cleaning helpers in the next code cell:
#   MIN_NSTA, MAX_STD_ERR -> clean_mag_table
#   MIN_DEP, MAX_FC       -> clean_fit_table
MIN_NSTA = 3
MAX_STD_ERR = 0.25
MIN_DEP = 3

# Data-driven cap on "fc": keep only the values that lie below 1e-5 of the
# column maximum and take the largest of those survivors as the cap.
# The table is reached through `.data`, the same way the cleaning cell
# accesses it, rather than by indexing the loader object directly.
# If no value passes the filter, MAX_FC is NaN.
fc_values = spec_pars_1.data["fc"]
MAX_FC = fc_values[fc_values < fc_values.max() / 1E5].max()

### Clean the observations and magnitude data tables
Observations were computed using the method of Holt *et al.* (2021); Mw was computed following Holt (2019, Chapter 2).

In [None]:
# Target table: magnitudes cleaned with the station-count and
# standard-error thresholds.
mag_table = clean_mag_table(mags.data, MIN_NSTA, MAX_STD_ERR)

# Predictor table: spectral fit data cleaned with the depth and fc thresholds.
fit_table = clean_fit_table(spec_pars_1.data, MIN_DEP, MAX_FC)

# Join predictors and targets on the shared 'otime' column (default inner
# join), then keep only the predictor columns and the Mw target.
comb = fit_table.merge(mag_table, on='otime')[
    ['station', 'dep', 'repi', 'llpsp', 'fc', 'Mw']
]

### Compute M0 and Mw at each station using Holt (2019)  

### Look at the resultant data table

In [None]:
# Pairwise scatter plots (KDE on the diagonal) for a random subset of rows.
# The sample size is capped at the table length so a table with fewer than
# PAIRPLOT_N rows does not raise a ValueError, and the seed is fixed so the
# figure is identical on every Restart & Run All.
PAIRPLOT_N = 1000
PAIRPLOT_SEED = 42
sns.pairplot(
    comb.sample(n=min(PAIRPLOT_N, len(comb)), random_state=PAIRPLOT_SEED),
    diag_kind='kde',
);