# Visualize train/test seismic data
---
#### 1. Load data created in notebook 1_Database_Construction
#### 2. Split data
#### 3. Plot train/test seismic data for each split
---
#### File paths are those used in the study.
#### Users must update the file paths to match where they stored the data/spreadsheets.

In [1]:
%%time
import os
#-----------------------------------------------------------------------------------------------------------------------#
import numpy as np
import pandas as pd
import koreageonet_v1 as KGN
import matplotlib.pyplot as plt
#-----------------------------------------------------------------------------------------------------------------------#
from obspy import *
#-----------------------------------------------------------------------------------------------------------------------#
# ML Packages 
from tensorflow.keras import *
from sklearn.model_selection import StratifiedKFold

CPU times: user 3.56 s, sys: 5.42 s, total: 8.99 s
Wall time: 4.94 s


## Load data vectors and concatenate
---

In [2]:
%%time
# Earthquakes
data_eq = np.load('/Volumes/Extreme SSD/Korea_Events/Earthquakes/seismic_data_eq.npy')
metadata_eq = np.load('/Volumes/Extreme SSD/Korea_Events/Earthquakes/metadata_eq.npy')
stn_labels_eq = np.load('/Volumes/Extreme SSD/Korea_Events/Earthquakes/stn_labels_eq.npy')
#-----------------------------------------------------------------------------------------------------------------------#
# Explosions
data_exp = np.load('/Volumes/Extreme SSD/Korea_Events/Explosions/seismic_data_exp.npy')
metadata_exp = np.load('/Volumes/Extreme SSD/Korea_Events/Explosions/metadata_exp.npy')
stn_labels_exp = np.load('/Volumes/Extreme SSD/Korea_Events/Explosions/stn_labels_exp.npy')

CPU times: user 2.19 ms, sys: 260 ms, total: 262 ms
Wall time: 898 ms


In [3]:
%%time
# Making one-hot-encoded labels (0 = earthquake, 1 = explosion)
y_exp = np.full(len(data_exp),1)
y_eq = np.full(len(data_eq),0)
y = np.concatenate((y_exp, y_eq))
y_ohe = utils.to_categorical(y.astype('int64'))
#-----------------------------------------------------------------------------------------------------------------------#
# Now let's merge the waveform data and event metadata
X = np.concatenate((data_exp, data_eq))
event_metadata = np.concatenate((metadata_exp, metadata_eq)) # event type, array, lat, lon, mag, dist
stn_labels = np.concatenate((stn_labels_exp, stn_labels_eq)) # station labels

CPU times: user 67.3 ms, sys: 235 ms, total: 302 ms
Wall time: 331 ms


## Execute k-fold cross-validation and save seismic train/test plots for each split
---

In [1]:
# Stratified k-fold split of the dataset, with train/test seismic plots written
# for every split (or only for the splits listed in `splits_to_plot`).
n_splits = 5
# 1-based split numbers to plot. None plots every split; set to e.g. {2} to
# regenerate the figures for a single split only.
splits_to_plot = None
# Root folder for the figures; each split gets its own 'Split_<k>/Seismic/' sub-folder.
plot_root = '/Volumes/Extreme SSD/Korea_Events/Plots/Han_2023/'
# Disable matplotlib interactive mode while the figures are generated.
plt.ioff()
# shuffle=True with a fixed random_state keeps fold membership reproducible between runs.
kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
for split_number, (train_index, test_index) in enumerate(kfold.split(X, y), start=1):
    # Skip splits that were not requested, before any progress message is printed.
    if splits_to_plot is not None and split_number not in splits_to_plot:
        continue
    print('Begin split # ' + str(split_number) + ' of ' + str(n_splits))
    #-----------------------------------------------------------------------------------------------------------------------#
    # Split dataset: index the waveforms, event metadata and station labels with the
    # same fold indices so the three arrays stay aligned row by row.
    X_train, X_test = X[train_index], X[test_index]
    ev_metadata_train, ev_metadata_test = event_metadata[train_index], event_metadata[test_index]
    stn_labels_train, stn_labels_test = stn_labels[train_index], stn_labels[test_index]
    #-----------------------------------------------------------------------------------------------------------------------#
    # Plot train and test sets for split. Both calls share one output folder; the
    # `train` flag tells the helper which subset it is given.
    # NOTE(review): presumably the helper saves its figures under `outfigdir` - confirm in koreageonet_v1.
    outfigdir = plot_root + 'Split_' + str(split_number) + '/Seismic/'
    KGN.TrainTest_seismic_plots(ev_metadata_train, X_train, stn_labels_train, outfigdir=outfigdir, train=True)
    KGN.TrainTest_seismic_plots(ev_metadata_test, X_test, stn_labels_test, outfigdir=outfigdir, train=False)