In [86]:
import gc
import sys
sys.path.append('../../../')

import numpy as np
import pandas as pd
import tensorflow as tf

from PIL import Image
from sklearn.model_selection import train_test_split

In [167]:
# Load extracted raw signals
raw_signals = np.load('../../../data/STEAD/extracted_raw_signals_subsample_100000.npy')

# Load created spectrogram images
imgs = np.load('../../../data/STEAD/created_spectrogram_images_subsample_100000.npy')

# Load extracted raw signals
metadata = pd.read_feather('../../../data/STEAD/extracted_metadata_subsample_100000.feather')
metadata = metadata.reset_index()

In [168]:
subsample_indices = np.random.choice(len(imgs), size=5000)
imgs = imgs[subsample_indices]
metadata = metadata.iloc[subsample_indices]

In [169]:
metadata

Unnamed: 0,trace_name,network_code,receiver_code,receiver_type,receiver_latitude,receiver_longitude,receiver_elevation_m,p_arrival_sample,p_status,p_weight,...,source_magnitude_author,source_mechanism_strike_dip_rake,source_distance_deg,source_distance_km,back_azimuth_deg,snr_db,coda_end_sample,trace_start_time,trace_category,chunk
96029,JHU.NC_200109061324_NO,NC,JHU,HN,37.214270,-122.171680,234.0,,,,...,,,,,,,,2001-09-06 13:24:00,noise,6
19744,KNK.AK_20121210081710_EV,AK,KNK,BH,61.413100,-148.458500,598.0,500.0,manual,0.67,...,,,0.78700,87.68,347.19,[14.89999962 26.29999924 6.4000001 ],[[3199.]],2012-12-10 08:17:11.695000,earthquake_local,2
38445,GDXB.NC_20110124054225_EV,NC,GDXB,HH,38.807968,-122.795303,939.0,700.0,manual,0.72,...,NC,,0.01351,1.42,344.10,[49.29999924 45.09999847 44.40000153],[[1660.]],2011-01-24 05:42:26.520000,earthquake_local,3
65518,PPLA.AK_20170330145844_EV,AK,PPLA,BH,62.896200,-152.189400,1519.0,600.0,manual,0.96,...,,,0.01800,2.00,128.05,[36.70000076 42.40000153 46.90000153],[[1369.]],58:45.7,earthquake_local,5
21945,KAN05.GS_20141203162655_EV,GS,KAN05,HH,37.108650,-97.872280,348.0,500.0,manual,0.57,...,,,0.26070,28.99,49.50,[27. 14.69999981 10.80000019],[[2100.]],2014-12-03 16:26:56.810000,earthquake_local,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26373,N16K.TA_20170527053531_EV,TA,N16K,BH,60.474200,-158.769000,582.0,500.0,manual,0.88,...,,,0.30000,33.51,264.79,[25.39999962 31. 24. ],[[2077.]],2017-05-27 05:35:32.845000,earthquake_local,2
10300,B023.PB_20161216070828_EV,PB,B023,EH,46.111200,-123.078700,177.4,400.0,manual,0.61,...,,,0.16820,18.69,94.00,[14.10000038 16.10000038 8. ],[[2340.]],08:29.8,earthquake_local,1
4854,B084.PB_20150719143920_EV,PB,B084,HH,33.611570,-116.456370,1271.0,700.0,manual,0.62,...,,,0.29830,33.17,267.10,[14.60000038 12.60000038 21.39999962],[[1889.]],39:22.0,earthquake_local,1
55655,BACM.NM_20150424064252_EV,NM,BACM,EH,36.685000,-89.865000,84.0,900.0,manual,0.60,...,,,0.32850,36.52,118.70,[6.30000019 5.9000001 3.5999999 ],[[3199.]],42:53.1,earthquake_local,4


In [170]:
labels = [1 if label=='earthquake_local' else 0 for label in metadata['trace_category']]

In [171]:
def cnn_image_preprocessing(image, image_size=(300,200)):
    image = tf.image.resize(image, image_size)  # Resize image
    image = tf.cast(image, tf.float32) / 255.0  # Normalize to [0, 1]
    return image

In [172]:
X_train, X_test, y_train, y_test = train_test_split(imgs, labels, random_state=0, test_size=0.25)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, random_state=0, test_size=0.25)

In [173]:
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

In [174]:
train_dataset = train_dataset.map(lambda x, y: (cnn_image_preprocessing(x), y))
val_dataset = val_dataset.map(lambda x, y: (cnn_image_preprocessing(x), y))
test_dataset = test_dataset.map(lambda x, y: (cnn_image_preprocessing(x), y))

In [175]:
train_dataset = train_dataset.shuffle(buffer_size=1000).batch(32).prefetch(tf.data.experimental.AUTOTUNE)
val_dataset = val_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)

In [186]:
# Callback to stop model training early if loss stops improving
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_classification_loss',    
    patience=5,                # number of epochs to wait for improvement
    restore_best_weights=True, # restore the best weights once training stops
    verbose=1                 
)

# Callback to reduce learning rate if loss stops improving
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_classification_loss', 
    factor=0.1,                # the factor by which the learning rate will be reduced
    patience=2,                # number of epochs to wait for improvement
    verbose=1
)

callbacks = [
    early_stopping,
    reduce_lr
]

In [None]:

inputs = tf.keras.layers.Input(shape=input_shape)

In [183]:
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, kernel_size=(5, 5), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax')  # 10 output classes
])

In [184]:
opt = tf.keras.optimizers.Adam(learning_rate=1e-6)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(train_dataset, epochs=10, callbacks=callbacks, validation_data=val_dataset)

Epoch 1/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 806ms/step - accuracy: 0.7425 - loss: 0.5404 - val_accuracy: 0.8081 - val_loss: 0.4480 - learning_rate: 1.0000e-06
Epoch 2/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 800ms/step - accuracy: 0.8075 - loss: 0.4341 - val_accuracy: 0.8081 - val_loss: 0.3911 - learning_rate: 1.0000e-06
Epoch 3/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 808ms/step - accuracy: 0.8209 - loss: 0.3639 - val_accuracy: 0.8081 - val_loss: 0.3508 - learning_rate: 1.0000e-06
Epoch 4/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 816ms/step - accuracy: 0.8275 - loss: 0.3265 - val_accuracy: 0.8145 - val_loss: 0.3196 - learning_rate: 1.0000e-06
Epoch 5/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 831ms/step - accuracy: 0.8291 - loss: 0.3041 - val_accuracy: 0.8273 - val_loss: 0.2956 - learning_rate: 1.0000e-06
Epoch 6/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x17e549760>

In [185]:
model.evaluate(test_dataset)

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 123ms/step - accuracy: 0.9410 - loss: 0.1824


[0.18506132066249847, 0.9416000247001648]