In [1]:
import h5py, os
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle, random
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Show the devices TensorFlow can see; the value is displayed as the cell output.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [2]:
from bokeh.plotting import figure, show, output_notebook
# Route Bokeh output to the notebook so later show(...) calls render inline.
output_notebook()

In [3]:
### Seed everything
seed = 1987

# NOTE: the interpreter reads PYTHONHASHSEED at start-up, so assigning it here
# does not change hash randomization in the already-running kernel; it is only
# inherited by processes launched afterwards.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed Python's `random`, NumPy's global RNG and TensorFlow's global seed.
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed)

### Read data from pickle

In [4]:
# Load the pre-selected dataset; later cells read its 'data' and 'labels' keys.
# The context manager closes the file handle as soon as loading finishes
# (the previous version left it open for the lifetime of the kernel).
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
filename = 'selected_data.pkl'
with open(filename, 'rb') as infile:
    data = pickle.load(infile)

In [5]:
# Number of records stored under the 'data' key of the loaded pickle.
len(data['data'])

182330

In [6]:
# Materialize the samples and their targets as float32 NumPy arrays.
xdata, ylablel = (
    np.array(data[key], dtype=np.float32) for key in ('data', 'labels')
)

In [7]:
# Display the shapes of the sample and label arrays.
xdata.shape, ylablel.shape

((182330, 50, 3), (182330,))

### Train test split

In [8]:
### Split data
# Hold out 20% of the samples for validation.
# random_state pins the shuffle to the notebook seed, so the split is the same
# every time this cell runs; without it the split depends on how much of the
# global NumPy RNG state has been consumed and changes on each re-run.
X_train, X_val, y_train, y_val = train_test_split(
    xdata, ylablel, test_size=0.2, random_state=seed
)
X_train.shape, X_val.shape

((145864, 50, 3), (36466, 50, 3))

### Normalize data

In [9]:
# One StandardScaler per channel (last axis), fitted on the training split only.
# Each slice X_train[:, :, channel] is 2-D (samples x time steps), so every
# time-step column is standardized independently. X_train is overwritten in place.
n_channels = xdata.shape[2]
xscalers = {channel: StandardScaler() for channel in range(n_channels)}
for channel, scaler in xscalers.items():
    X_train[:, :, channel] = scaler.fit_transform(X_train[:, :, channel])

# Targets are standardized too; the scaler needs a 2-D column, hence the reshape.
yscaler = StandardScaler()
y_train = yscaler.fit_transform(y_train.reshape(-1, 1))

In [10]:
# Apply the scalers fitted on the training split to the validation split
# (transform only, so no validation statistics enter the normalization).
# NOTE: X_val and y_val are overwritten, so running this cell twice scales
# them twice - re-run the split cell first if this cell has to be repeated.
for i in range(xdata.shape[2]):
    X_val[:, :, i] = xscalers[i].transform(X_val[:, :, i])

y_val = yscaler.transform(y_val.reshape(-1, 1))

In [11]:
# Compare the largest standardized target value in each split.
y_val.max(), y_train.max()

(6.706501, 7.02627)

In [12]:
# scalers = {}
# for i in range(xdata.shape[2]):
#     scalers[i] = StandardScaler()
#     xdata[:, :, i] = scalers[i].fit_transform(xdata[:, :, i])
    
# yscaler = StandardScaler()
# ylablel = yscaler.fit_transform(ylablel.reshape(-1, 1))

### Plot seismogram

In [13]:
# Draw the three channels of one (already normalized) training sample.
p = figure(plot_width=600, plot_height=250)
x = np.arange(50)  # sample index along the 50-step axis
i = 10             # which training example to display
for channel, color in enumerate(('red', 'blue', 'black')):
    p.line(x, X_train[i][:, channel], line_width=2, color=color)
show(p)

### Baseline Model

In [57]:
# Baseline: flatten the (50, 3) input and pass it through a small dense stack
# with dropout before the last hidden layer, ending in one linear output unit.
layers = tf.keras.layers

inputs = tf.keras.Input(shape=(50, 3))
hidden = layers.Flatten()(inputs)
hidden = layers.Dense(128, activation=tf.nn.relu)(hidden)
hidden = layers.Dense(64, activation=tf.nn.relu)(hidden)
hidden = layers.Dropout(0.25)(hidden)
hidden = layers.Dense(16, activation=tf.nn.relu)(hidden)
outputs = layers.Dense(1)(hidden)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [58]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "functional_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 50, 3)]           0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 150)               0         
_________________________________________________________________
dense_24 (Dense)             (None, 128)               19328     
_________________________________________________________________
dense_25 (Dense)             (None, 64)                8256      
_________________________________________________________________
dropout_7 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_26 (Dense)             (None, 16)                1040      
_________________________________________________________________
dense_27 (Dense)             (None, 1)               

In [59]:
# Optimize mean squared error with Adam at a learning rate of 1e-3.
adam = keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=adam,
    loss="mean_squared_error",
    # metrics=["mean_absolute_error"]
)

In [60]:
# Train on the normalized training split and evaluate on the validation split
# after every epoch; `history` keeps the per-epoch losses for plotting.
fit_options = dict(
    batch_size=512,
    epochs=200,
    validation_data=(X_val, y_val),
)
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [61]:
# Plot training and validation loss against the epoch index.
p = figure(plot_width=600, plot_height=250)
losses = history.history
x = np.arange(len(losses['loss']))
curves = (
    ('loss', 'blue', 'Train MSE'),
    ('val_loss', 'red', 'Val MSE'),
)
for key, color, label in curves:
    p.line(x, losses[key], line_width=2, color=color, legend_label=label)
show(p)