# Train a model with SP logs
***

In [None]:
# NOTE(review): this star import is the only import in the notebook, so every
# name used in the cells below (np, pd, os, layers, K, Model, keras, lasio,
# tqdm, signal, plt and the project helpers) is expected to come from
# auto_baseline -- confirm against that module.
from auto_baseline import *
# Presumably reports whether TensorFlow can see a GPU; defined in auto_baseline.
check_tf_gpu()

In [None]:
# Read the training logs; only the third returned array (`masked`) is used below.
raw, clean, masked = load_train_logs(
    folder='train_logs_with_SPnorm',
    padded_length=42000,
    nfiles=10,
)

# Scale the masked logs; the second return value is kept but not used in this cell.
train_logs_clean_norm, train_logs_clean_scaler = datascaler(masked)
print('Train logs: {}'.format(train_logs_clean_norm.shape))

# Channel 2 on the last axis is the regression target; every other channel is
# a model input.
target_channel = 2
n_channels = train_logs_clean_norm.shape[-1]
feature_channels = [c for c in range(n_channels) if c != target_channel]

X_train = train_logs_clean_norm[..., feature_channels]
# Keep a trailing singleton axis so the target has the same rank as the inputs.
y_train = np.expand_dims(train_logs_clean_norm[..., target_channel], -1)
print('X_train: {} | y_train: {}'.format(X_train.shape, y_train.shape))

In [None]:
def make_nn(kernel_size:int=15, drop=0.2, depths=(16,32,64), in_channels:int=10):
    """Build the 1-D convolutional encoder-decoder with skip connections.

    The network has three encoder stages (conv -> batch-norm -> ReLU ->
    dropout -> max-pool by 2), two decoder stages (same stack, but ending in
    an upsample by 2) whose outputs are concatenated with the matching encoder
    outputs, and an output head made of one more decoder stage with 4 filters
    followed by a single-filter linear convolution.

    Args:
        kernel_size: Kernel width shared by every Conv1D layer.
        drop: Dropout rate used in every stage.
        depths: Filter counts for the three encoder stages; the first two are
            reused (in reverse) by the decoder. Only indices 0-2 are read.
        in_channels: Number of feature channels in the input.

    Returns:
        An uncompiled Keras ``Model`` named ``'baseline_correction_bigpad'``
        mapping ``(batch, steps, in_channels)`` to ``(batch, steps, 1)``.

    Notes:
        The sequence axis is declared as ``None`` (variable length). Because
        the signal is pooled by 2 three times and the skip connections
        concatenate encoder and decoder tensors, the number of steps
        presumably has to be divisible by 8 for the shapes to line up.
    """
    # Drop any previously built graph/state before creating a new model.
    K.clear_session()

    def conv_stage(inp, units):
        # Shared conv -> batch-norm -> ReLU -> dropout stack.
        x = layers.Conv1D(units, kernel_size, padding='same')(inp)
        x = layers.BatchNormalization()(x)
        x = layers.ReLU()(x)
        x = layers.Dropout(drop)(x)
        return x

    def enc_layer(inp, units):
        # Encoder stage: conv stack, then halve the sequence length.
        x = conv_stage(inp, units)
        return layers.MaxPooling1D(2)(x)

    def dec_layer(inp, units):
        # Decoder stage: conv stack, then double the sequence length.
        x = conv_stage(inp, units)
        return layers.UpSampling1D(2)(x)

    def residual_cat(in1, in2):
        # Skip connection: join encoder and decoder features channel-wise.
        return layers.Concatenate()([in1, in2])

    def out_layer(inp, units):
        # Final decoder stage followed by a single-channel linear projection.
        x = dec_layer(inp, units)
        return layers.Conv1D(1, kernel_size, padding='same', activation='linear')(x)

    inputs  = layers.Input(shape=(None, in_channels))
    # -999 is the padding sentinel.
    # NOTE(review): confirm the Conv1D layers treat masked steps as intended.
    masked  = layers.Masking(mask_value=-999)(inputs)
    enc1    = enc_layer(masked, depths[0])
    enc2    = enc_layer(enc1, depths[1])
    zlatent = enc_layer(enc2, depths[2])
    dec3    = residual_cat(enc2, dec_layer(zlatent, depths[1]))
    dec2    = residual_cat(enc1, dec_layer(dec3, depths[0]))
    outputs = out_layer(dec2, 4)
    return Model(inputs, outputs, name='baseline_correction_bigpad')

In [None]:
# ---------------------------------------------------------------------------
# Option A: load a pre-trained model (un-comment the two lines below and skip
# the training section).
# ---------------------------------------------------------------------------
# model = keras.models.load_model('baseline_correction_model_bigpad.keras')
# print('# parameters: {:,}'.format(model.count_params()))

# ---------------------------------------------------------------------------
# Option B: train from scratch.
# ---------------------------------------------------------------------------
model = make_nn()
n_params = model.count_params()
print('# parameters: {:,}'.format(n_params))

model.compile(loss='mse', optimizer='adam', metrics=['mse'])

# 20% of the training samples are held out for validation.
train_options = dict(
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    shuffle=True,
    verbose=True,
)
fit = model.fit(X_train, y_train, **train_options)

# Persist the trained model, then plot the training history.
model.save('baseline_correction_model_bigpad.keras')
plot_loss(fit)

***
## Transfer Learning
***

In [None]:
# Input LAS files are read from `in_folder`; each file that has an SP curve is
# written to `out_folder` with two extra curves (SP_PRED, CSH_PRED).
in_folder  = 'test_logs'
out_folder = 'predicted_logs'

padded_length = 75000     # fixed length every log is padded to before prediction
sp_filt_size  = 25        # median-filter kernel applied to the raw prediction
csh_percents  = [5, 95]   # percentiles forwarded to predict_csh

#################################################################################
### Un-comment me if using pre-trained model instead of training from scratch ###
#################################################################################
# model = keras.models.load_model('baseline_correction_model_bigpad.keras')

# Create the output folder up front so the first write cannot fail on a
# missing directory after the prediction work has already been done.
os.makedirs(out_folder, exist_ok=True)

files = os.listdir(in_folder)
for file in tqdm(files, desc='Transfer learning predictions', unit=' file(s)'):
    log_las = lasio.read(os.path.join(in_folder, file))
    # Files without an SP curve cannot be processed; skip them.
    if 'SP' not in log_las.keys():
        continue
    log_df = pd.DataFrame({'DEPT': log_las['DEPT'], 'SP': log_las['SP']})

    # Pad to a fixed length with the -999 sentinel, then copy the real samples
    # into the leading rows (column 0 = DEPT, column 1 = SP).
    # Assumes the log has at most `padded_length` samples.
    log = np.ones((1, padded_length, 2))*-999
    log[:, log_df.index, :] = log_df.values

    # Treat NaNs like padding and mask every -999 entry.
    clean = np.nan_to_num(log, nan=-999)
    clean = np.ma.masked_where(clean==-999, clean)

    log, clean, masked = calc_features(log, clean)
    log_norm, scalers  = datascaler(masked)

    # Predict on the scaled features, then smooth with a median filter.
    sp_pred = model.predict(log_norm, verbose=False)
    sp_pred = signal.medfilt(sp_pred.squeeze(), sp_filt_size)

    # Csh is estimated from the scaled prediction, before it is rescaled.
    csh_linear, csh_percentile, csh_window = predict_csh(sp_pred, percentiles=csh_percents)
    # Undo the scaling; index 1 presumably corresponds to the SP channel --
    # confirm against datascaler.
    sp_pred = sp_pred*scalers['sd'][1] + scalers['mu'][1]

    # Drop the padding so the new curves have the same length as the file's own.
    sp_pred = sp_pred[log_df.index].squeeze()
    csh_pred = csh_window[log_df.index].squeeze()

    log_las.append_curve('SP_PRED', sp_pred, unit='mV', descr='Predicted SP from baseline correction')
    log_las.append_curve('CSH_PRED', csh_pred, unit='v/v', descr='Estimated Csh from predicted SP')
    log_las.write(os.path.join(out_folder, file), version=2.0)

In [None]:
# Plot the curves of `log_las`, i.e. the last file read by the prediction loop:
# SP tracks on the left, Csh tracks on the right, sharing the depth axis.
fig, axs = plt.subplots(1, 2, figsize=(8,8), sharey=True)
ax1, ax2 = axs

# SP_NORM and VSH_SP are reference curves that the prediction loop does not
# require or create, so only draw them when the file actually contains them.
available_curves = set(log_las.keys())
depth = log_las['DEPT']

ax1.plot(log_las['SP'], depth, color='tab:purple')
if 'SP_NORM' in available_curves:
    ax1.plot(log_las['SP_NORM'], depth, color='purple')
ax1.plot(log_las['SP_PRED'], depth)

if 'VSH_SP' in available_curves:
    ax2.plot(log_las['VSH_SP'], depth, 'k')
ax2.plot(log_las['CSH_PRED'], depth, 'r:')

# The y axis is shared, so inverting one panel puts depth increasing downward on both.
ax1.invert_yaxis()
plt.tight_layout()
plt.show()

***
# END