In [None]:
# Single seed value for this run; it seeds NumPy and TensorFlow below and is
# also formatted into the output folder name in the final cell.
mseed = 2

# Seed NumPy's legacy global RNG before anything else draws from it.
from numpy.random import seed
seed(mseed)
# Seed TensorFlow's global RNG (affects e.g. weight initialisation and dropout).
from tensorflow.random import set_seed
set_seed(mseed)

import os
import time
import datetime
import itertools
import h5py
import numpy as np
import pandas as pd
import pickle

import matplotlib.pyplot as plt
import matplotlib as mpl

import tensorflow as tf
from tensorflow import keras

# Make float32 the default dtype for all Keras layers created afterwards.
tf.keras.backend.set_floatx('float32')

# 1. Autoencoder
## 1.1 Create Architecture

In [None]:
def makeModel(acti, loss, opti, lr):
    """Build and compile the dense autoencoder used on 32x32 tiles.

    The network flattens a (32, 32) tile, encodes it through Dense layers
    1024-512-256-128-64-32, decodes back through 64-128-256-512-1024 and
    reshapes to (32, 32). The output of the 512-unit encoder stage (after
    dropout) is added to the 512-unit decoder stage as a skip connection.

    Parameters
    ----------
    acti : str or callable
        Activation used by every Dense layer.
    loss : str or callable
        Loss passed to ``model.compile``.
    opti : optimizer class or optimizer instance
        A class (e.g. ``keras.optimizers.Adam``) is instantiated with
        ``learning_rate=lr``; an already constructed instance is used as is.
    lr : float
        Learning rate; also embedded in the model name.

    Returns
    -------
    (keras.Model, str)
        The compiled model and the checkpoint sub-folder name
        ``'dense_all_skip'``.

    Notes
    -----
    Earlier revisions ignored ``acti``, ``opti`` and ``lr`` (hard-coded
    'relu', Adam and 8e-5) although the model name reported the passed
    values. The arguments are now honoured so that name and configuration
    agree. Layer order and the explicit layer names are unchanged, so
    existing checkpoints still map onto the same layers.
    """
    inputs = keras.Input(shape=(32, 32))

    # Encoder
    x = keras.layers.Flatten()(inputs)
    x = keras.layers.Dense(1024, activation=acti, name='dense_1024J')(x)
    x = keras.layers.Dropout(0.5)(x)
    x = keras.layers.Dense(512, activation=acti, name='dense_512J')(x)
    x = keras.layers.Dropout(0.5)(x)
    skip_a = x  # re-injected into the decoder at the matching width
    x = keras.layers.Dense(256, activation=acti, name='dense_256J')(x)
    x = keras.layers.Dense(128, activation=acti, name='dense_128J')(x)
    x = keras.layers.Dense(64, activation=acti, name='dense_64J')(x)
    x = keras.layers.Dense(32, activation=acti, name='dense_32J')(x)

    # Decoder
    x = keras.layers.Dense(64, activation=acti)(x)
    x = keras.layers.Dense(128, activation=acti)(x)
    x = keras.layers.Dense(256, activation=acti)(x)
    x = keras.layers.Dense(512, activation=acti)(x)
    x = keras.layers.add([x, skip_a])
    x = keras.layers.Dense(1024, activation=acti)(x)

    outputs = keras.layers.Reshape([32, 32])(x)

    # Accept either an optimizer class or a ready-made instance.
    if isinstance(opti, type):
        opti_name = opti.__name__
        optimizer = opti(learning_rate=lr)
    else:
        opti_name = type(opti).__name__
        optimizer = opti

    model = keras.Model(
        inputs=inputs,
        outputs=outputs,
        name='dense_ae_{}_{}_{}_{}'.format(acti, opti_name, lr, loss))
    # Single-output model: pass the loss directly instead of a two-entry list.
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    return model, 'dense_all_skip'

## 1.2 Load Saved Model

In [None]:
# Rebuild the autoencoder graph, then restore its trained weights from the
# checkpoint stored under the sub-folder name returned by makeModel.
model_ae, subfolder = makeModel(
    'relu',
    'mse',
    keras.optimizers.Adam,
    .0001,
)
checkpoint_filepath = 'checkpoints_tiles/{}'.format(subfolder)
model_ae.load_weights(checkpoint_filepath)

# 2. Dilated Recurrent Network
## 2.1 Read from the File and Create Slices

In [None]:
def makeNoEQArray_d14():
    """Build the boolean mask of candidate "no event" sample positions.

    Reads the ``'n_eq'`` and ``'maxmag'`` datasets from
    ``ML_Tiles_000to070_FullBlock.hdf5`` and marks a cell ``(i, j, k)`` as a
    valid negative sample when all of the following hold:

    * it is not inside the exclusion zone of any cell with ``maxmag >= 5.0``
      (the ``d = 14`` time steps up to and including that cell, and 16 cells
      before / 15 cells after it along both spatial axes);
    * the maximum of ``maxmag`` in the window ``[i-7, i+7) x [j-8, j+8) x
      [k-8, k+8)`` is below 4.5;
    * the mean of ``n_eq`` over the same window is above 10.

    Returns
    -------
    numpy.ndarray of bool
        Shape ``maxmag.shape - (512, 32, 32)``; entry ``[i-512, j-16, k-16]``
        corresponds to cell ``(i, j, k)`` of the full block.

    Notes
    -----
    The triple Python loop visits every cell and is slow on large blocks;
    the running index of the first axis is printed as a progress indicator.
    """
    # The context manager closes the file even if a dataset is missing.
    with h5py.File('ML_Tiles_000to070_FullBlock.hdf5', 'r') as f2:
        n_eq_avg = np.array(f2['n_eq'])
        maxm_loc = np.array(f2['maxmag'])

    d = 14

    # False wherever a >= 5.0 entry lies within the next d steps / nearby tiles.
    eq5mask = np.ones(maxm_loc.shape, dtype=bool)
    for ixx, ixy, ixz in np.argwhere(maxm_loc >= 5.0):
        eq5mask[max(ixx-d+1, 0):ixx+1, max(ixy-16, 0):ixy+16, max(ixz-16, 0):ixz+16] = False

    noeqarray = np.zeros(maxm_loc.shape-np.array([512, 32, 32]), dtype=bool)
    for i in range(512, maxm_loc.shape[0]):
        for j in range(16, maxm_loc.shape[1]-16):
            for k in range(16, maxm_loc.shape[2]-16):
                # Cheapest test first; the window reductions only run for
                # cells that survived the exclusion mask. The comparisons are
                # kept in their original direction so NaNs still evaluate to
                # "not a valid sample".
                if (eq5mask[i, j, k]
                        and np.max(maxm_loc[i-7:i+7, j-8:j+8, k-8:k+8]) < 4.5
                        and np.mean(n_eq_avg[i-7:i+7, j-8:j+8, k-8:k+8]) > 10):
                    noeqarray[i-512, j-16, k-16] = True
        print(i, end='\r')
    return noeqarray
    
#noeqarray_d14 = makeNoEQArray_d14()

## 2.2 Read Preprocessed Data

In [None]:
# Load the spatial sub-region used for training. Slicing the h5py datasets
# directly reads only the requested hyperslab instead of materialising each
# full dataset in memory first, and the context manager guarantees the file
# is closed even if one of the datasets is missing.
with h5py.File('ML_Tiles_000to070_FullBlock.hdf5', 'r') as f2:
    bval_loc = f2['b_value'][:, 150:260, 150:260]
    n_eq_avg = f2['n_eq'][:, 150:260, 150:260]
    maxm_loc = f2['maxmag'][:, 150:260, 150:260]
    dept_avg = f2['depth'][:, 150:260, 150:260]
    noeqarray = f2['NoEQArray'][:, 150:228, 150:228] #was 150:222

# Replace NaN by 0 and +/-inf by 2 / 0, then clamp everything into [0, 2].
b_block = np.clip(np.nan_to_num(bval_loc, posinf=2, neginf=0),0,2)

## 2.3 CDN Architecture

In [None]:
def makeCDN(size, lr=0.0001, lrschedule=None):
    """Build and compile the convolutional dilated network (binary classifier).

    Parameters
    ----------
    size : int
        Last dimension of the model input ``(512, size)``. The input is
        reshaped to ``[512, 32, 32, 1]`` right after dropout, so the element
        count only matches for ``size == 1024``.
    lr : float
        Learning rate of the Adam optimizer.
    lrschedule :
        Not used anywhere in this function.

    Returns
    -------
    keras.Model
        Model named ``'ConvDil_test'`` compiled with binary cross-entropy.

    NOTE(review): ``adam_print_lr`` is not defined in the visible part of the
    notebook; it is assumed to return a metric callable reporting the
    optimizer's learning rate -- confirm it is in scope before calling.
    """
    alpha = 0.1

    class CDBlock(tf.keras.layers.Layer):
        """One stage: optional 2x spatial reduction, then a dilated Conv1D in time.

        ``call`` is the 1-based stage number; the dilation rate is
        ``2**(call-1)`` and the time axis goes from ``d1-dilation+1`` to
        ``d1+1-2**call`` entries, as fixed by the two Reshape layers.
        """

        def __init__(self, d1, d2, call, reduce, channels, regularizer=None, **kwargs):
            super().__init__(**kwargs)

            self.dilation = 2**(call-1)

            # All six layers are always instantiated, in this order, so that
            # Keras' automatic layer naming stays the same for every block.
            self.convdil_layer = [
                tf.keras.layers.Conv3D(filters=channels, kernel_size=(1,2,2), padding='same', strides=(1,2,2)),
                tf.keras.layers.Reshape([d1-self.dilation+1, -1]),
                tf.keras.layers.Conv1D(filters=(d2**2//channels), kernel_size=2, padding='valid', dilation_rate=self.dilation),
                tf.keras.layers.Reshape([d1+1-2**call, d2//channels, d2//channels, channels]),
                tf.keras.layers.BatchNormalization(),
                tf.keras.layers.ReLU(negative_slope=alpha),
                ]

            # Non-reducing stages drop the strided Conv3D at the front.
            if not reduce:
                self.convdil_layer = self.convdil_layer[1:]

        def call(self, X):
            # Run the retained layers in sequence, then apply a leaky ReLU.
            for layer in self.convdil_layer:
                X = layer(X)
            return tf.keras.activations.relu(X, alpha=alpha)

    inputs = keras.Input(shape=(512,size))

    # With Dropout
    x = tf.keras.layers.Dropout(0.5, input_shape=(inputs.shape))(inputs)

    # Adjust input shape to match the other networks
    x = tf.keras.layers.Reshape([512, 32, 32, 1])(x)

    # (reduce, channels) for stages 1..9: every odd stage halves the tile
    # edge and doubles the channel count, every even stage keeps both.
    stage_plan = [
        (True, 2), (False, 2),
        (True, 4), (False, 4),
        (True, 8), (False, 8),
        (True, 16), (False, 16),
        (True, 32),
    ]
    for stage, (reduce, channels) in enumerate(stage_plan, start=1):
        x = CDBlock(512, 32, stage, reduce, channels)(x)

    x = tf.keras.layers.Reshape([-1])(x)
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    adam = keras.optimizers.Adam(learning_rate=lr)  # was 0.00005
    print_lr = adam_print_lr(adam)

    model = keras.Model(inputs=inputs, outputs=outputs, name='ConvDil_test')
    # 'accuracy' is resolved by Keras according to the loss function
    # (tf.keras.metrics.BinaryAccuracy() for binary cross-entropy).
    model.compile(
        loss='binary_crossentropy',
        optimizer=adam,
        metrics=['accuracy', print_lr],
    )
    return model


## 2.4 Define Training Function

In [None]:
def _stack_windows(block, indices, t_offset, inputlen, tile=32):
    """Cut one (inputlen, tile, tile) window out of `block` per index triple."""
    return np.array([
        block[inx[0]+t_offset:inx[0]+t_offset+inputlen,
              inx[1]:inx[1]+tile,
              inx[2]:inx[2]+tile]
        for inx in indices
    ])


def _append_predictions(indices, predictions):
    """Return `indices` as floats with one extra last column holding `predictions`."""
    return np.column_stack([np.asarray(indices, dtype=float), np.ravel(predictions)])


def trainProgressive(model, checkpoint_filepath_base, epochs=20, batch_size=32):
    """Train `model` progressively on a time window that grows by 30 steps per round.

    Round ``epochL`` (starting at 5) trains on all samples whose target time
    lies in ``[inputlen, tend)`` with ``tend = inputlen + (epochL+1)*30`` and
    validates on the following 30 time steps. Positives are cells with
    ``maxm_loc >= 5.0``; an equal number of negatives is drawn (with
    replacement) from the ``'NoEQArray_d14'`` mask. Every sample is the
    ``inputlen`` time steps of ``b_block`` preceding the target over a 32x32
    tile, minus its reconstruction by the autoencoder ``model_ae``.

    Reads the notebook-level globals ``bval_loc``, ``maxm_loc``, ``n_eq_avg``,
    ``dept_avg``, ``b_block`` and ``model_ae``. Resets NumPy's global RNG to
    seed 42 so the negative sampling is identical across runs.

    Parameters
    ----------
    model : keras.Model
        Compiled classifier; trained in place.
    checkpoint_filepath_base : str
        Prefix (normally a directory ending in '/') for the log file and the
        per-round ``epochL_XXXX/`` folders. Created if missing.
    epochs : int
        Keras epochs per round.
    batch_size : int
        Batch size passed to ``model.fit``.

    Returns
    -------
    keras.Model
        The same `model`, holding the best weights of the last trained round.

    Per round the folder ``epochL_XXXX/`` receives the weight checkpoint,
    ``history.pkl`` (Keras history dict), ``inxs.pkl`` (dict with keys
    'TP', 'TN', 'VP', 'VN': sample indices plus the model prediction as a
    fourth column) and ``val_data.dat``.
    """
    logfile = 'epochLlog'
    tend = 0
    epochL = 5
    inputlen = 512
    np.random.seed(42)
    lastlen = 0
    time0 = time.time()

    # Context manager closes the file even if the dataset is missing.
    with h5py.File('ML_Tiles_000to070_FullBlock.hdf5', 'r') as f2:
        noeqarray = f2['NoEQArray_d14'][:, 150:228, 150:228]

    # Make sure the output location exists before the first write.
    log_path = checkpoint_filepath_base + logfile
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    with open(log_path, 'w'):
        print('Logfile created!')

    while tend+30 < bval_loc.shape[0]:
        tend = inputlen+(epochL+1)*30

        # TRAINING DATA
        train_pos = np.argwhere(maxm_loc[inputlen:tend, 16:-16, 16:-16] >= 5.0)  # target = 1
        n_train = train_pos.shape[0]

        train_neg = np.argwhere(noeqarray[:tend-inputlen])
        train_neg = train_neg[np.random.choice(train_neg.shape[0], size=n_train)]

        # Positives first, negatives second -- same order as data_train.
        targets_train = np.concatenate([np.ones(n_train, dtype=bool), np.zeros(n_train, dtype=bool)])
        data_train = np.append(
            _stack_windows(b_block, train_pos, 0, inputlen),
            _stack_windows(b_block, train_neg, 0, inputlen),
            axis=0
        )

        # Up-weight the trailing `lastlen` entries of each class block.
        # NOTE(review): positives come from argwhere and are therefore ordered
        # by time, but negatives are randomly drawn, so their "trailing"
        # entries are not the most recent ones -- confirm this is intended.
        sample_weight = np.ones(targets_train.shape)
        if lastlen:
            recent_weight = targets_train.shape[0]/(lastlen+1)
            sample_weight[n_train-lastlen:n_train] = recent_weight
            sample_weight[2*n_train-lastlen:2*n_train] = recent_weight
            print('Weight factor: {}'.format(recent_weight))

        # VALIDATION DATA
        valid_pos = np.argwhere(maxm_loc[tend:tend+30, 16:-16, 16:-16] >= 5.0)  # target = 1
        n_valid = valid_pos.shape[0]

        valid_neg = np.argwhere(noeqarray[tend-inputlen:tend-inputlen+30])
        valid_neg = valid_neg[np.random.choice(valid_neg.shape[0], size=n_valid)]

        if n_valid == 0:
            print('No reasonable data in Epoch {}. Skipping'.format(epochL))
            epochL += 1
            lastlen = False
            continue

        targets_valid = np.concatenate([np.ones(n_valid, dtype=bool), np.zeros(n_valid, dtype=bool)])
        data_valid = np.append(
            _stack_windows(b_block, valid_pos, tend-inputlen, inputlen),
            _stack_windows(b_block, valid_neg, tend-inputlen, inputlen),
            axis=0
        )
        lastlen = targets_valid.shape[0]

        # Use AE on data here: the classifier sees the residual between each
        # 32x32 tile and its autoencoder reconstruction.
        shape_valid = data_valid.shape
        shape_train = data_train.shape

        data_valid = data_valid - np.reshape(model_ae.predict(np.reshape(data_valid, (-1, 32, 32))), (shape_valid))
        data_train = data_train - np.reshape(model_ae.predict(np.reshape(data_train, (-1, 32, 32))), (shape_train))

        print('Data Shapes for Epoch* {}:'.format(epochL+1))
        print(' Training Data:      {}'.format(data_train.shape))
        print(' Validation Data:      {}'.format(data_valid.shape))

        with open(log_path, 'a') as flog:
            flog.write('{:4d}, {:4d}, {:4d}, {:6.1f}s\n'.format(
                epochL,
                targets_train.shape[0],
                targets_valid.shape[0],
                time.time()-time0))
            time0 = time.time()

        checkpoint_filepath = checkpoint_filepath_base + 'epochL_{:04d}/'.format(epochL)
        os.makedirs(checkpoint_filepath, exist_ok=True)

        # NOTE(review): the best weights are selected on *training* accuracy
        # ('accuracy'), not 'val_accuracy' -- confirm this is intended.
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_filepath,
            save_weights_only=True,
            monitor='accuracy',
            mode='max',
            save_best_only=True)

        history = model.fit(
            data_train,
            targets_train,
            batch_size=batch_size,
            epochs=epochs,
            verbose=2,
            sample_weight=sample_weight,
            shuffle=True,
            validation_data=(data_valid, targets_valid),
            callbacks=[model_checkpoint_callback])

        # Continue with the best weights of this round.
        model.load_weights(checkpoint_filepath)

        with open(checkpoint_filepath+'history.pkl', 'wb') as f:
            pickle.dump(history.history, f)

        # Pair every sample index with the prediction for that same sample.
        # Positives occupy the first half of data_* and negatives the second.
        # column_stack with a flattened prediction vector adds exactly one
        # column (np.insert with a scalar index and (n, 1) values would add n).
        pred_train = model.predict(data_train)
        pred_valid = model.predict(data_valid)
        inxs_save = {
            'TP': _append_predictions(train_pos, pred_train[:n_train]),
            'TN': _append_predictions(train_neg, pred_train[n_train:]),
            'VP': _append_predictions(valid_pos, pred_valid[:n_valid]),
            'VN': _append_predictions(valid_neg, pred_valid[n_valid:]),
        }
        with open(checkpoint_filepath+'inxs.pkl', 'wb') as f:
            pickle.dump(inxs_save, f)

        with open(checkpoint_filepath+'val_data.dat', 'w') as f:
            f.write('NEQ, maxm, depth, inxs\n')
            for inx in valid_pos:
                # +16 maps the border-trimmed index back onto the full tile grid.
                f.write('{}, {}, {}, {} {} {}\n'.format(
                    n_eq_avg[tend+inx[0], inx[1]+16, inx[2]+16],
                    maxm_loc[tend+inx[0], inx[1]+16, inx[2]+16],
                    dept_avg[tend+inx[0], inx[1]+16, inx[2]+16],
                    *inx))
            for inx in valid_neg:
                f.write('{} {} {}\n'.format(*inx))

        print('Meta Training on Epoch {} ended. \nEnddate: {}\n\n\n'.format(epochL+1, np.datetime64('2000-01-01')+np.timedelta64(tend, 'D')))
        epochL += 1
    return model

In [None]:
# Output folder for this run, keyed by the global seed `mseed`.
# NOTE(review): hard-coded absolute path on a shared machine -- consider a
# configurable base directory.
checkpoint_filepath_base = '/home/srivastava-shared/data/Japan_progressive/CDN_seeds/s_{:02d}/'.format(mseed)
# NOTE(review): `makeCDN_aug` is not defined in the visible part of this
# notebook (only `makeCDN` is) -- confirm it is defined before this cell runs.
model = makeCDN_aug(32, lr=2e-5, lrschedule=True, aug_shift=2)
# NOTE(review): the `trainProgressive` defined in this notebook has no
# `aug_shift` parameter, so this call raises TypeError unless another
# definition is in scope -- confirm which version is meant.
trainProgressive(model, checkpoint_filepath_base, epochs=20, batch_size=32, aug_shift=2)