- model builder
- date: 2020-08-07
- maintainer: YZK

In [14]:
# jupyter nbconvert --to script mbuilder.ipynb

In [15]:
from datetime import datetime, timedelta
import argparse
import logging
import math
import os
import re
import sys

from collections import deque, Counter
from fbprophet import Prophet
from functools import partial
from imblearn.under_sampling import ClusterCentroids
from imblearn.under_sampling import RandomUnderSampler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import Sequential, Model, losses
from tensorflow.keras import constraints, initializers, regularizers

from tensorflow.keras.layers import Bidirectional, Lambda, Layer, TimeDistributed
from tensorflow.keras.layers import Activation, BatchNormalization, Conv1D, Conv2D, Dense, Dropout, Flatten, Input, LSTM, MaxPool1D, MaxPool2D
from tensorflow.keras.losses import Loss
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras import initializers, regularizers, constraints
from tensorflow.keras.callbacks import Callback, CSVLogger, EarlyStopping, LearningRateScheduler, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from tensorflow.keras.utils import plot_model

from tensorflow.python.keras.losses import LossFunctionWrapper
from tensorflow.python.keras.utils import losses_utils
from tensorflow.python.util.tf_export import keras_export


# Runs only when this file is the entry module (e.g. the script produced by nbconvert);
# skipped when the file is imported as a module.
# `msetup` is a project-local helper; presumably setLogging configures the root logger
# at the given level - verify against msetup.
if __name__ == "__main__":
    import msetup
    msetup.setLogging(loglv=logging.DEBUG)

In [16]:
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [17]:
def lstmbuilder(units, input_shape, loss, optimizer):
    '''
        Build and compile a minimal LSTM model: one LSTM layer followed by Dense(1).

        units: number of cells of the LSTM layer
        input_shape: a tuple (timesteps, nfeatures)
        loss: loss forwarded to compile()
        optimizer: optimizer forwarded to compile()

        return: the compiled Sequential model
    '''
    model = Sequential([
        LSTM(units, input_shape=input_shape),
        Dense(1),
    ])
    model.compile(optimizer=optimizer, loss=loss)
    return model


# lstmbuilder(10, (10, 3), 'mae', SGD()).summary()

In [18]:
class Attention(Layer):
    """
    Keras Layer that implements an Attention mechanism for temporal data.
    Supports Masking.
    Follows the work of Raffel et al. [https://arxiv.org/abs/1512.08756]

    Every time step is scored with tanh(x_t . W + b_t); the exponentiated scores are
    normalised over the time axis and the layer returns the score-weighted sum of the inputs.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.

    Put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
    Example:
        model.add(LSTM(64, return_sequences=True))
        model.add(Attention(step_dim=timesteps))

    Args:
        step_dim: number of time steps of the input. When None it is taken from the
            input shape in build().
        W_regularizer, b_regularizer: regularizers for the scoring vector / the bias.
        W_constraint, b_constraint: constraints for the scoring vector / the bias.
        bias: whether to add one trainable bias per time step to the scores.
        **kwargs: forwarded to `Layer`.

    tensorflow & keras reference:
        https://www.tensorflow.org/guide/keras/custom_layers_and_models
        https://www.tensorflow.org/guide/keras/masking_and_padding
        https://www.tensorflow.org/api_docs/python/tf/keras/layers/Masking
    """

    def __init__(self, step_dim=None,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        # Initialise the base Layer first so attribute tracking is set up before
        # any attribute of this layer is assigned.
        super(Attention, self).__init__(**kwargs)

        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0

    def build(self, input_shape):
        '''
            Deferred weight creation, run once the input shape is known.
            input_shape = [batchsize, timestep, # of feature]
        '''
        assert len(input_shape) == 3

        self.features_dim = input_shape[-1]
        if self.step_dim is None:
            self.step_dim = input_shape[1]

        # `shape` must be passed by keyword: the first positional parameter of
        # tf.keras `Layer.add_weight` is `name`, so a positional shape combined with
        # `name=` raises "got multiple values for argument 'name'".
        self.W = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)

        if self.bias:
            # one bias per time step; 'zeros' is the canonical initializer identifier
            self.b = self.add_weight(shape=(input_shape[1],),
                                     initializer='zeros',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        '''
            x: tensor of shape (samples, steps, features)
            mask: optional mask of shape (samples, steps); masked steps get zero weight
            return: tensor of shape (samples, features)
        '''
        features_dim = self.features_dim
        step_dim = self.step_dim

        # Score every time step: flatten to (samples * steps, features), project on W,
        # then fold back to (samples, steps).
        flat_x = K.reshape(x, (-1, features_dim))
        eij = K.reshape(K.dot(flat_x, K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij = eij + self.b

        a = K.exp(K.tanh(eij))

        # apply the mask after the exp; the weights are re-normalised right after
        if mask is not None:
            a = a * K.cast(mask, K.floatx())

        # epsilon guards against a (near) zero sum, e.g. early in training or fully masked rows
        a = a / K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = x * K.expand_dims(a)
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        # derived from the input shape so the result is valid even before build()
        return input_shape[0], input_shape[-1]

    def get_config(self):
        '''
            Return the constructor arguments so the layer can be re-created
            (model saving / cloning).
        '''
        config = super(Attention, self).get_config()
        config.update({
            "step_dim": self.step_dim,
            "W_regularizer": regularizers.serialize(self.W_regularizer),
            "b_regularizer": regularizers.serialize(self.b_regularizer),
            "W_constraint": constraints.serialize(self.W_constraint),
            "b_constraint": constraints.serialize(self.b_constraint),
            "bias": self.bias,
        })
        return config

In [19]:
def l2norm(x):
    '''
        Sum of squared entries of x (no square root is taken).

        - numpy array: one scalar, summed over every element of the array
        - anything else: handed to tf.map_fn, giving one sum of squares per
          element along the first axis
    '''
    if not isinstance(x, np.ndarray):
        return tf.map_fn(lambda row: tf.reduce_sum(tf.square(row)), elems=x)

    return np.sum(x**2)

def cosine_similarity(y_true, y_pred, axis=1):
    '''
        Negative cosine similarity between y_true and y_pred along `axis`
        (-1 for identical directions, +1 for opposite directions), usable as a loss.

        y_true: ground truth, cast to the dtype of y_pred
        y_pred: predictions
        axis: axis along which the similarity is computed; used for the norms
              as well as for the dot product

        return: tensor with `axis` reduced away

        An all-zero vector yields 0 instead of NaN, because tf.nn.l2_normalize
        clamps the squared norm with a small epsilon.
    '''
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    true_unit = tf.nn.l2_normalize(y_true, axis=axis)
    pred_unit = tf.nn.l2_normalize(y_pred, axis=axis)

    return -tf.reduce_sum(tf.multiply(true_unit, pred_unit), axis=axis)
    

# Customized Losses
- Inherits From [Loss](https://www.tensorflow.org/api_docs/python/tf/keras/losses/Loss)
- Implementation references
1. [BaseLossClass](https://github.com/tensorflow/tensorflow/blob/v2.4.1/tensorflow/python/keras/losses.py#L47)
2. [LossFunctionWrapper](https://github.com/tensorflow/tensorflow/blob/v2.4.1/tensorflow/python/keras/losses.py#L213)
3. [CategoricalCrossentropy](https://github.com/tensorflow/tensorflow/blob/v2.4.1/tensorflow/python/keras/losses.py#L587-L662)
- When used with `tf.distribute.Strategy`, outside of built-in training loops such as `tf.keras`, `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom training [tutorial](https://www.tensorflow.org/tutorials/distribute/custom_training) for more details. 


In [20]:
class YZKError(Loss):
    """
        Composite regression loss.

        per-sample loss = 2 * MAE(y_true, y_pred) + CosineSimilarityLoss(y_true, y_pred)
                          [+ 3 * MAE on the `penalized` column(s) when `penalized` is set]

        `call` returns one value per sample; the reduction (and any sample weighting)
        is applied once by the `Loss` base class. Nested Loss objects carrying their own
        reduction are deliberately avoided: they hand already-reduced scalars to the
        base class and are rejected under `tf.distribute` with AUTO / SUM_OVER_BATCH_SIZE.
    """

    def __init__(self,
                 reduction=losses_utils.ReductionV2.AUTO,
                 name=None,
                 element_weight=None,
                 penalized=None):
        """
            Initializes `YZKError` instance.

            Args:
              reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
                loss. Default value is `AUTO`. `AUTO` indicates that the reduction
                option will be determined by the usage context. For almost all cases
                this defaults to `SUM_OVER_BATCH_SIZE`.
              name: Optional name for the op.
              element_weight: stored and returned by get_config(); not used by call().
              penalized: column index (or slice) of the outputs that receives an extra
                MAE penalty with weight 3; None disables the penalty.
        """

        super(YZKError, self).__init__(name=name, reduction=reduction)
        self.element_weight = element_weight
        self.penalized = penalized

    def call(self, y_true, y_pred):
        """
            Args:
              y_true, y_pred: tensors of shape [batchsize, nelement].
            Returns:
              tensor of shape [batchsize] holding the un-reduced per-sample loss.
        """
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)
        assert y_pred.shape[0] == y_true.shape[0]

        # Functional (per-sample) losses, shape [batchsize].
        mae = tf.keras.losses.mean_absolute_error(y_true, y_pred)
        cossim = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=-1)
        total = tf.math.add(tf.math.scalar_mul(2, mae), tf.math.scalar_mul(1, cossim))

        penalized = self.penalized
        if penalized is not None:
            # Keep the batch axis so the penalty stays per-sample: [batchsize, #penalized columns]
            batchsize = tf.shape(y_pred)[0]
            _y_pred = tf.reshape(y_pred[:, penalized], [batchsize, -1])
            _y_true = tf.reshape(y_true[:, penalized], [batchsize, -1])
            ploss = tf.keras.losses.mean_absolute_error(_y_true, _y_pred)
            total = tf.math.add(tf.math.scalar_mul(3, ploss), total)

        return total

    def get_config(self):
        """Returns the config dictionary for a `Loss` instance, including the custom arguments."""
        return {'reduction': self.reduction,
                'name': self.name,
                'element_weight': self.element_weight,
                'penalized': self.penalized}

## inverse transform sigmoid(x)
<center>
<font size=4>
$
\begin{align}
\sigma &=\frac{1}{1+e^{-x}} \\
\frac{1}{\sigma} &= 1+e^{-x} \\
\frac{1}{\sigma}-1 &= e^{-x} \\
\log{\frac{1-\sigma}{\sigma}} &= \log{e^{-x}} \\
x &= -\log{\frac{1-\sigma}{\sigma}} \\
x &= \log{\frac{\sigma}{1-\sigma}}
\end{align}
$
</font>
</center>

## Binary Crossentropy

<center><font size=4>$BCE=-\left[y_{true}\cdot\log{y_{pred}}+\left(1-y_{true}\right)\cdot\log{\left(1-y_{pred}\right)}\right]$</font></center>

In [21]:
class WeightedBinaryCrossntropy(Loss):
    """
        Binary cross-entropy with separate weights for the positive and the negative term:

            -(w_pos * y_true * log(p) + w_neg * (1 - y_true) * log(1 - p)), averaged over the last axis
    """

    def __init__(self,
                 reduction=losses_utils.ReductionV2.AUTO,
                 name=None,
                 element_weight=None,
                 from_logits=False):

        """
            Args:
                reduction: type of `tf.keras.losses.Reduction` to apply to the loss
                name: optional name for the op
                element_weight: a weight list [weight for y_true=1, weight for y_true=0];
                    None means both weights are 1
                from_logits: True when y_pred holds raw logits, False when it already holds
                    probabilities. `Loss.__call__` only forwards (y_true, y_pred) to call(),
                    so this is the way to select logits mode when the loss is used via
                    compile()/fit().
            Ex.
                WeightedBinaryCrossntropy(reduction=tf.losses.Reduction.NONE, element_weight=[5, 1])(y_true, y_pred)
        """

        super(WeightedBinaryCrossntropy, self).__init__(name=name, reduction=reduction)
        self.element_weight = element_weight
        self.from_logits = from_logits

    def call(self, y_true, y_pred, from_logits=None):
        """
            Args:
                y_true, y_pred: tensors of identical shape
                from_logits: per-call override; None falls back to the constructor value
            Returns:
                the weighted cross-entropy averaged over the last axis
        """
        if from_logits is None:
            from_logits = self.from_logits

        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)

        if from_logits:
            # logits must be squashed to probabilities before the logarithms are taken
            y_pred = tf.math.sigmoid(y_pred)

        # keep both log() arguments strictly inside (0, 1)
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1 - epsilon)

        if self.element_weight is None:
            pos_weight, neg_weight = 1.0, 1.0
        else:
            pos_weight, neg_weight = self.element_weight[0], self.element_weight[1]

        bc = -(y_true * tf.math.log(y_pred) * pos_weight + (1 - y_true) * tf.math.log(1 - y_pred) * neg_weight)

        return tf.math.reduce_mean(bc, axis=-1)

    def get_config(self):
        """Returns the config dictionary, including the custom constructor arguments."""
        return {'reduction': self.reduction,
                'name': self.name,
                'element_weight': self.element_weight,
                'from_logits': self.from_logits}


# Customized Layers
- [Making new Layers and Models via subclassing](https://www.tensorflow.org/guide/keras/custom_layers_and_models)
- [masking_and_padding](https://www.tensorflow.org/guide/keras/masking_and_padding)
- [Masking](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Masking)
- Layers are recursively composable<br>
If you assign a Layer instance as an attribute of another Layer, <font color="red">the outer layer will start tracking the weights of the inner layer.</font><br>
We recommend creating such sublayers in the <font color="gray">\_\_init\_\_()</font> method (since the sublayers will typically have a build method, they will be built when the outer layer gets built).
- You can optionally enable serialization on your layers  
If you need your custom layers to be serializable as part of a Functional model, you can optionally implement a <font color="gray">get_config()</font> method

# Reference
- [Weight initialization & Batch Normalization](https://reurl.cc/9ZRela)

In [33]:
class TPCNN1D(Layer):

    '''
        temporal pattern 1d-CNN

        Wraps a Conv1D with kernel_size=1 and data_format='channels_first', then swaps
        the last two axes of its output.

        input shape = [None, timesteps, # of cells]; axis 1 (timesteps) is the channel axis
        output shape = [None, features, filters] or [None, m, k]

        The inner Conv1D is created in __init__ and stored as an attribute, so the
        outer layer tracks its weights and it is built together with the outer layer.
    '''
    def __init__(self, filters, name="TPCNN1D", **kwargs):
        super(TPCNN1D, self).__init__(name=name, **kwargs)
        self.filters = filters
        self.cnn1d = Conv1D(filters=self.filters,
                            kernel_size=1,
                            data_format='channels_first',
                            name=self.name)

    def call(self, x):
        # channels_first output is [None, filters, features]; swap to [None, features, filters]
        convolved = self.cnn1d(x)
        return tf.transpose(convolved, perm=[0, 2, 1])

    def get_config(self):
        # base Layer config first, then this layer's own constructor argument
        merged = dict(super(TPCNN1D, self).get_config())
        merged["filters"] = self.filters
        return merged

In [23]:
class Linear(Layer):
    '''
        Bias-free linear projection: outputs = inputs @ W, with W of shape [input_dim, units].
    '''
    def __init__(self, units=32, **kwargs):
        super(Linear, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        # W is created lazily because its first dimension depends on the input's last axis.
        # `self.units` (not the bare name `units`, which is undefined here) is the output size.
        self.W = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.W)

    def get_config(self):
        config = super(Linear, self).get_config()
        config.update({"units": self.units})
        return config

In [24]:
class TPAttention(Layer):
    '''
        Temporal Pattern Attention [https://arxiv.org/abs/1809.04206]

        Notations:
            - k: filters or timesteps - 1
            - m: features

        # Inputs (two tensors, passed as layer(x, h_t))
            x:   3D tensor with shape `(samples, m, k)`
            h_t: 2D tensor with shape `(samples, m)`
        # Output shape
            2D tensor with shape: `(samples, m)`.

        Computation, with the weights created in build():
            alpha   = sigmoid(x . W . h_t)          -> (samples, m, 1)
            context = sum over the m rows of alpha * x  -> (samples, k, 1)
            output  = W_h . h_t + W_v . context     -> (samples, m)
    '''
    def __init__(self, timesteps=None, features=None,
                 W_regularizer=None, W_constraint=None, name="TPAttention", **kwargs):
        '''
            timesteps: k + 1; inferred from the input shape in build() when None
                       (only logged, not used in the computation)
            features: m; inferred from the input shape in build() when None
            W_regularizer, W_constraint: regularizer / constraint applied to W
        '''
        super(TPAttention, self).__init__(name=name, **kwargs)  # inherits from Layer

        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.W_constraint = constraints.get(W_constraint)

        self.timesteps = timesteps
        self.features = features

    def build(self, input_shape):
        '''
            deferring weight creation until the shape of the inputs is known
            input_shape is the shape handed to build(); it is read as [batchsize, m, k]
        '''

        if self.timesteps is None:  # k + 1
            self.timesteps = input_shape[-1] + 1

        if self.features is None:  # m
            self.features = input_shape[-2]

        # shape of W = [k, m]
        self.W = self.add_weight(shape=(input_shape[-1], self.features,),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)

        # shape of W_h = [m, m]
        self.W_h = self.add_weight(shape=(self.features, self.features,),
                                   initializer=self.init,
                                   name='{}_Wh'.format(self.name))

        # shape of W_v = [m, k]
        self.W_v = self.add_weight(shape=(self.features, input_shape[-1],),
                                   initializer=self.init,
                                   name='{}_Wv'.format(self.name))

        self.built = True

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, h_t, mask=None):
        '''
            Notations:
                - H^C (= x): Convlutional operations on return sequence of LSTM
                - h_t: hidden state of current time
                - k: filters or timesteps - 1
                - m: features

            x.shape = [None, m, k]
            h_t.shape = [None, m]

            `mask` is accepted for API compatibility but is not applied.
        '''

        features = self.features
        timesteps = self.timesteps

        logging.info("features = {}".format(features))
        logging.info("timesteps = {}".format(timesteps))
        logging.info("x.shape (batchsize, m, k) = {}".format(x.shape))
        logging.info("W.shape (k, m) = {}".format(self.W.shape))
        logging.info("h_t.shape (batchsize, m) (need to expand dim) = {}".format(h_t.shape))

        scored = tf.matmul(x, self.W)                             # [None, m, m]
        scored = tf.matmul(scored, tf.expand_dims(h_t, axis=-1))  # [None, m, 1]
        alpha_i = tf.math.sigmoid(scored)                         # [None, m, 1]
        # ones(1, m) @ (alpha * x) sums the m weighted rows      -> [None, 1, k]
        context_vector = tf.matmul(tf.ones((1, features)), tf.multiply(alpha_i, x))
        context_vector = tf.transpose(context_vector, perm=[0, 2, 1])  # [None, k, 1]

        logging.info("scored.shape (batchsize, m, 1) = {}".format(scored.shape))
        logging.info("alpha_i.shape (batchsize, m, 1) = {}".format(alpha_i.shape))
        logging.info("shape of W_h (m, m) = {}, h_t (batchsize, m, 1) = {}".format(self.W_h.shape, tf.reshape(h_t, (-1, features, 1)).shape))
        logging.info("shape of W_v (m, k) = {}, context_vector (batchsize, k, 1) = {}".format(self.W_v.shape, context_vector.shape))
        return tf.reshape(tf.matmul(self.W_h, tf.reshape(h_t, (-1, features, 1))) + tf.matmul(self.W_v, context_vector), (-1, features))

    def get_config(self):
        # Regularizer / constraint objects are serialised so that the config can be
        # written to JSON/HDF5; regularizers.get / constraints.get in __init__ accept
        # the serialised form again.
        config = {"timesteps": self.timesteps,
                  "features": self.features,
                  "W_regularizer": regularizers.serialize(self.W_regularizer),
                  "W_constraint": constraints.serialize(self.W_constraint)}
        base_config = super(TPAttention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

# Customized Callbacks

In [25]:
class ChangeableLossw(Callback):
    '''
        Rescales loss weights at the end of the early epochs.

        After every epoch with index <= 10, each of the first `nlossw` (= 2) entries of
        `lossw` is multiplied in place by the matching entry of `wmultiplier`.

        lossw: indexable collection of backend variables holding the loss weights
               (they are read with K.get_value and written with K.set_value)
        wmultiplier: indexable collection of multipliers, one per loss weight
    '''
    def __init__(self, lossw, wmultiplier):
        # the base Callback initialiser sets up the attributes Keras expects on a callback
        super(ChangeableLossw, self).__init__()

        self.nlossw = 2
        self.lossw = lossw
        self.wmultiplier = wmultiplier

    def on_epoch_end(self, epoch, logs=None):
        # `logs` is unused; None replaces the shared mutable default `{}`
        if epoch > 10:
            return

        for idx in range(self.nlossw):
            K.set_value(self.lossw[idx], K.get_value(self.lossw[idx]) * self.wmultiplier[idx])

# Neural Network Builder
- [Lambda](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Lambda)
- [LSTM](https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM) 
- [Bidirectional](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Bidirectional)
- [Conv1D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv1D)

In [26]:
class NNBuilder():
    def __init__(self, modeld="model", ckptd="ckpt", name="NNBuilder", optimizer="SGD"):
        
        if not os.path.exists(modeld):
            os.makedirs(modeld)
            
        if not os.path.exists(ckptd):
            os.makedirs(ckptd)
        
        self.name = name
        self.modeld = modeld
        self.ckptd = ckptd
        
#         self.callbacks = self._callbacks(modeld, ckptd, name=name)
#         self.optimizer = self._optimizer(name=optimizer)
        
    def setObjV(self, optimizer=None, callbacks=None):
        if optimizer is None:
            self.optimizer = self._optimizer()
        else:
            self.optimizer = self._optimizer(name=optimizer["name"], lr=optimizer["lr"])

        if callbacks is None:
            self.callbacks = self._callbacks()
        else:
            self.callbacks = self._callbacks(mmonitor=callbacks["mmonitor"], 
                                             emonitor=callbacks["emonitor"], 
                                             lmonitor=callbacks["lmonitor"])    
        
    def CNN1D(self, filters, inshape, outshape, outactfn=["sigmoid"], batchNormalization=True, dropouts=None, activations=None, optimizer=None, callbacks=None):
        '''
            Stack of Conv1D (kernel_size=3, strides=1) blocks, each optionally followed by
            BatchNormalization, then Activation and optionally Dropout; finished with
            MaxPool1D(2) -> Flatten -> one or two Dense output heads.

            filters, dropouts, activations: normalised by NNBuilder._argreset into per-layer settings
            inshape: input shape without the batch axis, indexed as (timesteps, # of features)
            outshape: sequence of output sizes, one per head
            outactfn: sequence of output activations, one per head
            optimizer, callbacks: forwarded to setObjV()

            A second head ("Loss2") is added only when outactfn and outshape both have length 2.

            return [model, callbacks, optimizer]
        '''
        self.name = "CNN1D"
        self.setObjV(optimizer, callbacks)

        resolved = NNBuilder._argreset(filters, dropouts=dropouts, activations=activations)
        units, nlayer = resolved["units"], resolved["nlayer"]
        dropouts, activations = resolved["dropouts"], resolved["activations"]

        # not used below; the lookups still require inshape to have at least two entries
        timesteps, nfeatures = inshape[0], inshape[1]

        inputs = Input(inshape, name="input")
        for i in range(nlayer):
            layer_no = i + 1
            previous = inputs if i == 0 else x
            x = Conv1D(filters=units[i], kernel_size=3, strides=1, name="Conv1D_{}".format(layer_no))(previous)
            if batchNormalization:
                x = BatchNormalization(name="BatchNormalization_{}".format(layer_no))(x)
            x = Activation(activations[i], name="Activation_{}".format(layer_no))(x)
            if dropouts is not None:
                x = Dropout(dropouts[i], name="Dropout_{}".format(layer_no))(x)

        x = Flatten()(MaxPool1D(pool_size=2)(x))
        heads = [Dense(outshape[0], activation=outactfn[0], name="Loss1")(x)]

        if len(outactfn) == len(outshape) == 2:
            heads.append(Dense(outshape[1], activation=outactfn[1], name="Loss2")(x))
            model = Model(inputs=inputs, outputs=heads, name="CNN1D")
        else:
            model = Model(inputs=inputs, outputs=heads[0], name="CNN1D")

        return [model, self.callbacks, self.optimizer]
    
    def bidirectionalLSTM(self, cells, inshape, outshape, outactfn=["sigmoid"], dropout=0, recurrent_dropout=0, merge_mode='concat', optimizer=None, callbacks=None):

        '''
            Stacked bidirectional LSTMs followed by temporal pattern attention
            (TPCNN1D + TPAttention) and one or two Dense output heads.

            input: [batchsize, timesteps, nfeatures]
            cells: number of cells, or a list with one entry per bidirectional layer
            inshape: (timesteps, nfeatures)
            outshape: sequence of output sizes, one per head
            outactfn: sequence of output activations, one per head
            dropout, recurrent_dropout: forwarded to every forward and backward LSTM
            merge_mode: one of {'sum', 'mul', 'concat', 'ave'}.
                if merge_mode='concat', then shape of LSTM output is [timesteps, # of cells * 2 (directions)]
                NOTE(review): merge_mode=None makes Bidirectional return two separate
                tensors, which the shape check below does not handle - confirm before use.
            optimizer, callbacks: forwarded to setObjV()

            A second head ("Loss2") is added only when outactfn and outshape both have length 2.

            return [model, callbacks, optimizer]
        '''

        self.name = "bidirectionalLSTM"
        self.setObjV(optimizer, callbacks)

        timesteps = inshape[0]
        units = cells if isinstance(cells, list) else [cells]

        inputs = Input(inshape, name="input")

        x = inputs
        for idx, ncell in enumerate(units):
            x = Bidirectional(layer=LSTM(ncell, return_sequences=True,
                                         dropout=dropout, recurrent_dropout=recurrent_dropout),
                              backward_layer=LSTM(ncell, return_sequences=True, go_backwards=True,
                                                  dropout=dropout, recurrent_dropout=recurrent_dropout),
                              merge_mode=merge_mode,
                              name="BLSTM_{}".format(idx + 1))(x)

            logging.info("{0:02d}, x.shape = {1}".format(idx + 1, x.shape))

        assert x.shape[1] == timesteps

        # H: every time step but the last one; h_t: the last time step
        H = Lambda(lambda x: x[:, 0:-1, :], name="H")(x)
        h_t = Lambda(lambda x: x[:, -1, :], name="h_t")(x)
        x = TPCNN1D(filters=units[-1], name="TPCNN1D")(H)
        x = TPAttention(name="TPAttention")(x, h_t)

        loss1 = Dense(units[-1], activation="relu", name="LDense1")(x)
        loss1 = Dense(outshape[0], activation=outactfn[0], name="Loss1")(loss1)

        if len(outactfn) == len(outshape) == 2:
            loss2 = Dense(units[-1], activation="relu", name="LDense2")(x)
            loss2 = Dense(outshape[1], activation=outactfn[1], name="Loss2")(loss2)
            model = Model(inputs=inputs, outputs=[loss1, loss2], name="bidirectionalLSTM")
        else:
            model = Model(inputs=inputs, outputs=loss1, name="bidirectionalLSTM")

        model.summary()

        return [model, self.callbacks, self.optimizer]

    def stackedLSTM(self, cells, inshape, outshape, outactfn=["sigmoid"], dropout=0, recurrent_dropout=0, optimizer=None, callbacks=None):

        '''
            Stack of LSTM layers followed by one or two three-layer Dense output heads.

            cells: number of cells, or a list with one entry per LSTM layer
            inshape: input shape without the batch axis, indexed as (timesteps, # of features)
            outshape: sequence of output sizes, one per head
            outactfn: sequence of output activations, one per head
            dropout: given to the first LSTM layer only
            recurrent_dropout: given to every LSTM layer after the first
            optimizer, callbacks: forwarded to setObjV()

            Every LSTM layer except the last one returns sequences.
            A second head ("Loss2") is added only when outactfn and outshape both have length 2.

            return [model, callbacks, optimizer]
        '''

        self.name = "stackedLSTM"
        self.setObjV(optimizer, callbacks)

        # not used below; the lookups still require inshape to have at least two entries
        timesteps, nfeatures = inshape[0], inshape[1]

        inputs = Input(inshape, name="input")

        units = cells if isinstance(cells, list) else [cells]
        nlayer = len(units)

        for idx in range(nlayer):
            keep_sequence = idx < nlayer - 1
            layer_name = "LSTM_{}".format(idx + 1)
            if idx == 0:  # the first hidden layer
                x = LSTM(units[idx], return_sequences=keep_sequence, dropout=dropout, name=layer_name)(inputs)
            else:
                x = LSTM(units[idx], return_sequences=keep_sequence, recurrent_dropout=recurrent_dropout, name=layer_name)(x)

        def dense_head(head_no, head_idx):
            # Dense(units[-1]) -> Dense(3 * out) -> Dense(out); layers named per head
            hidden = Dense(units[-1], activation="relu", name="LDense{}_1".format(head_no))(x)
            hidden = Dense(outshape[head_idx] * 3, activation="relu", name="LDense{}_2".format(head_no))(hidden)
            return Dense(outshape[head_idx], activation=outactfn[head_idx], name="Loss{}".format(head_no))(hidden)

        loss1 = dense_head(1, 0)

        if len(outactfn) == len(outshape) == 2:
            loss2 = dense_head(2, 1)
            model = Model(inputs=inputs, outputs=[loss1, loss2], name="stackedLSTM")
        else:
            model = Model(inputs=inputs, outputs=loss1, name="stackedLSTM")

        return [model, self.callbacks, self.optimizer]
    
    def LSTMbasicAttention(self, shape, cells, optimizer=None, callbacks=None):
        '''
            Stacked LSTMs (all returning sequences) -> Attention over the time axis -> Dense.

            shape = (timestep, feature)
            cells: number of cells, or a list with one entry per LSTM layer
            optimizer, callbacks: forwarded to setObjV()

            return [model, callbacks, optimizer]
        '''

        # every builder method records its own name before calling setObjV()
        self.name = "LSTMbasicAttention"
        self.setObjV(optimizer, callbacks)

        nfeatures = shape[1]

        inputs = Input(shape, name="input")  # return a tensor

        units = cells if isinstance(cells, list) else [cells]

        x = inputs
        for idx, unit in enumerate(units):
            x = LSTM(unit, return_sequences=True, name="LSTM_{}".format(idx))(x)

        # Attention collapses the time axis (3D -> 2D), so it has to come after the last
        # LSTM; applied inside the loop it would feed a 2D tensor into the next LSTM.
        x = Attention(shape[0])(x)

        outputs = Dense(nfeatures)(x)

        model = Model(inputs=inputs, outputs=outputs)

        return [model, self.callbacks, self.optimizer]
    
    
    def DNNLSTM(self, units, inshape, outshape, outactfn=["sigmoid"], batchNormalization=True, dropouts=None, activations=None, optimizer=None, callbacks=None):

        '''
            TimeDistributed Dense stack -> LSTM -> one or two Dense output heads.

            units: [units for TDense_1, ..., units for TDense_n, cells for LSTM], i.e. units[-1]: cells for LSTM
                   (units[-1] is also the width of the first Dense layer of every head); len(units) >= 2
            inshape: (timesteps, # of features)
            outshape: width of the output layer; an integer for a single head (ex. 4)
                      or a list with one integer per head (ex. [4, 1])
            outactfn: activation of the output layer; a string or a list with one entry per head.
                      The second head is built only when outshape and outactfn both have 2 entries
            batchNormalization: if truthy, BatchNormalization follows every TDense layer
            dropouts: None (no Dropout layers), one rate for all TDense layers, or a list with one rate per layer
            activations: None ("relu" for every layer), one activation for all TDense layers, or a list with one per layer
            optimizer, callbacks: forwarded to self.setObjV

            return [model, self.callbacks, self.optimizer]
            The model outputs are named "Loss1" (and "Loss2"); use these names as the keys of
            per-output loss / metrics dictionaries passed to model.compile.

            NNB = NNBuilder()
            model, callbacks_, optimizer_ = NNB.DNNLSTM([10, 20, 30, 4], inshape=(10, 4), outshape=4, batchNormalization=None)
            model.summary()

            Layer           Output Shape      Param #
            input           (None, 10, 4)     0
            TDense_1        (None, 10, 10)    50     (4 * 10 + 10)
            Activation_1    (None, 10, 10)    0
            TDense_2        (None, 10, 20)    220    (10 * 20 + 20)
            Activation_2    (None, 10, 20)    0
            TDense_3        (None, 10, 30)    630    (20 * 30 + 30)
            Activation_3    (None, 10, 30)    0
            LSTM            (None, 4)         560    (4 * (30 * 4 + 4 + 4 * 4)), 4 = input gate, forget gate, output gate and neuron
            LDense1_1       (None, 4)         20     (4 * 4 + 4)
            LDense1_2       (None, 12)        60     (4 * 12 + 12)
            Loss1           (None, 4)         52     (12 * 4 + 4)
            Total params: 1,592
        '''

        self.name = "DNNLSTM"
        self.setObjV(optimizer, callbacks)

        assert len(units) >= 2

        # The documented call passes a bare integer / string for a single head; the head
        # construction below indexes these, so wrap scalars first.
        # (The list default of outactfn is only read, never mutated.)
        if isinstance(outshape, int):
            outshape = [outshape]
        if isinstance(outactfn, str):
            outactfn = [outactfn]

        inputs = Input(inshape, name="input")

        # Every entry of units except the last one is a TimeDistributed Dense layer.
        nlayer = len(units) - 1
        if dropouts is not None:
            if isinstance(dropouts, list):
                assert nlayer == len(dropouts)
            else:
                dropouts = [dropouts for _ in range(nlayer)]

        if activations is not None:
            if isinstance(activations, list):
                assert nlayer == len(activations)
            else:
                activations = [activations for _ in range(nlayer)]
        else:
            activations = ["relu" for _ in range(nlayer)]

        x = inputs
        for i in range(nlayer):
            x = TimeDistributed(Dense(units[i]), name="TDense_{}".format(i + 1))(x)
            if batchNormalization:
                x = TimeDistributed(BatchNormalization(), name="BatchNormalization_{}".format(i + 1))(x)
            x = TimeDistributed(Activation(activations[i]), name="Activation_{}".format(i + 1))(x)
            if dropouts is not None:
                x = TimeDistributed(Dropout(dropouts[i]), name="Dropout_{}".format(i + 1))(x)
        x = LSTM(units[-1], name="LSTM")(x)

        def _head(k):
            # Output head k (0-based): Dense(units[-1]) -> Dense(3 * width) -> Dense(width).
            h = Dense(units[-1], activation="relu", name="LDense{}_1".format(k + 1))(x)
            h = Dense(outshape[k] * 3, activation="relu", name="LDense{}_2".format(k + 1))(h)
            return Dense(outshape[k], activation=outactfn[k], name="Loss{}".format(k + 1))(h)

        loss1 = _head(0)

        if len(outactfn) == len(outshape) == 2:
            loss2 = _head(1)
            model = Model(inputs=inputs, outputs=[loss1, loss2], name="DNNLSTM")
        else:
            model = Model(inputs=inputs, outputs=loss1, name="DNNLSTM")

        return [model, self.callbacks, self.optimizer]

    
    def TPALSTM(self, optimizer=None, callbacks=None):
        '''
            NOTE(review): this looks like an unfinished placeholder rather than a working builder.

            The body builds a two-input model (one shared Embedding + LSTM applied to both
            integer sequences, concatenated, Dense, sigmoid output), compiles it with
            binary_crossentropy / nadam, trains it, reloads the best checkpoint, averages the
            predictions for both input orders and writes a submission CSV.

            It reads many names that are not defined in this method: Embedding, concatenate,
            nb_words, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, num_lstm,
            rate_drop_lstm, rate_drop_dense, num_dense, act, re_weight, STAMP, data_1_train,
            data_2_train, labels_train, data_1_val, data_2_val, labels_val, weight_val,
            test_data_1, test_data_2, test_ids. Embedding and concatenate are not in the
            notebook's import cell; unless all of these are defined elsewhere as globals,
            calling the method raises NameError.

            Unlike the other builders it has no return statement, and self.callbacks /
            self.optimizer set up by setObjV are not used by the training code below.
        '''

        self.name = "TPALSTM"
        self.setObjV(optimizer, callbacks)

        # One Embedding and one LSTM instance are created and applied to both inputs,
        # so the two branches share their weights.
        embedding_layer = Embedding(nb_words, EMBEDDING_DIM,
                                    weights=[embedding_matrix],
                                    input_length=MAX_SEQUENCE_LENGTH,
                                    trainable=False)
        
        lstm_layer = LSTM(num_lstm, dropout=rate_drop_lstm, recurrent_dropout=rate_drop_lstm)

        sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
        embedded_sequences_1 = embedding_layer(sequence_1_input)
        x1 = lstm_layer(embedded_sequences_1)

        sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
        embedded_sequences_2 = embedding_layer(sequence_2_input)
        y1 = lstm_layer(embedded_sequences_2)

        merged = concatenate([x1, y1])
        merged = Dropout(rate_drop_dense)(merged)
        merged = BatchNormalization()(merged)

        merged = Dense(num_dense, activation=act)(merged)
        merged = Dropout(rate_drop_dense)(merged)
        merged = BatchNormalization()(merged)

        preds = Dense(1, activation='sigmoid')(merged)

        ########################################
        ## add class weight
        ########################################
        # NOTE(review): the two weights are hard-coded constants; their origin is not visible here.
        if re_weight:
            class_weight = {0: 1.309028344, 1: 0.472001959}
        else:
            class_weight = None

        ########################################
        ## train the model
        ########################################
        model = Model(inputs=[sequence_1_input, sequence_2_input], \
                outputs=preds)
        model.compile(loss='binary_crossentropy',
                optimizer='nadam',
                metrics=['acc'])
        #model.summary()
        print(STAMP)

        # Local callbacks; the checkpoint stores weights only, at STAMP + '.h5'.
        early_stopping =EarlyStopping(monitor='val_loss', patience=3)
        bst_model_path = STAMP + '.h5'
        model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)

        hist = model.fit([data_1_train, data_2_train], labels_train, \
                validation_data=([data_1_val, data_2_val], labels_val, weight_val), \
                epochs=200, batch_size=2048, shuffle=True, \
                class_weight=class_weight, callbacks=[early_stopping, model_checkpoint])

        # Restore the weights saved by the checkpoint callback; the lowest val_loss goes into the CSV file name.
        model.load_weights(bst_model_path)
        bst_val_score = min(hist.history['val_loss'])

        ########################################
        ## make the submission
        ########################################
        print('Start making the submission before fine-tuning')

        # Predict with both input orders and average the two results.
        preds = model.predict([test_data_1, test_data_2], batch_size=8192, verbose=1)
        preds += model.predict([test_data_2, test_data_1], batch_size=8192, verbose=1)
        preds /= 2

        submission = pd.DataFrame({'test_id':test_ids, 'is_duplicate':preds.ravel()})
        submission.to_csv('%.4f_'%(bst_val_score)+STAMP+'.csv', index=False)
    
    @staticmethod
    def DenseBuilder(units, inputs, batchNormalization=True, dropouts=None, activations=None, optimizer=None, callbacks=None):
        '''
            Chain Dense (-> BatchNormalization) (-> Activation) (-> Dropout) blocks onto a tensor.

            units: number of units of the Dense layer, or a list with one entry per layer
            inputs: tensor the first Dense layer is applied to
            batchNormalization: if truthy, BatchNormalization follows every Dense layer
            dropouts: None (no Dropout layers), one rate for all layers, or a list with one rate per layer
            activations: None (no Activation layers, the Dense outputs stay linear),
                         one activation for all layers, or a list with one per layer
            optimizer, callbacks: accepted for signature compatibility, not used here

            return the output tensor of the last block (inputs itself when units is an empty list)
        '''

        _args       = NNBuilder._argreset(units, dropouts=dropouts, activations=activations)
        units       = _args["units"]
        nlayer      = _args["nlayer"]
        dropouts    = _args["dropouts"]
        activations = _args["activations"]

        # Start from inputs so the loop body is uniform and x is always bound.
        x = inputs
        for i in range(nlayer):
            x = Dense(units[i], name="Dense_{}".format(i + 1))(x)
            if batchNormalization:
                x = BatchNormalization(name="BatchNormalization_{}".format(i + 1))(x)
            # _argreset returns None when no activation was requested; indexing it used to
            # raise TypeError, so the layer is skipped the same way Dropout is.
            if activations is not None:
                x = Activation(activations[i], name="Activation_{}".format(i + 1))(x)
            if dropouts is not None:
                x = Dropout(dropouts[i], name="Dropout_{}".format(i + 1))(x)

        return x
        
    
    @staticmethod
    def _argreset(units, dropouts=None, activations=None):
        
        _args = dict()
        
        nlayer = 1
        if isinstance(units, list):
            nlayer = len(units)
        else:
            units = [units]
        
        _args["units"] = units
        _args["nlayer"] = nlayer
        
        if dropouts is not None:
            if isinstance(dropouts, list):
                assert nlayer == len(dropouts)
            else:
                dropouts = [dropouts for _ in range(nlayer)]
            _args["dropouts"] = dropouts
        else:
            _args["dropouts"] = None
        
        if activations is not None:
            if isinstance(activations, list):
                assert nlayer == len(activations)
            else:
                activations = [activations for _ in range(nlayer)]
            _args["activations"] = activations
        else:
            _args["activations"] = None
#             activations = ["relu" for _ in range(nlayer)]
            
        return _args

    
    def _callbacks(self, mmonitor="val_loss", emonitor="loss", lmonitor="val_loss"):
        '''
            Assemble the Keras callbacks used during training.

            mmonitor: monitor for model (ModelCheckpoint, only the best model is saved)
            emonitor: monitor for earlystopping
            lmonitor: monitor for learning rate (ReduceLROnPlateau)

            return [checkpointer, earlystopper, reduceLR, tensorboard, csvlogger]
        '''

        stamp = datetime.now().strftime("%Y%m%d%H%M")

        # The checkpoint file is named after the model only, so a new run overwrites the old file;
        # the CSV log additionally carries the timestamp.
        model_path = os.path.join(self.modeld, "{0}.hdf5".format(self.name))
        log_path = os.path.join(self.ckptd, "{}_{}.log".format(self.name, stamp))

        return [
            ModelCheckpoint(filepath=model_path, verbose=0, save_best_only=True, monitor=mmonitor),
            EarlyStopping(monitor=emonitor, patience=10),
            ReduceLROnPlateau(monitor=lmonitor, factor=0.5, patience=10, min_lr=0.0001),
            TensorBoard(log_dir=self.ckptd),
            CSVLogger(log_path, append=False, separator=","),
        ]

#         return [checkpointer, earlystopper, reduceLR, csvlogger]
    
    def _optimizer(self, lr=1e-3, name="Adam"):
        '''
            Create the optimizer used for training.

            lr: learning rate
            name: "SGD" selects SGD with Nesterov momentum 0.9; any other value selects Adam

            return the optimizer instance
        '''
        if name != "SGD":
            return Adam(learning_rate=lr)
        return SGD(learning_rate=lr, momentum=0.9, nesterov=True)
    
    @staticmethod
    def mloader(filepath, custom_objects=None):
        '''
            Load a saved Keras model.

            filepath: path of the saved model
            custom_objects: optional mapping forwarded to load_model; omitted from the call when None

            return whatever load_model returns
        '''
        extra = {} if custom_objects is None else {"custom_objects": custom_objects}
        return load_model(filepath, **extra)
            
        

In [27]:
class MArgs(object):
    '''
        Plain holder for the settings of the test cell.

        mname: name of the model branch to run
        testf: optional name of a helper to test
        train: optional training settings
    '''

    def __init__(self, mname, testf=None, train=None):
        self.mname, self.testf, self.train = mname, testf, train


In [34]:
if __name__ == "__main__":

    # Smoke test of the NNBuilder models on random data.
    # mname selects the branch below;
    # train = [epochs, batch_size, nsize, nstep, nfeature, ntarget, nclass].
    gpuId = 1
    mname = "bidirectionalLSTM"
    args = MArgs(mname, train=[100, 100, 10000, 6, 5, 4, 5])

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(gpuId)  # only device you want to use can visible

    gpus = tf.config.experimental.list_physical_devices('GPU')
    print(gpus)
    if gpus:
        try:
            tf.config.experimental.set_memory_growth(gpus[0], enable=True)
        except RuntimeError as e:
            # Best effort: report the problem and continue with the default memory policy.
            print(e)

    if args.testf == "_argreset":
        _args = NNBuilder._argreset([10, 20, 30], dropouts=0.5, activations="relu")
        print(_args)

    if args.train is not None:
        # Random inputs plus two targets: a regression target and a one-hot class target.
        epochs, batch_size, nsize, nstep, nfeature, ntarget, nclass = args.train
        X_train = np.random.random_sample((nsize, nstep, nfeature))
        y_train = [np.random.random_sample((nsize, ntarget)), pd.get_dummies(pd.Series(np.random.randint(low=0, high=nclass, size=nsize)))]
        print("****** epochs = {}, batch_size = {}, nsize = {}, nstep = {}, nfeature = {}, ntarget = {}, nclass = {}".format(epochs, batch_size, nsize, nstep, nfeature, ntarget, nclass))
        print("****** X_train.shape = {}, y_train[0].shape = {}, y_train[1].shape = {}".format(X_train.shape, y_train[0].shape, y_train[1].shape))

    if args.mname == "DNNbuilder":
        # shape must be a tuple; "(10)" is just the integer 10.
        inputs = Input(shape=(10,), name="Input")
        x = NNBuilder.DenseBuilder([10, 30, 20, 40], inputs, dropouts=0.25, activations="relu")
        model = Model(inputs=inputs, outputs=x, name="DNN")
        model.summary()

    elif args.mname == "DNNLSTM":
        NNB = NNBuilder()
        model, callbacks_, optimizer_ = NNB.DNNLSTM([10, 20, 30, 10], inshape=(6, 4), outshape=[4, 1], outactfn=["tanh", "sigmoid"], batchNormalization=None)
        model.summary()
        plot_model(model, to_file="DNNLSTM.png", show_shapes=True)

        # Per-output dictionaries are keyed by the output layer names. DNNLSTM names its
        # outputs "Loss1" and "Loss2"; the former keys "regression_output" /
        # "classification_output" do not exist in the model.
        model.compile(loss={"Loss1": "mae", "Loss2": "binary_crossentropy"},
                      metrics={"Loss1": "mae", "Loss2": "accuracy"},
                      optimizer=optimizer_)

        epochs = 1000
        batch_size = 10
        n = 10000
        X_train = np.random.random_sample((n, 6, 4))
        y_train = [np.random.random_sample((n, 4)), np.random.randint(low=0, high=2, size=(n, 1))]
        history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, callbacks=callbacks_, validation_split=0.1, verbose=2, shuffle=True)

    elif args.mname == "stackedLSTM":
        stackedLSTM, callbacks_, optimizer_ = NNBuilder().stackedLSTM(cells=[10, 20], inshape=[6, 4], outshape=[4, 4], outactfn=["sigmoid", "softmax"])
        stackedLSTM.compile(loss="mae", optimizer=optimizer_)
        stackedLSTM.summary()

        epochs = 3
        batch_size = 200
        X_train = np.random.random_sample((1000, 6, 4))
        y_train = [np.random.random_sample((1000, 4)), np.random.randint(low=0, high=2, size=1000)]

        # Training and reloading are left disabled:
        #     history = stackedLSTM.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, callbacks=callbacks_, validation_split=0.1, verbose=2, shuffle=True)
        #     saved_model = "/home/yuzhe/DataScience/QC/model/lstm1_0154_0.009_0.008_202008071814.hdf5"
        #     model = NNBuilder.mloader(saved_model)

    elif args.mname == "CNN1D":
        CNN1D, callbacks_, optimizer_ = NNBuilder().CNN1D(filters=[10, 20], inshape=[6, 4], outshape=[2], outactfn=["sigmoid"], activations="relu")
        CNN1D.summary()

    elif args.mname == "bidirectionalLSTM":
        # This branch trains on the random data generated from args.train above.
        if args.train is None:
            raise ValueError("bidirectionalLSTM test needs args.train = [epochs, batch_size, nsize, nstep, nfeature, ntarget, nclass]")

        bLSTM, callbacks_, optimizer_ = NNBuilder().bidirectionalLSTM(merge_mode="concat", cells=[10, 20], inshape=[nstep, nfeature], outshape=[ntarget, nclass], outactfn=["sigmoid", "softmax"])

        bLSTM.compile(loss={"Loss1": "mae", "Loss2": "categorical_crossentropy"}, optimizer=optimizer_)
        print(y_train[1])
        print(X_train.shape, y_train[0].shape, y_train[1].shape)
        history = bLSTM.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, callbacks=callbacks_, validation_split=0.3, verbose=1, shuffle=True)

        
#         (None, 4, 20) mul

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
****** epochs = 100, batch_size = 100, nsize = 10000, nstep = 6, nfeature = 5, ntarget = 4, nclass = 5
****** X_train.shape = (10000, 6, 5), y_train[0].shape = (10000, 4), y_train[1].shape = (10000, 5)


2021-02-25 15:56:57,047, bidirectionalLSTM-103-INFO: 01, x.shape = (None, 6, 20)
2021-02-25 15:56:57,429, bidirectionalLSTM-103-INFO: 02, x.shape = (None, 6, 40)
2021-02-25 15:56:57,498, converted_call-603-INFO: features = 40
2021-02-25 15:56:57,512, converted_call-603-INFO: timesteps = 21
2021-02-25 15:56:57,526, converted_call-603-INFO: x.shape (batchsize, m, k) = (None, 40, 20)
2021-02-25 15:56:57,539, converted_call-603-INFO: W.shape (k, m) = (20, 40)
2021-02-25 15:56:57,554, converted_call-603-INFO: h_t.shape (batchsize, m) (need to expand dim) = (None, 40)
2021-02-25 15:56:57,573, converted_call-603-INFO: scored.shape (batchsize, m, 1) = (None, 40, 1)
2021-02-25 15:56:57,587, converted_call-603-INFO: alpha_i.shape (batchsize, m, 1) = (None, 40, 1)
2021-02-25 15:56:57,601, converted_call-603-INFO: shape of W_h (m, m) = (40, 40), h_t (batchsize, m, 1) = (None, 40, 1)
2021-02-25 15:56:57,615, converted_call-603-INFO: shape of W_v (m, k) = (40, 20), context_vector (batchsize, k, 1) =

Tensor("TPCNN1D/TPCNN1D/add_2:0", shape=(None, 20, 40), dtype=float32)
Model: "bidirectionalLSTM"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 6, 5)]       0                                            
__________________________________________________________________________________________________
BLSTM_1 (Bidirectional)         (None, 6, 20)        1280        input[0][0]                      
__________________________________________________________________________________________________
BLSTM_2 (Bidirectional)         (None, 6, 40)        6560        BLSTM_1[0][0]                    
__________________________________________________________________________________________________
H (Lambda)                      (None, 5, 40)        0           BLSTM_2[0][0]                    
___________

2021-02-25 15:56:58,850, converted_call-603-INFO: features = 40
2021-02-25 15:56:58,864, converted_call-603-INFO: timesteps = 21
2021-02-25 15:56:58,879, converted_call-603-INFO: x.shape (batchsize, m, k) = (100, 40, 20)
2021-02-25 15:56:58,893, converted_call-603-INFO: W.shape (k, m) = (20, 40)
2021-02-25 15:56:58,906, converted_call-603-INFO: h_t.shape (batchsize, m) (need to expand dim) = (100, 40)
2021-02-25 15:56:58,927, converted_call-603-INFO: scored.shape (batchsize, m, 1) = (100, 40, 1)
2021-02-25 15:56:58,943, converted_call-603-INFO: alpha_i.shape (batchsize, m, 1) = (100, 40, 1)
2021-02-25 15:56:58,959, converted_call-603-INFO: shape of W_h (m, m) = (40, 40), h_t (batchsize, m, 1) = (100, 40, 1)
2021-02-25 15:56:58,974, converted_call-603-INFO: shape of W_v (m, k) = (40, 20), context_vector (batchsize, k, 1) = (100, 20, 1)


Tensor("bidirectionalLSTM/TPCNN1D/TPCNN1D/add:0", shape=(100, 20, 40), dtype=float32)


2021-02-25 15:57:00,969, converted_call-603-INFO: features = 40
2021-02-25 15:57:00,985, converted_call-603-INFO: timesteps = 21
2021-02-25 15:57:00,999, converted_call-603-INFO: x.shape (batchsize, m, k) = (100, 40, 20)
2021-02-25 15:57:01,013, converted_call-603-INFO: W.shape (k, m) = (20, 40)
2021-02-25 15:57:01,027, converted_call-603-INFO: h_t.shape (batchsize, m) (need to expand dim) = (100, 40)
2021-02-25 15:57:01,048, converted_call-603-INFO: scored.shape (batchsize, m, 1) = (100, 40, 1)
2021-02-25 15:57:01,063, converted_call-603-INFO: alpha_i.shape (batchsize, m, 1) = (100, 40, 1)
2021-02-25 15:57:01,079, converted_call-603-INFO: shape of W_h (m, m) = (40, 40), h_t (batchsize, m, 1) = (100, 40, 1)
2021-02-25 15:57:01,093, converted_call-603-INFO: shape of W_v (m, k) = (40, 20), context_vector (batchsize, k, 1) = (100, 20, 1)


Tensor("bidirectionalLSTM/TPCNN1D/TPCNN1D/add:0", shape=(100, 20, 40), dtype=float32)






2021-02-25 15:57:04,729, converted_call-603-INFO: features = 40
2021-02-25 15:57:04,745, converted_call-603-INFO: timesteps = 21
2021-02-25 15:57:04,761, converted_call-603-INFO: x.shape (batchsize, m, k) = (100, 40, 20)
2021-02-25 15:57:04,776, converted_call-603-INFO: W.shape (k, m) = (20, 40)
2021-02-25 15:57:04,791, converted_call-603-INFO: h_t.shape (batchsize, m) (need to expand dim) = (100, 40)
2021-02-25 15:57:04,812, converted_call-603-INFO: scored.shape (batchsize, m, 1) = (100, 40, 1)
2021-02-25 15:57:04,828, converted_call-603-INFO: alpha_i.shape (batchsize, m, 1) = (100, 40, 1)
2021-02-25 15:57:04,843, converted_call-603-INFO: shape of W_h (m, m) = (40, 40), h_t (batchsize, m, 1) = (100, 40, 1)
2021-02-25 15:57:04,858, converted_call-603-INFO: shape of W_v (m, k) = (40, 20), context_vector (batchsize, k, 1) = (100, 20, 1)


Tensor("bidirectionalLSTM/TPCNN1D/TPCNN1D/add:0", shape=(100, 20, 40), dtype=float32)
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100

In [29]:
def logcosh(a, t):
    '''
        Scaled log-cosh: (1 / a) * log(cosh(a * t)).

        a: non-zero scale factor
        t: scalar or array-like argument

        return a numpy scalar, or an array with the broadcast shape of a and t

        cosh overflows to inf once |a * t| exceeds roughly 710, so the value is computed as
        log(cosh(x)) = log((e^x + e^-x) / 2) = logaddexp(x, -x) - log(2),
        which stays finite for large |x|.
    '''
    x = np.multiply(a, t)
    return (1 / a) * (np.logaddexp(x, -x) - np.log(2.0))

In [30]:
if __name__ == "__main__":

    #### check losses
    # Run a saved model on one year of data and plot, for several loss functions, the
    # per-sample loss against the prediction error of the last target column.

    from dgenerator import dgenerator

    from sklearn.preprocessing import MinMaxScaler

    tperiod = [2016010101, 2016123124]
    n_in = 6
    n_out = 1
    mode = "test"
    vstack = True
    fnpy = True
    # NOTE(review): machine-specific absolute paths; the cell fails with FileNotFoundError
    # when the cached .npy file for tperiod is not present under npyd.
    npyd = "/home/yuzhe/DataScience/dataset"
    gif = "/home/yuzhe/CODE/ProgramT1/GRDTools/SRC/RES/GI/1500_decode_stationlist_without_space.txt"

    dg = dgenerator(gif=gif, npyd=npyd)
    # Value range per variable (row 0 = lower bound, row 1 = upper bound), e.g.
    #     vinfo = {"Temp": [-20.0, 50.0],
    #              "RH": [0.0, 100.0],
    #              "Pres": [600.0, 1100.0],
    #              "Precp": [0.0, 220.0]}
    vinfo = pd.DataFrame(dg.vrange)
    print(vinfo)
    dataset = dg.hrfgenerator(tperiod, n_in=n_in, n_out=n_out, mode=mode, rescale=True, reformat=True, vstack=vstack, fnpy=fnpy, generator=False)

    datetimes = dataset[1]
    nsize = len(datetimes)
    print(dataset[0].shape)

    saved_model = "../QC/model/lstm1_0055_0.008_0.011_202008111819_2.hdf5"
    model = NNBuilder.mloader(saved_model)

    # Fitting on the two range rows makes the scaler map [lower, upper] of each variable to [0, 1].
    scaler = MinMaxScaler()
    scaler.fit(vinfo.values)

    print(scaler.inverse_transform([[0.7, 0.6, 0.7, 0.2]]))

    fig, ax = plt.subplots(figsize=(16, 10))

    n = nsize

    # Drop rows containing NaN; the last 4 columns are the targets and the remaining
    # columns are reshaped to (samples, 6 timesteps, 4 features).
    scaled = dataset[0]
    scaled = scaled[~np.isnan(scaled).any(axis=1)]
    X_test = np.reshape(scaled[:, :-4], (-1, 6, 4))
    y_true = scaled[:, -4:]

    y_true = tf.convert_to_tensor(y_true, dtype=tf.float32)
    y_pred = model.predict(X_test)
    y_pred = tf.convert_to_tensor(y_pred, dtype=tf.float32)

    # Only the last target column is compared; xynorm is the signed error, shape (samples, 1).
    y_true = tf.reshape(y_true[:, -1], [-1, 1])
    y_pred = tf.reshape(y_pred[:, -1], [-1, 1])
    xynorm = tf.subtract(y_pred, y_true)

    print(y_true.shape, y_true)
    print(y_pred.shape, y_pred)
    print(xynorm.shape)

    sample_weight = 1

    # Reduction.NONE keeps one loss value per sample so it can be plotted against the error.
    loss = YZKError(reduction=tf.losses.Reduction.NONE)(y_true, y_pred)
    ax.scatter(xynorm, loss, label="YZK")
    print('yzk-loss: ', loss)

    loss = tf.losses.LogCosh(reduction=tf.losses.Reduction.NONE)(y_true, y_pred)
    # The message announces shapes, so print loss.shape rather than the whole tensor.
    print("shape of loss = {}, xynorm = {}".format(loss.shape, xynorm.shape))
    ax.scatter(xynorm, loss, label="LogCosh")

    loss = tf.losses.MeanAbsoluteError(reduction=tf.losses.Reduction.NONE)(y_true, y_pred)
    ax.scatter(xynorm, loss, label="MAE")

    loss = tf.losses.CosineSimilarity(reduction=tf.losses.Reduction.NONE)(y_true, y_pred, sample_weight=sample_weight)
    ax.scatter(xynorm, loss, label="Cos")

    loss = tf.keras.losses.MeanSquaredError(reduction=tf.losses.Reduction.NONE)(y_true, y_pred)
    ax.scatter(xynorm, loss, label="MSE")

    loss = tf.keras.losses.MeanSquaredLogarithmicError(reduction=tf.losses.Reduction.NONE)(y_true, y_pred)
    ax.scatter(xynorm, loss, label="MSLE")

    loss = tf.keras.losses.Huber(reduction=tf.losses.Reduction.NONE, delta=0.25)(y_true, y_pred, sample_weight=sample_weight)
    ax.scatter(xynorm, loss, label="Huber")

    ax.set_xlabel("y_pred - y_true")
    ax.set_ylabel("loss")
    ax.legend()

2021-02-25 15:29:32,216, hrfgenerator-367-INFO: hrfgenerator-vnames-199: ['Temp', 'RH', 'Pres', 'Precp']


   Temp     RH    Pres  Precp
0 -20.0    0.0   600.0    0.0
1  50.0  100.0  1100.0  220.0


FileNotFoundError: [Errno 2] No such file or directory: '/home/yuzhe/DataScience/dataset/hrf_2016010101_2016123124_test.npy'

In [None]:
def generate_sample_weights(training_data, class_weight_dictionary): 
    '''
        Look up one weight per sample from one-hot encoded labels.

        training_data: iterable of one-hot rows (arrays); the position of the first
                       element equal to 1 is taken as the class of the row
        class_weight_dictionary: mapping from class index to weight

        return a numpy array with one weight per row
    '''
    weights = []
    for one_hot_row in training_data:
        hot_index = np.where(one_hot_row == 1)[0][0]
        weights.append(class_weight_dictionary[hot_index])
    return np.asarray(weights)