- performance evaluation
- date: 2020-08-07
- maintainer: YZK

In [28]:
# jupyter nbconvert --to script mbuilder.ipynb

In [1]:
from datetime import datetime, timedelta
import argparse
import logging
import math
import os
import re
import sys

from collections import deque, Counter
from fbprophet import Prophet
from functools import partial
from imblearn.under_sampling import ClusterCentroids
from imblearn.under_sampling import RandomUnderSampler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import Sequential, Model, losses
from tensorflow.keras.layers import Layer
from tensorflow.keras.layers import Activation, BatchNormalization, Dense, Dropout, Input, LSTM, TimeDistributed
from tensorflow.keras.losses import Loss
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam, SGD
# from tensorflow.keras import initializers, regularizers, constraints
from tensorflow.keras.callbacks import CSVLogger, EarlyStopping, LearningRateScheduler, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from tensorflow.keras.utils import plot_model

# from tensorflow.keras import losses

from tensorflow.python.keras.losses import LossFunctionWrapper
from tensorflow.python.keras.utils import losses_utils
from tensorflow.python.util.tf_export import keras_export

# @keras_export('keras.losses.CosineSimilarityCB')


# from keras.models import Sequential, Model
# from keras.layers import Layer, Dense, Input, LSTM
# from keras.optimizers import SGD
# from keras import initializers, regularizers, constraints
# from keras.callbacks import CSVLogger, EarlyStopping, LearningRateScheduler, ModelCheckpoint, ReduceLROnPlateau, TensorBoard


In [1178]:
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [5]:
def lstmbuilder(units, input_shape, loss, optimizer):
    '''
        input_shape: a tuple (timesteps, nfeatures)
    '''
    
    lstm = Sequential()
    lstm.add(LSTM(units, input_shape=input_shape))
    lstm.add(Dense(1))
    lstm.compile(loss=loss, optimizer=optimizer)
             
    return lstm


# lstmbuilder(10, (10, 3), 'mae', SGD()).summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 10)                560       
_________________________________________________________________
dense (Dense)                (None, 1)                 11        
Total params: 571
Trainable params: 571
Non-trainable params: 0
_________________________________________________________________


In [4]:
class Attention(Layer):
    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        """
        
        Keras Layer that implements an Attention mechanism for temporal data.
        Supports Masking. 
        Follows the work of Raffel et al. [https://arxiv.org/abs/1512.08756]
        # Input shape
            3D tensor with shape: `(samples, steps, features)`.
        # Output shape
            2D tensor with shape: `(samples, features)`.
        :param kwargs:
        Just put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
        The dimensions are inferred based on the output shape of the RNN.
        Example:
            model.add(LSTM(64, return_sequences=True))
            model.add(Attention())
        
        tensorflow & keras reference:
            https://www.tensorflow.org/guide/keras/custom_layers_and_models
            https://www.tensorflow.org/guide/keras/masking_and_padding
            https://www.tensorflow.org/api_docs/python/tf/keras/layers/Masking
            
        """
        self.supports_masking = True
        #self.init = initializations.get('glorot_uniform')
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0
        super(Attention, self).__init__(**kwargs)  # inherit Layer

    def build(self, input_shape):
        '''
            deferring weight creation until the shape of the inputs is known
            input_shape[-1] is the number of features if len(input_shape) == 3
        '''
        
        assert len(input_shape) == 3

        self.W = self.add_weight((input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        
#         self.step_dim = input_shape[-2]
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight((input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        '''
            The __call__() method of your layer will automatically run build the first time it is called. 
            You now have a layer that's lazy and thus easier to use
        '''
        
        # eij = K.dot(x, self.W) TF backend doesn't support it

        # features_dim = self.W.shape[0]
        # step_dim = x._keras_shape[1]

        features_dim = self.features_dim
        step_dim = self.step_dim
        
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
    #print weigthted_input.shape
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        #return input_shape[0], input_shape[-1]
        return input_shape[0], self.features_dim

In [59]:
class NNBuilder():
    def __init__(self, modeld="model", ckptd="ckpt", name="NNBuilder"):
        
        if not os.path.exists(modeld):
            os.makedirs(modeld)
            
        if not os.path.exists(ckptd):
            os.makedirs(ckptd)
        
        self.name = name
        self.modeld = modeld
        self.ckptd = ckptd
        self.callbacks = self._callbacks(modeld, ckptd, name=name)
        self.optimizer = self._optimizer(name="SGD")
        
    def TPALSTM(self):
        embedding_layer = Embedding(nb_words, EMBEDDING_DIM,
                                    weights=[embedding_matrix],
                                    input_length=MAX_SEQUENCE_LENGTH,
                                    trainable=False)
        
        lstm_layer = LSTM(num_lstm, dropout=rate_drop_lstm, recurrent_dropout=rate_drop_lstm)

        sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
        embedded_sequences_1 = embedding_layer(sequence_1_input)
        x1 = lstm_layer(embedded_sequences_1)

        sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
        embedded_sequences_2 = embedding_layer(sequence_2_input)
        y1 = lstm_layer(embedded_sequences_2)

        merged = concatenate([x1, y1])
        merged = Dropout(rate_drop_dense)(merged)
        merged = BatchNormalization()(merged)

        merged = Dense(num_dense, activation=act)(merged)
        merged = Dropout(rate_drop_dense)(merged)
        merged = BatchNormalization()(merged)

        preds = Dense(1, activation='sigmoid')(merged)

        ########################################
        ## add class weight
        ########################################
        if re_weight:
            class_weight = {0: 1.309028344, 1: 0.472001959}
        else:
            class_weight = None

        ########################################
        ## train the model
        ########################################
        model = Model(inputs=[sequence_1_input, sequence_2_input], \
                outputs=preds)
        model.compile(loss='binary_crossentropy',
                optimizer='nadam',
                metrics=['acc'])
        #model.summary()
        print(STAMP)

        early_stopping =EarlyStopping(monitor='val_loss', patience=3)
        bst_model_path = STAMP + '.h5'
        model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)

        hist = model.fit([data_1_train, data_2_train], labels_train, \
                validation_data=([data_1_val, data_2_val], labels_val, weight_val), \
                epochs=200, batch_size=2048, shuffle=True, \
                class_weight=class_weight, callbacks=[early_stopping, model_checkpoint])

        model.load_weights(bst_model_path)
        bst_val_score = min(hist.history['val_loss'])

        ########################################
        ## make the submission
        ########################################
        print('Start making the submission before fine-tuning')

        preds = model.predict([test_data_1, test_data_2], batch_size=8192, verbose=1)
        preds += model.predict([test_data_2, test_data_1], batch_size=8192, verbose=1)
        preds /= 2

        submission = pd.DataFrame({'test_id':test_ids, 'is_duplicate':preds.ravel()})
        submission.to_csv('%.4f_'%(bst_val_score)+STAMP+'.csv', index=False)
    

    def stackedLSTM(self, shape, cells, target="regression"):    
        
        timesteps = shape[0]
        nfeatures = shape[1]
                
        nlayer = 1
        if isinstance(cells, list):
            units = cells
            nlayer = len(cells)
        else:
            units = [cells]
   
        model = Sequential()
        
        if nlayer > 1:
            for idx in range(nlayer):
                if idx == 0:  # the first hidden layer
                    model.add(LSTM(units[idx], input_shape=(timesteps, nfeatures), return_sequences=True, name="lstm_{}".format(idx))) 
                elif idx == nlayer - 1:  # the last hidden layer
                    model.add(LSTM(units[idx], name="lstm_{}".format(idx))) 
                else:
                    model.add(LSTM(units[idx], return_sequences=True, name="lstm_{}".format(idx))) 
        else:
#             model.add(LSTM(units[0], input_shape=(timesteps, nfeatures), name="lstm"))
            model.add(LSTM(units[0], input_shape=(timesteps, nfeatures), name="lstm_0")) 
                
        if target == "regression":
            model.add(Dense(nfeatures, activation='sigmoid', name="dense"))  # for regression
        else:
            model.add(Dense(nfeatures, activation='softmax', name="dense"))  # for classification
        
        return [model, self.callbacks, self.optimizer]
    
    def LSTMbasicAttention(self, shape, cells):
        '''
            shape = (timestep, feature)
            return [model, optimizer, callbacks]
        '''
        
        nfeatures = shape[1]
        
        inputs = Input(shape, name="input")  # return a tensor
        
        nlayer = 1
        if isinstance(cells, list):
            units = cells
        else:
            units = [cells]
                
        for idx, unit in enumerate(units):
            if idx == 0:
                x = LSTM(unit, return_sequences=True, name="LSTM_{}".format(idx))(inputs)
            else:
                x = LSTM(unit, return_sequences=True, name="LSTM_{}".format(idx))(x)
            x = Attention(shape[0])(x)
            
        outputs = Dense(nfeatures)(x)
        
        model = Model(inputs=inputs, outputs=outputs)
        
        return [model, self.callbacks, self.optimizer]
    
    
    def DNNLSTM(self, units, inshape, outshape, outactfn=["sigmoid"], batchNormalization=True, dropouts=None, activations=None):
        
        '''
            units: [units for Dense_1, ..., units for Dense_n, cells for LSTM_1], i.e. units[-1]: cells for LSTM_1
            inshape: (timesteps, # of features)
            outshape: an integer number for output layer (Dense), ex. 4
            
            NNB = NNBuilder()
            model, callbacks_, optimizer_ = NNB.DNNLSTM([10, 20, 30, 4], inshape=(10, 4), outshape=4, batchNormalization=None)
            model.summary()
            _________________________________________________________________
            Model: "DNNLSTM"
            _________________________________________________________________
            Layer (type)                 Output Shape              Param #   
            =================================================================
            input (InputLayer)           [(None, 10, 4)]           0         
            _________________________________________________________________
            TDense_1 (TimeDistributed)   (None, 10, 10)            50         (4 * 10 + 10)
            _________________________________________________________________
            Activation_1 (TimeDistribute (None, 10, 10)            0         
            _________________________________________________________________
            TDense_2 (TimeDistributed)   (None, 10, 20)            220        (10 * 20 + 20)  
            _________________________________________________________________
            Activation_2 (TimeDistribute (None, 10, 20)            0         
            _________________________________________________________________
            TDense_3 (TimeDistributed)   (None, 10, 30)            630        (20 * 30 + 30)    
            _________________________________________________________________
            Activation_3 (TimeDistribute (None, 10, 30)            0         
            _________________________________________________________________  ↓ (input gate, forget gate, output gate and neuron) 
            LSTM (LSTM)                  (None, 4)                 560        (4 * (30 * 4 + 4 + 4 * 4))
            _________________________________________________________________                    ↑ (cell state pass to the other cells) 
            output (Dense)               (None, 4)                 20         (4 * 4 + 4)       
            =================================================================
            Total params: 1,480
            Trainable params: 1,480
            Non-trainable params: 0
            
            (4 * 10 + 10) + (10 * 20 + 20) + (20 * 30 + 30) + 4 * (30 * 4 + 4 + 4 * 4) + (4 * 4 + 4)   
        '''
        
        assert len(units) >= 2
        
        inputs = Input(inshape, name="input")  

        nlayer = len(units) - 1
        if dropouts is not None:
            if isinstance(dropouts, list):
                assert nlayer == len(dropouts)
            else:
                dropouts = [dropouts for _ in range(nlayer)]
        
        if activations is not None:
            if isinstance(activations, list):
                assert nlayer == len(activations)
            else:
                activations = [activations for _ in range(nlayer)]
        else:
            activations = ["relu" for _ in range(nlayer)]
        
        for i in range(nlayer):
            if i == 0:
                x = TimeDistributed(Dense(units[i]), name="TDense_{}".format(i + 1))(inputs)
            else:
                x = TimeDistributed(Dense(units[i]), name="TDense_{}".format(i + 1))(x)
            if batchNormalization:
                x = TimeDistributed(BatchNormalization(), name="BatchNormalization_{}".format(i + 1))(x)
            x = TimeDistributed(Activation(activations[i]), name="Activation_{}".format(i + 1))(x)
            if dropouts is not None:
                x = TimeDistributed(Dropout(dropouts[i]), name="Dropout_{}".format(i + 1))(x)
        x = LSTM(units[-1], name="LSTM")(x)
        
        
        regloss = Dense(units[-1], activation="relu", name="RDense_1")(x)
        regloss = Dense(outshape[0] * 3, activation="relu", name="RDense_2")(regloss)
        regloss = Dense(outshape[0], activation=outactfn[0], name="regression_output")(regloss)
        
        if len(outactfn) == len(outshape) == 2:
            clsloss = Dense(units[-1], activation="relu", name="CDense_1")(x)
            clsloss = Dense(outshape[1] * 3, activation="relu", name="CDense_2")(clsloss)
            clsloss = Dense(outshape[1], activation=outactfn[1], name="classification_output")(clsloss)
            model = Model(inputs=inputs, outputs=[regloss, clsloss], name="DNNLSTM")
        else:    
            model = Model(inputs=inputs, outputs=regloss, name="DNNLSTM")
        
        return [model, self.callbacks, self.optimizer]

    
    @staticmethod
    def DenseBuilder(units, inputs, batchNormalization=True, dropouts=None, activations=None):
        
        nlayer = 1
        if isinstance(units, list):
            nlayer = len(units)
        else:
            units = [units]
        
        if dropouts is not None:
            if isinstance(dropouts, list):
                assert nlayer == len(dropouts)
            else:
                dropouts = [dropouts for _ in range(nlayer)]
        
        if activations is not None:
            if isinstance(activations, list):
                assert nlayer == len(activations)
            else:
                activations = [activations for _ in range(nlayer)]
        else:
            activations = ["relu" for _ in range(nlayer)]
        
        for i in range(nlayer):
            print(i)
            if i == 0:
                x = Dense(units[i], name="Dense_{}".format(i + 1))(inputs)
            else:
                x = Dense(units[i], name="Dense_{}".format(i + 1))(x)
            if batchNormalization:
                x = BatchNormalization(name="BatchNormalization_{}".format(i + 1))(x)
            x = Activation(activations[i], name="Activation_{}".format(i + 1))(x)
            if dropouts is not None:
                x = Dropout(dropouts[i], name="Dropout_{}".format(i + 1))(x)
    
        return x
        
    @staticmethod
    def _callbacks(modeld, ckptd, mmonitor="val_loss", emonitor="loss", lmonitor="val_loss", name="ckpt"):
        
        '''
            mmonitor: monitor for model 
            emonitor: monitor for earlystopping
            lmonitor: monitor for learning rate
        '''
        
        timestamp = datetime.now().strftime("%Y%m%d%H%M")
        
        name_ = "{epoch:04d}_{loss:.3f}_{val_loss:.3f}"
#         checkpointer = ModelCheckpoint(filepath=os.path.join(modeld, "{0}_{1}_{2}.hdf5".format(name, name_, timestamp)),
        checkpointer = ModelCheckpoint(filepath=os.path.join(modeld, "{0}.hdf5".format(name)),
                                       verbose=1,
                                       save_best_only=True, 
                                       monitor=mmonitor)
        
        earlystopper = EarlyStopping(monitor=emonitor, patience=20)

#         reduceLR = ReduceLROnPlateau(monitor=lmonitor, factor=0.5, patience=20, min_lr=0.0001)

        reduceLR = ReduceLROnPlateau(monitor=lmonitor, factor=0.9, patience=10, min_lr=0.0001)
        
        tb = TensorBoard(log_dir=ckptd)

        csvlogger = CSVLogger(os.path.join(ckptd, "{}_{}.log".format(name, timestamp)), append=False, separator=",")

        # Learning rate schedule.
    #     lr_schedule = LearningRateScheduler(fixed_schedule, verbose=0)

        return [checkpointer, earlystopper, reduceLR, tb, csvlogger]

#         return [checkpointer, earlystopper, reduceLR, csvlogger]
    
    @staticmethod
    def _optimizer(lr=1e-1, name="Adam"):
        if name == "SGD":
            optimizer = SGD(learning_rate=lr, momentum=0.9, nesterov=True)
        else:
            optimizer = Adam(learning_rate=lr)
        return optimizer
    
    
    @staticmethod
    def mloader(filepath, custom_objects):
        if custom_objects is not None:
            return load_model(filepath, custom_objects=custom_objects)
        else:
            return load_model(filepath)
            
        

In [60]:
(4 * 10 + 10) + (10 * 20 + 20) + (20 * 30 + 30) + 4 * (30 * 4 + 4 + 4 * 4)

1460

In [302]:
def l2norm(x):
    if isinstance(x, np.ndarray):
        l2 = np.sum(x**2)
    else:
        l2 = tf.map_fn(lambda t: tf.reduce_sum(tf.square(t)), elems=x)
    
    return l2

def cosine_similarity(y_true, y_pred, axis=1):
    
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)
    
#     l2norm1 = tf.reduce_sum(tf.square(y_true), axis=axis)
#     l2norm2 = tf.reduce_sum(tf.square(y_pred), axis=axis)
    l2norm1 = tf.map_fn(lambda t: tf.reduce_sum(tf.square(t)), elems=y_true)
    l2norm2 = tf.map_fn(lambda t: tf.reduce_sum(tf.square(t)), elems=y_pred)
    yy = tf.reduce_sum(tf.multiply(y_true, y_pred), axis=axis)
        
    return -tf.divide(yy, tf.multiply(tf.sqrt(l2norm1), tf.sqrt(l2norm2)))
    

In [1149]:
class YZKError(Loss):
    def __init__(self,
                 reduction=losses_utils.ReductionV2.AUTO,
                 name=None,
                 element_weight=None, 
                 penalized=None):
    
        """ Initializes `YZKError` instance.
            Args:
              reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
                loss. Default value is `AUTO`. `AUTO` indicates that the reduction
                option will be determined by the usage context. For almost all cases
                this defaults to `SUM_OVER_BATCH_SIZE`. When used with
                `tf.distribute.Strategy`, outside of built-in training loops such as
                `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
                will raise an error. Please see this custom training [tutorial](
                  https://www.tensorflow.org/tutorials/distribute/custom_training)
                for more details.
              name: Optional name for the op. Defaults to 'mean_squared_error'.
        """

        super(YZKError, self).__init__(name=name, reduction=reduction)
        self.element_weight = element_weight
        self.penalized = penalized
    
    def call(self, y_true, y_pred):

        element_weight = self.element_weight

#         logging.info(self.reduction, element_weight)
        
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)
        batchsize = y_pred.shape[0] 
        assert batchsize == y_true.shape[0]
        
#         logging.info("YZKError, shape of y_true = {}, y_pred = {}".format(y_true.shape, y_pred.shape))
        
        PLoss = 0.    
        penalized = self.penalized
        if penalized is not None:
            
            y_pred_ = tf.convert_to_tensor(y_pred[:, penalized])
            y_true_ = tf.convert_to_tensor(y_true[:, penalized])
            
            y_pred_ = tf.reshape(y_pred_, [-1, 1])
            y_true_ = tf.reshape(y_true_, [-1, 1])
#                 PLoss = tf.losses.MeanAbsoluteError(reduction=tf.losses.Reduction.NONE)(y_true[:, penalized], y_pred[:, penalized])
            PLoss = tf.losses.MeanAbsoluteError(reduction=self.reduction)(y_true_, y_pred_)
        
#         HuberLoss = 0
#         MAELoss = 0
#         if element_weight is not None:
#             element_weight = tf.convert_to_tensor(element_weight)
#             if element_weight.shape != []:  # is not a scale
#                 nelement = y_pred.shape[1]
#                 assert nelement == y_true.shape[1] 
#                 element_weight = tf.broadcast_to(element_weight, [batchsize, nelement])
                
#             y_pred_ = tf.math.multiply(y_pred, element_weight)
#             y_true_ = tf.math.multiply(y_true, element_weight)
        
#             HuberLoss = tf.losses.Huber(reduction=self.reduction, delta=0.5)(y_true_, y_pred_)
#         else:
#             HuberLoss = tf.losses.Huber(reduction=self.reduction, delta=0.5)(y_true, y_pred)
        
        MAELoss = tf.losses.MeanAbsoluteError(reduction=self.reduction)(y_true, y_pred)


    #         CosSimLoss = tf.losses.CosineSimilarity(reduction=tf.losses.Reduction.NONE)(y_true, y_pred, sample_weight=sample_weight)
        CosSimLoss = tf.losses.CosineSimilarity(reduction=self.reduction)(y_true, y_pred)

#         logging.info("YZKError, PLoss: {}\n, HuberLoss: {}\n, MAELoss: {}\n, CosSimLoss: {}\n".format(PLoss, HuberLoss, MAELoss, CosSimLoss))
        
        if penalized is not None:
            return tf.math.add(tf.math.add(tf.math.scalar_mul(3, PLoss), tf.math.scalar_mul(2, MAELoss)), tf.math.scalar_mul(1, CosSimLoss))
        else:
            return tf.math.add(tf.math.scalar_mul(2, MAELoss), tf.math.scalar_mul(1, CosSimLoss))


# inverse transform sigmoid(x)
<center>
<font size=6>
$
\begin{align}
\sigma &=\frac{1}{1+e^{-x}} \\
\frac{1}{\sigma} &= 1+e^{-x} \\
\frac{1}{\sigma}-1 &= e^{-x} \\
\log{\frac{1-\sigma}{\sigma}} &= \log{e^{-x}} \\
x &= -\log{\frac{1-\sigma}{\sigma}} \\
x &= \log{\frac{\sigma}{1-\sigma}}
\end{align}
$
</font size>
</center>

In [74]:
class WeightedBinaryCrossntropy(Loss):
    def __init__(self,
                 reduction=losses_utils.ReductionV2.AUTO,
                 name=None,
                 element_weight=None):
    
        """ Initializes `YZKError` instance.
            Args:
              reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
                loss. Default value is `AUTO`. `AUTO` indicates that the reduction
                option will be determined by the usage context. For almost all cases
                this defaults to `SUM_OVER_BATCH_SIZE`. When used with
                `tf.distribute.Strategy`, outside of built-in training loops such as
                `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
                will raise an error. Please see this custom training [tutorial](
                  https://www.tensorflow.org/tutorials/distribute/custom_training)
                for more details.
               element_weight: [w for y_true=1, w for y_true=0]
            Ex.
                WeightedBinaryCrossntropy(reduction=tf.losses.Reduction.NONE, element_weight=[5, 1])(y_true, y_pred)
        """

        super(WeightedBinaryCrossntropy, self).__init__(name=name, reduction=reduction)
        self.element_weight = element_weight
    
    def call(self, y_true, y_pred, from_logits=False):
        
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)
        
        element_weight = self.element_weight
        
        print(element_weight)
        
        if not from_logits:
            y_pred = tf.clip_by_value(y_pred, tf.keras.backend.epsilon(), 1 - tf.keras.backend.epsilon())
            y_pred = tf.math.log(y_pred / (1 - y_pred))
            y_pred = tf.math.sigmoid(y_pred)
            
        if element_weight is None:
            bc = -(y_true * tf.math.log(y_pred) + (1 - y_true) * tf.math.log(1 - y_pred))
        else:
            bc = -(y_true * tf.math.log(y_pred) * element_weight[0] + (1 - y_true) * tf.math.log(1 - y_pred) * element_weight[1])

        return tf.math.reduce_mean(bc, axis=-1)


In [53]:
def train(X_train, y_train, epochs, batch_size, mconf, loss="mae", modeld="model", ckptd="ckpt", name="NN", earlystopper=True):
    '''
        X: [nsize, nstn, timestep, feature]
        y: [nsize, nstn, features]
        mconf: {name, units, inshape, outshape, outactfn, batchNormalization, dropouts, activations}
    '''

    timesteps = X_train.shape[1]
    nfeatures = X_train.shape[2]
    
#     timesteps = 6
#     nfeatures = 4

    NN = NNBuilder(modeld=modeld, ckptd=ckptd, name=name)
    if mconf["name"] == "DNNLSTM":
        LSTM, callbacks_, optimizer_ = NN.DNNLSTM(mconf["units"], inshape=(timesteps, nfeatures), outshape=nfeatures, dropouts=mconf["dropouts"], activations=mconf["activations"])
    elif mconf["name"] == "stackedLSTM":
        LSTM, callbacks_, optimizer_ = NN.stackedLSTM(shape=(timesteps, nfeatures), cells=60)
    else:
        logging.warning("model name undefined.")
        
    LSTM.summary()
    LSTM.compile(loss=loss, optimizer=optimizer_)
    
#     callbacks_ = checkpointer, earlystopper, reduceLR, tb, csvlogger
    
    if not earlystopper:
        callbacks_.pop(1)

    history = LSTM.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, callbacks=callbacks_, validation_split=0.1, verbose=2, shuffle=True)
#     history = LSTM.fit(x=dg, epochs=epochs, batch_size=batch_size, callbacks=callbacks_, verbose=2, shuffle=True)

    fig, ax = plt.subplots()
    ax.plot(history.history['loss'], label='train')
    ax.plot(history.history['val_loss'], label='test')
    ax.legend(fontsize=14)
    plt.savefig("{}/{}_trainingHistory.png".format(ckptd, name))
    plt.close()
    
    
    return history

In [61]:
if __name__ == "__main__":
    
    import tensorflow as tf

    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    # import tensorflow as tf
    gpus = tf.config.experimental.list_physical_devices('GPU')
    nMB  = 1024 * 8
    
    if gpus:
        try:
            tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=nMB)])
        except RuntimeError as e:
            print(e)
    
    # 1.
    inputs = Input(shape=(10), name="Input")
    x = NNBuilder.DenseBuilder([10, 30, 20, 40], inputs, dropouts=0.25)
    model = Model(inputs=inputs, outputs=x, name="DNN")
    model.summary()

    # 2. 
    NNB = NNBuilder()
    model, callbacks_, optimizer_ = NNB.DNNLSTM([10, 20, 30, 10], inshape=(6, 4), outshape=[4, 1], outactfn=["sigmoid"], batchNormalization=None)
    model.summary()
    plot_model(model, to_file="DNNLSTM.png", show_shapes=True)

    model.compile(loss={"regression_output": "mae", "classification_output": "binary_crossentropy"},
                  metrics={"regression_output": "mae", "classification_output": "accuracy"},
                  optimizer=optimizer_)

    epochs = 100
    batch_size = 100
    n = 10000
    X_train = np.random.random_sample((n, 6, 4))
    y_train = [np.random.random_sample((n, 4)), np.random.randint(low=0, high=2, size=(n, 1))]
    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, callbacks=callbacks_, validation_split=0.1, verbose=2, shuffle=True)

    
    
    sys.exit()
    
    stackedLSTM, callbacks_, optimizer_ = NNBuilder().stackedLSTM([6, 4], 60)
    stackedLSTM.compile(loss="mae", optimizer=optimizer_)
    stackedLSTM.summary()
#     history = stackedLSTM.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, callbacks=callbacks_, validation_split=0.1, verbose=2, shuffle=True)

    X_train = np.random.random_sample((1000000, 6, 4))
    y_train = np.random.random_sample((1000000, 4))

#     saved_model = "/home/yuzhe/DataScience/QC/model/lstm1_0154_0.009_0.008_202008071814.hdf5"
#     model = NNBuilder.mloader(saved_model)


    train(X_train, y_train, 30, 5000, loss=YZKError(element_weight=[1 / 6., 1 / 6., 1 / 6., 1 / 2.]), name="NNBuilderTest1")
#     train(X_train, y_train, 30, 5000, loss=YZKError(), name="NNBuilderTest")


0
1
2
3
Model: "DNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           [(None, 10)]              0         
_________________________________________________________________
Dense_1 (Dense)              (None, 10)                110       
_________________________________________________________________
BatchNormalization_1 (BatchN (None, 10)                40        
_________________________________________________________________
Activation_1 (Activation)    (None, 10)                0         
_________________________________________________________________
Dropout_1 (Dropout)          (None, 10)                0         
_________________________________________________________________
Dense_2 (Dense)              (None, 30)                330       
_________________________________________________________________
BatchNormalization_2 (BatchN (None, 30)                




Epoch 00001: val_loss improved from inf to 0.94079, saving model to model/NNBuilder.hdf5
90/90 - 1s - loss: 0.9451 - regression_output_loss: 0.2519 - classification_output_loss: 0.6932 - regression_output_mae: 0.2519 - classification_output_accuracy: 0.5027 - val_loss: 0.9408 - val_regression_output_loss: 0.2475 - val_classification_output_loss: 0.6933 - val_regression_output_mae: 0.2475 - val_classification_output_accuracy: 0.4930
Epoch 2/100

Epoch 00002: val_loss did not improve from 0.94079
90/90 - 0s - loss: 0.9450 - regression_output_loss: 0.2518 - classification_output_loss: 0.6932 - regression_output_mae: 0.2518 - classification_output_accuracy: 0.4971 - val_loss: 0.9410 - val_regression_output_loss: 0.2475 - val_classification_output_loss: 0.6935 - val_regression_output_mae: 0.2475 - val_classification_output_accuracy: 0.4930
Epoch 3/100

Epoch 00003: val_loss did not improve from 0.94079
90/90 - 0s - loss: 0.9451 - regression_output_loss: 0.2518 - classification_output_loss:

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [320]:
def logcosh(a, t):
    return (1 / a) * np.log(np.cosh(a * t))

In [27]:
if __name__ == "__main__":
    
    from dgenerator import dgenerator

    from sklearn.preprocessing import MinMaxScaler
    
    tperiod = [2016010101, 2016123124]
    n_in = 6
    n_out = 1
    mode = "test"
    vstack = True
    fnpy = True
    npyd = "/home/yuzhe/DataScience/dataset"
    gif = "/home/yuzhe/CODE/ProgramT1/GRDTools/SRC/RES/GI/1500_decode_stationlist_without_space.txt"

    dg = dgenerator(gif=gif, npyd=npyd)
    vinfo = pd.DataFrame(dg.vrange)  
    vinfo = pd.DataFrame(vinfo)
    print(vinfo)
#     vinfo = {"Temp": [-20.0, 50.0],
#              "RH": [0.0, 100.0], 
#              "Pres": [600.0, 1100.0], 
#              "Precp": [0.0, 220.0]}
    dataset = dg.hrfgenerator(tperiod, n_in=n_in, n_out=n_out, mode=mode, rescale=True, reformat=True, vstack=vstack, fnpy=fnpy, generator=False)
    
    datetimes = dataset[1]
    nsize = len(datetimes)
    print(dataset[0].shape)

    
    saved_model = "../QC/model/lstm1_0055_0.008_0.011_202008111819_2.hdf5"
    model = NNBuilder.mloader(saved_model)
    
    scaler = MinMaxScaler()
    scaler.fit(vinfo.values)
    
    print(scaler.inverse_transform([[0.7, 0.6, 0.7, 0.2]]))

    fig, ax = plt.subplots(figsize=(16, 10))
    
    n = nsize
    
#     x = dataset[0][:, -4:]
#     x = x[~np.isnan(x).any(axis=1)]
#     idx = np.random.choice(np.arange(x.shape[0]), n, replace=False)
#     x = x[idx, 0:]
#     x = tf.convert_to_tensor(x)

    scaled = dataset[0]
    scaled = scaled[~np.isnan(scaled).any(axis=1)]
    X_test = np.reshape(scaled[:, :-4], (-1, 6, 4))
    y_true = scaled[:, -4:]

    y_true = tf.convert_to_tensor(y_true, dtype=tf.float32)
    y_pred = model.predict(X_test)
    y_pred = tf.convert_to_tensor(y_pred, dtype=tf.float32)
#     xynorm = tf.norm(tf.subtract(y_pred, y_true), axis=1) 

    y_true = tf.reshape(y_true[:, -1], [-1, 1])
    y_pred = tf.reshape(y_pred[:, -1], [-1, 1])
    xynorm = tf.subtract(y_pred, y_true)

    print(y_true.shape, y_true)
    print(y_pred.shape, y_pred)
    print(xynorm.shape)
    
    sample_weight = 1
#     sample_weight = tf.broadcast_to(sample_weight, y_pred.shape)
    
    loss = YZKError(reduction=tf.losses.Reduction.NONE)(y_true, y_pred)
    ax.scatter(xynorm, loss, label="YZK")
    print('yzk-loss: ', loss)
    
    loss = tf.losses.LogCosh(reduction=tf.losses.Reduction.NONE)(y_true, y_pred)
#     loss = tf.sort(loss)

    print("shape of loss = {}, xynorm = {}".format(loss, xynorm.shape))
    ax.scatter(xynorm, loss, label="LogCosh")
#     mposi1 = y_true[tf.where(loss == tf.math.reduce_max(loss)).numpy()[0, 0], :]
#     mposi2 = y_pred[tf.where(loss == tf.math.reduce_max(loss)).numpy()[0, 0], :]
#     print(mposi1, mposi2)

    loss = tf.losses.MeanAbsoluteError(reduction=tf.losses.Reduction.NONE)(y_true, y_pred)
#     loss = tf.sort(loss)
    ax.scatter(xynorm, loss, label="MAE")
#     mposi1 = y_true[tf.where(loss == tf.math.reduce_max(loss)).numpy()[0, 0], :]
#     mposi2 = y_pred[tf.where(loss == tf.math.reduce_max(loss)).numpy()[0, 0], :]
#     print(mposi1, mposi2)

    loss = tf.losses.CosineSimilarity(reduction=tf.losses.Reduction.NONE)(y_true, y_pred, sample_weight=sample_weight)
#     loss = tf.sort(loss)
    ax.scatter(xynorm, loss, label="Cos")
#     mposi1 = y_true[tf.where(loss == tf.math.reduce_max(loss)).numpy()[0, 0], :]
#     mposi2 = y_pred[tf.where(loss == tf.math.reduce_max(loss)).numpy()[0, 0], :]
#     print(mposi1, mposi2)


#     loss = tf.keras.losses.KLDivergence(reduction=tf.losses.Reduction.NONE)(y_true, y_pred)
#     loss = tf.sort(loss)
#     ax.scatter(xynorm, loss, label="KL")

    loss = tf.keras.losses.MeanSquaredError(reduction=tf.losses.Reduction.NONE)(y_true, y_pred)
    ax.scatter(xynorm, loss, label="MSE")


    loss = tf.keras.losses.MeanSquaredLogarithmicError(reduction=tf.losses.Reduction.NONE)(y_true, y_pred)
#     loss = tf.sort(loss)
    ax.scatter(xynorm, loss, label="MSLE")
#     mposi1 = y_true[tf.where(loss == tf.math.reduce_max(loss)).numpy()[0, 0], :]
#     mposi2 = y_pred[tf.where(loss == tf.math.reduce_max(loss)).numpy()[0, 0], :]
#     print(mposi1, mposi2)
    
    loss = tf.keras.losses.Huber(reduction=tf.losses.Reduction.NONE, delta=0.25)(y_true, y_pred, sample_weight=sample_weight)
#     loss = tf.sort(loss)
    ax.scatter(xynorm, loss, label="Huber")
#     mposi1 = y_true[tf.where(loss == tf.math.reduce_max(loss)).numpy()[0, 0], :]
#     mposi2 = y_pred[tf.where(loss == tf.math.reduce_max(loss)).numpy()[0, 0], :]
#     print(mposi1, mposi2)

    ax.legend()

   Temp     RH    Pres  Precp
0 -20.0    0.0   600.0    0.0
1  50.0  100.0  1100.0  220.0
(2745636, 28)


OSError: SavedModel file does not exist at: ../QC/model/lstm1_0055_0.008_0.011_202008111819_2.hdf5/{saved_model.pbtxt|saved_model.pb}

In [None]:
def generate_sample_weights(training_data, class_weight_dictionary): 
    sample_weights = [class_weight_dictionary[np.where(one_hot_row==1)[0][0]] for one_hot_row in training_data]
    return np.asarray(sample_weights)