In [1]:
import logging
logging.getLogger().setLevel(logging.INFO)
import numpy as np
import pandas as pd
from utils.utils import PROJECT_DATA_DIR
import os
import mxnet as mx
import mxnet.ndarray as nd
from time import time
from sklearn.preprocessing import StandardScaler, QuantileTransformer
from load_preprocess import (load_data,
                             get_xy,
                             scale_data,
                             binarize_y,
                             prepare_data)
import warnings
warnings.filterwarnings("ignore",  category=DeprecationWarning)
from DL_mxnet_symbol import train_dnn, train_lstm

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
train = load_data(file='all_training_400_minisensor_1.csv')
test = load_data(file='all_test_400_minisensor.csv')
xtrain, ytrain, xtest, ytest = prepare_data(train, test, binary_class=True)
xtrain_sc, xtest_sc = scale_data(xtrain, xtest)

In [3]:
(ytrain.shape, ytest.shape)

((91104,), (22776,))

In [4]:
xtrain_mx = mx.nd.array(xtrain_sc, dtype=np.float32)
ytrain_mx = mx.nd.array(ytrain.reshape(-1, 1))
xtest_mx = mx.nd.array(xtest_sc, dtype=np.float32)
ytest_mx = mx.nd.array(ytest.reshape(-1, 1))
batch_size=2**9

train_iter = mx.io.NDArrayIter(
    xtrain_mx,
    ytrain_mx,
    batch_size=batch_size,
    shuffle=True)

val_iter = mx.io.NDArrayIter(
    xtest_mx,
    ytest_mx,
    batch_size=batch_size)


In [5]:
train_dnn(train_iter, val_iter)

INFO:root:Epoch[0] Batch [100]	Speed: 124006.35 samples/sec	accuracy=1.000000
INFO:root:Epoch[0] Train-accuracy=1.000000
INFO:root:Epoch[0] Time cost=0.897
INFO:root:Epoch[0] Validation-accuracy=1.000000
INFO:root:Epoch[1] Batch [100]	Speed: 102596.86 samples/sec	accuracy=1.000000
INFO:root:Epoch[1] Train-accuracy=1.000000
INFO:root:Epoch[1] Time cost=0.864
INFO:root:Epoch[1] Validation-accuracy=1.000000
INFO:root:Epoch[2] Batch [100]	Speed: 103502.52 samples/sec	accuracy=1.000000
INFO:root:Epoch[2] Train-accuracy=1.000000
INFO:root:Epoch[2] Time cost=0.827
INFO:root:Epoch[2] Validation-accuracy=1.000000
INFO:root:Epoch[3] Batch [100]	Speed: 121360.91 samples/sec	accuracy=1.000000
INFO:root:Epoch[3] Train-accuracy=1.000000
INFO:root:Epoch[3] Time cost=0.755
INFO:root:Epoch[3] Validation-accuracy=1.000000
INFO:root:Epoch[4] Batch [100]	Speed: 118012.93 samples/sec	accuracy=1.000000
INFO:root:Epoch[4] Train-accuracy=1.000000
INFO:root:Epoch[4] Time cost=0.793
INFO:root:Epoch[4] Validatio

## LSTM

In [6]:
xtrain_lstm = xtrain.values.reshape(-1, 3)
xtest_lstm = xtest.values.reshape(-1, 3)
scaler = QuantileTransformer(output_distribution='normal')
xtrain_lstm_sc = scaler.fit_transform(xtrain_lstm)
xtest_lstm_sc = scaler.transform(xtest_lstm)

print(xtrain_lstm_sc.shape)
print(xtest_lstm_sc.shape)
""" Change time steps from 400 to 20 to test if this is the problem"""
xtrain_lstm_sc = mx.nd.array(xtrain_lstm_sc.reshape(-1, 400, 3))
val_lstm_sc = mx.nd.array(xtest_lstm_sc.reshape(-1, 400, 3))
print('shape of xtrain_lstm_sc:', xtrain_lstm_sc.shape)

train_lstm_iter = mx.io.NDArrayIter(
    xtrain_lstm_sc,
    ytrain_mx,
    batch_size,
    shuffle=True,
    last_batch_handle='discard')

val_lstm_iter = mx.io.NDArrayIter(
    val_lstm_sc,
    ytest_mx,
    batch_size,
    shuffle=False,
    last_batch_handle='discard')

(36441600, 3)
(9110400, 3)
shape of xtrain_lstm_sc: (91104, 400, 3)


In [7]:
## This line is erroneuous
#train_lstm(train_lstm_iter, val_lstm_iter)

In [8]:
#stacked_rnn_cells = mx.rnn.SequentialRNNCell()
#stacked_rnn_cells.add(mx.rnn.)

In [79]:
def rnn_fused(timesteps=400, 
              num_layers=3, 
              mode='lstm', 
              num_hidden=20,
              dropout=0.4,
              num_outputs=1, 
              batch_size=2**9, 
              input_dim=3):
    
    data = mx.sym.Variable('data')
    """ Reshape input """
    input_shape = (timesteps, batch_size, input_dim)
    data = mx.sym.Reshape(data, shape=input_shape)
    
    """num_hidden: number of units in output symbol"""
    for i in range(num_layers):
        """ Check if data is flowing correctly trough
        the network"""
        outputs = data # this ensures right data flows 
                        # through the network
        fused_lstm_cell = mx.rnn.FusedRNNCell(
            num_hidden=num_hidden, 
            dropout=dropout)
        """ Implement many layers with for-loop as it is
        more effective when using multiple gpus"""
        outputs, _  = fused_lstm_cell.unroll(
            length=timesteps, 
            inputs=data, 
            merge_outputs=True)
        """ Reshape output from LSTM"""
    output_shape = (batch_size, timesteps, num_hidden)
    outputs = mx.sym.Reshape(outputs, shape=output_shape)
    outputs = mx.sym.Dropout(outputs, p=dropout)
    outputs = mx.sym.FullyConnected(
        data=outputs, 
        name='out', 
        num_hidden=num_outputs)
    outputs = mx.sym.LogisticRegressionOutput(
        outputs,
        name='softmax')
    
    return outputs

In [80]:
net = rnn_fused()
train_iter.reset()
val_iter.reset()
mod = mx.mod.Module(net, context=mx.gpu())
mod.bind(data_shapes=train_lstm_iter.provide_data, 
        label_shapes=train_lstm_iter.provide_label)
mod.init_params(initializer=mx.init.Xavier())
mod.init_optimizer(
            optimizer='sgd',
optimizer_params=(('learning_rate', 0.01), ))

mod.fit(train_data=train_iter,
        eval_data=val_iter,
        #optimizer='s',
        #optimizer_params={'learning_rate': 0.01},
        eval_metric='acc',
        num_epoch=20,
        batch_end_callback = mx.callback.Speedometer(batch_size, 100),
)

INFO:root:Epoch[0] Batch [100]	Speed: 14948.92 samples/sec	accuracy=1.000000
INFO:root:Epoch[0] Train-accuracy=1.000000
INFO:root:Epoch[0] Time cost=6.111
INFO:root:Epoch[0] Validation-accuracy=1.000000
INFO:root:Epoch[1] Batch [100]	Speed: 15174.97 samples/sec	accuracy=1.000000
INFO:root:Epoch[1] Train-accuracy=1.000000
INFO:root:Epoch[1] Time cost=6.004
INFO:root:Epoch[1] Validation-accuracy=1.000000
INFO:root:Epoch[2] Batch [100]	Speed: 14894.61 samples/sec	accuracy=1.000000
INFO:root:Epoch[2] Train-accuracy=1.000000
INFO:root:Epoch[2] Time cost=6.153
INFO:root:Epoch[2] Validation-accuracy=1.000000
INFO:root:Epoch[3] Batch [100]	Speed: 14652.58 samples/sec	accuracy=1.000000
INFO:root:Epoch[3] Train-accuracy=1.000000
INFO:root:Epoch[3] Time cost=6.173
INFO:root:Epoch[3] Validation-accuracy=1.000000
INFO:root:Epoch[4] Batch [100]	Speed: 14928.00 samples/sec	accuracy=1.000000
INFO:root:Epoch[4] Train-accuracy=1.000000
INFO:root:Epoch[4] Time cost=6.104
INFO:root:Epoch[4] Validation-acc

In [73]:
from sklearn.metrics import f1_score, accuracy_score
import numpy as np

def train(timesteps=400, num_layers=3, 
          mode='lstm', num_hidden=20, 
          dropout=0.4, num_outputs=1, 
          batch_size=2**9, input_dim=3, 
          learning_rate=0.01, num_epoch=20):
    
    train_lstm_iter = mx.io.NDArrayIter(
        xtrain_lstm_sc,
        ytrain_mx,
        batch_size,
        shuffle=True,
        last_batch_handle='discard')
    
    val_lstm_iter = mx.io.NDArrayIter(
        val_lstm_sc,
        ytest_mx,
        batch_size,
        shuffle=False,
        last_batch_handle='discard')
        train_iter.reset()
        val_iter.reset()

    net = rnn_fused(timesteps=timesteps, 
                    num_layers=num_layers, 
                    num_hidden=num_hidden, 
                    dropout=dropout, 
                    num_outputs=num_outputs, 
                    batch_size=batch_size, 
                    input_dim=input_dim)
    
    mod = mx.mod.Module(net, context=mx.gpu())
    mod.bind(data_shapes=train_lstm_iter.provide_data, 
             label_shapes=train_lstm_iter.provide_label)
    
    mod.init_params(initializer=mx.init.Xavier())
    mod.init_optimizer(
        optimizer='sgd',
        optimizer_params=(('learning_rate', learning_rate), ))

    mod.fit(
        train_data=train_lstm_iter,
        eval_data=train_lstm_iter,
        eval_metric='acc',
        num_epoch=num_epoch,
        batch_end_callback = mx.callback.Speedometer(
        batch_size, 100))
    
    train_pred = mod.predict(train_iter).asnumpy()
    train_pred = np.where(train_pred > 0.0001, 1, 0)
    #print('train_pred:', train_pred)
    #print('train_pred_max:', np.max(train_pred))
    #print('train_pred_min:', np.min(train_pred))
    #print('train_label:', ytrain)
    f1_train = f1_score(ytrain, train_pred)
    
    test_pred = mod.predict(val_iter).asnumpy()
    test_pred = np.where(test_pred > 0.0001, 1, 0)
    f1_test = f1_score(ytest, test_pred)
    print('train f1 score:', f1_train)
    print('test f1 score:', f1_test)
    return f1_train, f1_test
    
    
    

In [81]:
train(timesteps=400, num_layers=3, 
      num_hidden=20, dropout=0.4, num_outputs=1, 
      batch_size=2**9, input_dim=3, 
      learning_rate=0.01, num_epoch=4)



INFO:root:Epoch[0] Batch [100]	Speed: 14866.64 samples/sec	accuracy=1.000000
INFO:root:Epoch[0] Train-accuracy=1.000000
INFO:root:Epoch[0] Time cost=6.123
INFO:root:Epoch[0] Validation-accuracy=1.000000
INFO:root:Epoch[1] Batch [100]	Speed: 14853.60 samples/sec	accuracy=1.000000
INFO:root:Epoch[1] Train-accuracy=1.000000
INFO:root:Epoch[1] Time cost=6.119
INFO:root:Epoch[1] Validation-accuracy=1.000000
INFO:root:Epoch[2] Batch [100]	Speed: 14334.22 samples/sec	accuracy=1.000000
INFO:root:Epoch[2] Train-accuracy=1.000000
INFO:root:Epoch[2] Time cost=6.347
INFO:root:Epoch[2] Validation-accuracy=1.000000
INFO:root:Epoch[3] Batch [100]	Speed: 14419.39 samples/sec	accuracy=1.000000
INFO:root:Epoch[3] Train-accuracy=1.000000
INFO:root:Epoch[3] Time cost=6.276
INFO:root:Epoch[3] Validation-accuracy=1.000000


train f1 score: 0.0
test f1 score: 0.0


(0.0, 0.0)

## Hyperparameter search

In [None]:
from sklearn.utils import shuffle
patience = 20
optimal_parameters = {}
f1_accepted = 0.6
f1_old = 0
count = 0
while (count < patience):
    print('while loop no: {}'.format(count))
    print('')
    params = {'timesteps': 400,
              'num_layers': np.random.randint(2, 5), 
              'num_hidden': np.random.randint(5, 50),
              'dropout': np.random.uniform(0.1, 0.5),
              'num_outputs': 1, 
              'batch_size': 2**9, # does not work to change batch_size?
              'input_dim': 3, 
              'learning_rate': 10**np.random.uniform(-4, -2), 
              'num_epoch': np.random.randint(10, 20)}
    print('')
    print(params)
    f1_train, f1_test = train(**params)
    count += 1
    
    
    
    

while loop no: 0


{'num_layers': 2, 'num_outputs': 1, 'learning_rate': 0.0011179273637285003, 'num_epoch': 15, 'dropout': 0.4992079264193652, 'num_hidden': 9, 'input_dim': 3, 'batch_size': 512, 'timesteps': 400}


INFO:root:Epoch[0] Batch [100]	Speed: 18101.49 samples/sec	accuracy=1.000000
INFO:root:Epoch[0] Train-accuracy=1.000000
INFO:root:Epoch[0] Time cost=4.998
INFO:root:Epoch[0] Validation-accuracy=1.000000
INFO:root:Epoch[1] Batch [100]	Speed: 18281.91 samples/sec	accuracy=1.000000
INFO:root:Epoch[1] Train-accuracy=1.000000
INFO:root:Epoch[1] Time cost=4.970
INFO:root:Epoch[1] Validation-accuracy=1.000000
INFO:root:Epoch[2] Batch [100]	Speed: 18620.54 samples/sec	accuracy=1.000000
INFO:root:Epoch[2] Train-accuracy=1.000000
INFO:root:Epoch[2] Time cost=4.881
INFO:root:Epoch[2] Validation-accuracy=1.000000
INFO:root:Epoch[3] Batch [100]	Speed: 18091.04 samples/sec	accuracy=1.000000
INFO:root:Epoch[3] Train-accuracy=1.000000
INFO:root:Epoch[3] Time cost=4.970
INFO:root:Epoch[3] Validation-accuracy=1.000000
INFO:root:Epoch[4] Batch [100]	Speed: 18580.15 samples/sec	accuracy=1.000000
INFO:root:Epoch[4] Train-accuracy=1.000000
INFO:root:Epoch[4] Time cost=4.884
INFO:root:Epoch[4] Validation-acc

train f1 score: 0.0
test f1 score: 0.0
while loop no: 1


{'num_layers': 4, 'num_outputs': 1, 'learning_rate': 0.00010613690991812941, 'num_epoch': 18, 'dropout': 0.18199824552467447, 'num_hidden': 49, 'input_dim': 3, 'batch_size': 512, 'timesteps': 400}


INFO:root:Epoch[0] Batch [100]	Speed: 8917.94 samples/sec	accuracy=1.000000
INFO:root:Epoch[0] Train-accuracy=1.000000
INFO:root:Epoch[0] Time cost=10.195
INFO:root:Epoch[0] Validation-accuracy=1.000000
INFO:root:Epoch[1] Batch [100]	Speed: 8702.62 samples/sec	accuracy=1.000000
INFO:root:Epoch[1] Train-accuracy=1.000000
INFO:root:Epoch[1] Time cost=10.411
INFO:root:Epoch[1] Validation-accuracy=1.000000
INFO:root:Epoch[2] Batch [100]	Speed: 8687.91 samples/sec	accuracy=1.000000
INFO:root:Epoch[2] Train-accuracy=1.000000
INFO:root:Epoch[2] Time cost=10.433
INFO:root:Epoch[2] Validation-accuracy=1.000000
INFO:root:Epoch[3] Batch [100]	Speed: 8679.03 samples/sec	accuracy=1.000000
INFO:root:Epoch[3] Train-accuracy=1.000000
INFO:root:Epoch[3] Time cost=10.425
INFO:root:Epoch[3] Validation-accuracy=1.000000
INFO:root:Epoch[4] Batch [100]	Speed: 8546.17 samples/sec	accuracy=1.000000
INFO:root:Epoch[4] Train-accuracy=1.000000
INFO:root:Epoch[4] Time cost=10.600
INFO:root:Epoch[4] Validation-acc

train f1 score: 0.0
test f1 score: 0.0
while loop no: 2


{'num_layers': 2, 'num_outputs': 1, 'learning_rate': 0.0029529972209037234, 'num_epoch': 18, 'dropout': 0.2801145430757215, 'num_hidden': 17, 'input_dim': 3, 'batch_size': 512, 'timesteps': 400}


INFO:root:Epoch[0] Batch [100]	Speed: 13862.60 samples/sec	accuracy=1.000000
INFO:root:Epoch[0] Train-accuracy=1.000000
INFO:root:Epoch[0] Time cost=7.078
INFO:root:Epoch[0] Validation-accuracy=1.000000
INFO:root:Epoch[1] Batch [100]	Speed: 13745.48 samples/sec	accuracy=1.000000
INFO:root:Epoch[1] Train-accuracy=1.000000
INFO:root:Epoch[1] Time cost=6.536
INFO:root:Epoch[1] Validation-accuracy=1.000000
INFO:root:Epoch[2] Batch [100]	Speed: 13737.86 samples/sec	accuracy=1.000000
INFO:root:Epoch[2] Train-accuracy=1.000000
INFO:root:Epoch[2] Time cost=6.462
INFO:root:Epoch[2] Validation-accuracy=1.000000
INFO:root:Epoch[3] Batch [100]	Speed: 13975.75 samples/sec	accuracy=1.000000
INFO:root:Epoch[3] Train-accuracy=1.000000
INFO:root:Epoch[3] Time cost=6.381
INFO:root:Epoch[3] Validation-accuracy=1.000000
INFO:root:Epoch[4] Batch [100]	Speed: 13732.39 samples/sec	accuracy=1.000000
INFO:root:Epoch[4] Train-accuracy=1.000000
INFO:root:Epoch[4] Time cost=6.400
INFO:root:Epoch[4] Validation-acc

train f1 score: 0.0
test f1 score: 0.0
while loop no: 3


{'num_layers': 3, 'num_outputs': 1, 'learning_rate': 0.007348456957322377, 'num_epoch': 16, 'dropout': 0.32725840798852995, 'num_hidden': 31, 'input_dim': 3, 'batch_size': 512, 'timesteps': 400}


INFO:root:Epoch[0] Batch [100]	Speed: 11350.14 samples/sec	accuracy=1.000000
INFO:root:Epoch[0] Train-accuracy=1.000000
INFO:root:Epoch[0] Time cost=8.133
INFO:root:Epoch[0] Validation-accuracy=1.000000
INFO:root:Epoch[1] Batch [100]	Speed: 11121.47 samples/sec	accuracy=1.000000
INFO:root:Epoch[1] Train-accuracy=1.000000
INFO:root:Epoch[1] Time cost=8.128
INFO:root:Epoch[1] Validation-accuracy=1.000000
INFO:root:Epoch[2] Batch [100]	Speed: 11189.40 samples/sec	accuracy=1.000000
INFO:root:Epoch[2] Train-accuracy=1.000000
INFO:root:Epoch[2] Time cost=8.104
INFO:root:Epoch[2] Validation-accuracy=1.000000
INFO:root:Epoch[3] Batch [100]	Speed: 11047.79 samples/sec	accuracy=1.000000
INFO:root:Epoch[3] Train-accuracy=1.000000
INFO:root:Epoch[3] Time cost=8.174
INFO:root:Epoch[3] Validation-accuracy=1.000000
INFO:root:Epoch[4] Batch [100]	Speed: 10918.85 samples/sec	accuracy=1.000000
INFO:root:Epoch[4] Train-accuracy=1.000000
INFO:root:Epoch[4] Time cost=8.223
INFO:root:Epoch[4] Validation-acc