In [1]:
import glob
import numpy as np
import pandas as pd
from math import sqrt
from matplotlib import pyplot as plt
from datetime import datetime
from IPython.display import clear_output
from pandas.core.common import flatten


from tensorflow import keras
from pickle import dump
from pickle import load
import kerastuner as kt
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import BatchNormalization

In [2]:
# Name of the alarm column that the rest of the notebook models.
alarm = "1037_L"

In [None]:
def create_df(alarm_name, data_glob=r"S:\SRH\BDBA_Sem_2\Case_study_1\data\*.pkl"):
    """Load every pickled frame, stack them and keep a single alarm column.

    Each file matched by ``data_glob`` is read with ``pd.read_pickle`` and keyed
    by the fourth-from-last ``_``-separated token of its path. The frames are
    concatenated row-wise in sorted key order (the key becomes the outer index
    level), every known alarm column except ``alarm_name`` is dropped, and
    remaining missing values are replaced by 0.

    Args:
        alarm_name: alarm column to keep; must be one of the six known alarms.
        data_glob: glob pattern of the pickle files to load. Defaults to the
            location that was previously hard-coded.

    Returns:
        DataFrame holding all non-alarm columns plus ``alarm_name``, without NaNs.

    Raises:
        ValueError: if ``alarm_name`` is not a known alarm, or if no file
            matches ``data_glob``.
    """
    alarms = ['1031_H', '1031_L', '1034_H', '1034_L', '1037_H', '1037_L']
    # Fail fast (before any file I/O) with a message that names the valid choices.
    if alarm_name not in alarms:
        raise ValueError(f"Unknown alarm {alarm_name!r}; expected one of {alarms}")
    other_alarms = [name for name in alarms if name != alarm_name]

    paths = glob.glob(data_glob)
    if not paths:
        raise ValueError(f"No pickle files matched {data_glob!r}")

    # SECURITY: unpickling executes arbitrary code; only load trusted files.
    # NOTE: two files sharing the same key token silently overwrite each other.
    dfs = {path.split("_")[-4]: pd.read_pickle(path) for path in paths}

    # Sort by key so the row order is deterministic regardless of glob order.
    dfs_sorted = dict(sorted(dfs.items()))
    df_single = pd.concat(dfs_sorted, axis=0)

    return df_single.drop(columns=other_alarms).fillna(0)

In [None]:
# Build the modelling frame for the alarm selected above.
df_alarm = create_df(alarm_name=alarm)

In [None]:
def input_sequence(data, past_seq_len, future_window):
    """Slice a frame into sliding input windows with binary look-ahead labels.

    The last column of ``data`` is the target; all other columns are features.
    Sample ``i`` consists of feature rows ``i .. i+past_seq_len-1`` and is
    labelled 1 if the target is > 0 anywhere in the ``future_window`` rows that
    follow, else 0. Only windows with a complete look-ahead are emitted, so the
    last samples are never labelled from a truncated future.

    Args:
        data: input dataframe (features first, target in the last column).
        past_seq_len: number of past rows per input window (positive int).
        future_window: number of rows to look ahead for the label (positive int).

    Returns:
        Tuple ``(in_array, out_array)`` of float32 arrays with shapes
        ``(n_samples, past_seq_len, n_features)`` and ``(n_samples, 1)``, where
        ``n_samples = len(data) - past_seq_len - future_window + 1`` (0 if the
        frame is too short).

    Raises:
        ValueError: if ``past_seq_len`` or ``future_window`` is smaller than 1.
    """
    if past_seq_len < 1 or future_window < 1:
        raise ValueError("past_seq_len and future_window must be positive integers")

    # Convert once up front instead of slicing the DataFrame on every iteration.
    features = np.asarray(data.iloc[:, :-1]).astype(np.float32)
    alarm_on = np.asarray(data.iloc[:, -1] > 0)

    n_samples = len(data) - past_seq_len - future_window + 1
    if n_samples <= 0:
        # Keep the documented rank so downstream reshapes still work.
        return (np.empty((0, past_seq_len, features.shape[1]), dtype=np.float32),
                np.empty((0, 1), dtype=np.float32))

    starts = np.arange(n_samples)

    # Row indices of every window: shape (n_samples, past_seq_len).
    window_rows = starts[:, None] + np.arange(past_seq_len)[None, :]
    in_array = features[window_rows]

    # Prefix sums turn "any alarm in rows [a, b)" into hits[b] - hits[a] > 0.
    hits = np.concatenate(([0], np.cumsum(alarm_on)))
    future_start = starts + past_seq_len
    any_alarm = (hits[future_start + future_window] - hits[future_start]) > 0
    out_array = any_alarm.astype(np.float32)

    return in_array, out_array.reshape(-1, 1)

In [None]:
def preprocessed_df(df, val_pct, alarm_name=None):
    """Split a frame chronologically into train/test parts and min-max scale the features.

    The last ``round(len(df) * val_pct)`` rows become the test part, the rest
    the train part. A ``MinMaxScaler`` is fitted on the train features only
    (all columns but the last), pickled to disk, and applied to both parts.

    Args:
        df: dataframe object; features first, target in the last column.
        val_pct: fraction of rows held out as the test part (float in (0, 1)).
        alarm_name: suffix used in the scaler file name. Defaults to the
            notebook-level ``alarm`` variable.

    Returns:
        Tuple ``(df_train, df_test)``. Feature columns are scaled and renamed to
        integer positions; the unscaled target is stored in column ``'alarm'``.

    Raises:
        ValueError: if the split would leave the train or the test part empty.
    """
    if alarm_name is None:
        alarm_name = alarm  # fall back to the notebook-level setting

    n_rows = df.shape[0]
    test_data_size = round(n_rows * val_pct)
    # A negative-size slice such as df[:-0] is empty, which would silently
    # invert the split; reject degenerate sizes explicitly instead.
    if not 0 < test_data_size < n_rows:
        raise ValueError(
            f"val_pct={val_pct!r} gives a test size of {test_data_size} for "
            f"{n_rows} rows; both train and test parts must be non-empty"
        )

    split_at = n_rows - test_data_size
    train_data = df.iloc[:split_at]
    test_data = df.iloc[split_at:]

    # Scaling the data: fit on train only so no test statistics leak in.
    scalar = MinMaxScaler()
    scalar.fit(train_data.iloc[:, :-1])

    # Save the scaler. The raw string keeps the exact original file name
    # (backslash included); the context manager closes the handle.
    with open(r'model_objects\scaler_cls_fcn' + alarm_name + '.pkl', 'wb') as scaler_file:
        dump(scalar, scaler_file)

    train_scaled = scalar.transform(train_data.iloc[:, :-1])
    test_scaled = scalar.transform(test_data.iloc[:, :-1])

    df_train = pd.DataFrame(train_scaled)
    df_train['alarm'] = train_data.iloc[:, -1].values
    df_test = pd.DataFrame(test_scaled)
    df_test['alarm'] = test_data.iloc[:, -1].values

    return df_train, df_test

In [None]:
# Hold out the last 30% of the rows as the test part.
df_train, df_test = preprocessed_df(df=df_alarm, val_pct=0.3)

In [None]:
# specify the window size
n_steps = 15         # past rows fed to the model per sample
future_window = 10   # rows to look ahead when deciding the label

# split into samples
X_train, y_train = input_sequence(df_train, n_steps, future_window)
X_test, y_test = input_sequence(df_test, n_steps, future_window)

# input_sequence only ever emits labels 0 and 1, so the class count is fixed.
# Counting unique values in y_test alone would give 1 whenever the held-out
# part contains no alarm, producing an output layer too small for y_train.
nb_classes = 2

In [None]:
# Append a trailing axis of length 1 (the channel axis expected by Conv2D).
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

In [None]:
# Keras requires an integer batch size: about a tenth of the training set,
# capped at 16 and never below 1 (true division produced a float here).
batch_size = max(1, min(X_train.shape[0] // 10, 16))
nb_epochs = 50

x = keras.layers.Input(X_train.shape[1:])

# Three Conv -> BatchNorm -> ReLU stages; 'same' padding keeps the spatial size.
conv1 = keras.layers.Conv2D(filters=128, kernel_size=8, strides=1, padding='same')(x)
conv1 = keras.layers.BatchNormalization()(conv1)
conv1 = keras.layers.Activation('relu')(conv1)

conv2 = keras.layers.Conv2D(filters=256, kernel_size=5, strides=1, padding='same')(conv1)
conv2 = keras.layers.BatchNormalization()(conv2)
conv2 = keras.layers.Activation('relu')(conv2)

conv3 = keras.layers.Conv2D(filters=128, kernel_size=3, strides=1, padding='same')(conv2)
conv3 = keras.layers.BatchNormalization()(conv3)
conv3 = keras.layers.Activation('relu')(conv3)

full = keras.layers.GlobalAveragePooling2D()(conv3)
# Softmax yields a proper distribution over the mutually exclusive classes,
# which is what sparse_categorical_crossentropy expects (sigmoid does not).
out = keras.layers.Dense(nb_classes, activation='softmax')(full)

model = keras.models.Model(inputs=x, outputs=out)

optimizer = keras.optimizers.Adam()

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# Learning-rate schedule (not early stopping): halve the learning rate when the
# training loss has not improved for 15 epochs, down to a floor of 1e-4.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5,
                                              patience=15, min_lr=0.0001)

# NOTE(review): the test split doubles as validation data, so the reported
# validation accuracy is not an independent estimate.
hist = model.fit(X_train, y_train, batch_size=batch_size, epochs=nb_epochs,
                 verbose=1, validation_data=(X_test, y_test), callbacks=[reduce_lr])

# Print the loss and validation accuracy of the epoch with the lowest training loss.
log = pd.DataFrame(hist.history)
best_epoch = log['loss'].idxmin()
# The history key is 'val_accuracy' in TF2 Keras and 'val_acc' in older releases.
val_acc_key = 'val_accuracy' if 'val_accuracy' in log.columns else 'val_acc'
print(log.loc[best_epoch, 'loss'], log.loc[best_epoch, val_acc_key])