In [1]:
# import torch
import glob
import numpy as np
import pandas as pd
from math import sqrt
from datetime import datetime
from matplotlib import pyplot
from pandas.core.common import flatten

# from torch import nn, optim
from pickle import dump
from pickle import load
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import BatchNormalization

In [2]:
valves = ["20-LV-1031_Z_X_Value", "20-LV-1031_Z_Y_Value", "20-LV-1034_Z_X_Value",
              "20-LV-1034_Z_Y_Value", "20-PV-1037_Z_X_Value", "20-PV-1037_Z_Y_Value"]

In [3]:
valve = '20-PV-1037_Z_Y_Value'

In [4]:
def create_df(valve_name):
    """ Returns a dataset of specific valve column."""
    
    dfs = {file.split("_")[-4]: pd.read_pickle(file) for file in \
           glob.glob("S:\SRH\BDBA_Sem_2\Case_study_1\data\*.pkl")}
    
    dfs_sorted = dict(sorted(dfs.items()))
    df_single = pd.concat(dfs_sorted, axis=0)

    valve_df = df_single.filter([valve_name])
    valve_df = valve_df.droplevel(0, axis=0)
    return valve_df

valve_1_df = create_df(valve)

def preprocessed_df(df, val_pct):
    """ Creates train, validation and test set after applying normalisation of all feature cols
    Args:
    df: dataframe object
    val_pct: percentage size of validation plus test size (float)
    """
    
    val_data_size = round(df.shape[0] * 0.3)
    test_data_size = round(val_data_size * 0.1)
    
    train_data = df[:-val_data_size]
    val_data = df[-val_data_size:-test_data_size]
    test_data = df[-test_data_size:]
    
    # Scaling the data
    scalar = MinMaxScaler()
    scalar.fit(train_data.values.reshape(-1,1)) # sklearn scalars accepts 2d arrays to scale hence reshaping the data
    # save the scaler
    dump(scalar, open(f'model_objects_hybrid\scaler_hybrid_{valve}.pkl', 'wb'))

    train_scaled = scalar.transform(train_data.values.reshape(-1,1)) # pandas does'nt have .reshape() hence by calling the values and then applying underlying numpy reshape
    val_scaled = scalar.transform(val_data.values.reshape(-1,1))
    test_scaled = scalar.transform(test_data.values.reshape(-1,1))

    df_train = pd.DataFrame(train_scaled)
    df_val = pd.DataFrame(val_scaled)
    df_test = pd.DataFrame(test_scaled)
    
    return df_train, df_val, df_test

df_train, df_val, df_test = preprocessed_df(valve_1_df, 0.3)

# Creating small input sequences
def input_sequence(data, seq_len):
    """ Creates a small input sequence of a given seq length and 
        returns two numpy arrays asinpuy and output sequence
        
        Args:
        data = input dataframe
        seq_len = integer number

        """
    input_x = []
    output_y = []
    
    for i in range(len(data)-seq_len-1):
        in_x = data[i:(i+seq_len)]
        out_y = data[i+seq_len]
        
        input_x.append(in_x)
        output_y.append(out_y)
    
    return np.array(input_x), np.array(output_y)

train_values = df_train.values.astype('float32')
val_values = df_val.values.astype('float32')
test_values = df_test.values.astype('float32')
# specify the window size
n_steps = 15
# split into samples
X_train, y_train = input_sequence(train_values, n_steps)
X_val, y_val = input_sequence(val_values, n_steps)
X_test, y_test = input_sequence(test_values, n_steps)
# reshape into [samples, timesteps, features]
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [5]:
model = tf.keras.models.Sequential([tf.keras.layers.Conv1D(filters=32, kernel_size=5,strides=1, padding="causal",activation="relu",
                                                   input_shape=[None, 1]),
                                                   tf.keras.layers.LSTM(64, return_sequences=True),
                                                   tf.keras.layers.LSTM(64, return_sequences=True),  
                                                   tf.keras.layers.Dense(30, activation="relu"),  
                                                   tf.keras.layers.Dense(10, activation="relu"),  
                                                   tf.keras.layers.Dense(1)])

In [6]:
optimizer = tf.keras.optimizers.SGD(lr=1e-4, momentum=0.9)
es = EarlyStopping(monitor='val_loss', patience=6)
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])
history = model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=2, 
                            validation_data=(X_val, y_val), callbacks=[es])

Epoch 1/100
522/522 - 13s - loss: 0.1174 - mae: 0.4120 - val_loss: 0.0452 - val_mae: 0.2484
Epoch 2/100
522/522 - 8s - loss: 0.0554 - mae: 0.2586 - val_loss: 0.0223 - val_mae: 0.1821
Epoch 3/100
522/522 - 8s - loss: 0.0369 - mae: 0.2267 - val_loss: 0.0180 - val_mae: 0.1640
Epoch 4/100
522/522 - 8s - loss: 0.0317 - mae: 0.2235 - val_loss: 0.0181 - val_mae: 0.1604
Epoch 5/100
522/522 - 8s - loss: 0.0303 - mae: 0.2233 - val_loss: 0.0187 - val_mae: 0.1611
Epoch 6/100
522/522 - 8s - loss: 0.0299 - mae: 0.2234 - val_loss: 0.0192 - val_mae: 0.1634
Epoch 7/100
522/522 - 8s - loss: 0.0298 - mae: 0.2235 - val_loss: 0.0195 - val_mae: 0.1648
Epoch 8/100
522/522 - 8s - loss: 0.0298 - mae: 0.2235 - val_loss: 0.0196 - val_mae: 0.1655
Epoch 9/100
522/522 - 9s - loss: 0.0297 - mae: 0.2234 - val_loss: 0.0197 - val_mae: 0.1659


In [7]:
# save model to file
model.save(f'model_time_series_hybrid_{valve}.h5')

In [8]:
model = load_model(f'S:\SRH\caseStudy1\Codes\codes_timeSeries\model_time_series_hybrid_{valve}.h5')

In [9]:
# load the scaler
# scalar = load(open(f'S:\SRH\caseStudy1\Codes\codes_timeSeries\model_objects_hybrid\scaler_hybrid_{valve}.pkl', 'rb'))

In [10]:
# evaluate the model
mse, mae = model.evaluate(X_test, y_test, verbose=0)
print('MSE: %.3f, RMSE: %.3f, MAE: %.3f' % (mse, sqrt(mse), mae))

MSE: 0.021, RMSE: 0.145, MAE: 0.171


MSE: 0.008, RMSE: 0.090, MAE: 0.094 for '20-LV-1031_Z_X_Value'