In [None]:
# Import the libraries
import numpy as np
import pandas as pd
import os
import keras
import tensorflow as tf

from keras.models import Sequential
from keras.layers import  Bidirectional,Dense, Dropout, Activation, Flatten, LSTM, TimeDistributed, RepeatVector
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,r2_score
from keras.callbacks import ReduceLROnPlateau
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_columns', None)

# prepare training data and data visualization

def readdata(datapath):
    """Read a csv file and index it by its parsed ``Time`` column.

    The ``Time`` column is parsed with the fixed format ``%Y/%m/%d %H:%M``
    and becomes the index (named ``time``); it is not kept as a data column.

    Args:
       datapath(str): csv file; it must contain a ``Time`` column
    Returns:
       rawdata(dataframe): remaining columns, indexed by timestamp
    Raises:
       KeyError: if the file has no ``Time`` column
    """
    rawdata = pd.read_csv(datapath)
    # pd.to_datetime already yields Timestamp values, so no extra per-row
    # conversion pass is needed afterwards.
    timestamps = pd.to_datetime(rawdata['Time'], format="%Y/%m/%d %H:%M")
    rawdata.index = pd.DatetimeIndex(timestamps, name='time')
    # A lowercase 'time' column, if the file happens to have one, is
    # discarded together with 'Time'.
    return rawdata.drop(['Time', 'time'], axis=1, errors='ignore')

def data_norm(df):
    """Min-max scale every column of ``df`` with fixed bounds.

    Each column is mapped with ``(value - low) / (high - low)`` using the
    bounds hard-coded below, so a value at the low bound becomes 0 and a
    value at the high bound becomes 1.

    Args:
       df(dataframe): every column needs an entry in the bound tables
    Returns:
       df_norm(dataframe): new frame with the same columns, normalized
    Raises:
       KeyError: for a column that has no hard-coded bounds
    """
    upper = {'X1': 3.5, 'X2': 13}
    lower = {'X1': 2.6, 'X2': -0.2}

    scaled = pd.DataFrame()
    for name in df.columns:
        floor = lower[name]
        span = upper[name] - floor
        scaled[name] = (df[name] - floor) / span
    return scaled

def up_low_check(df):
    """Blank out every row holding a value outside its column's bounds.

    Bounds are looked up per column in the ``column_high`` and
    ``column_low`` mappings, which are read from module scope (they are not
    defined inside this function). As soon as one column of a row is above
    its high bound or below its low bound, the whole row is set to NaN.
    The frame is modified in place and also returned.

    Args:
       Dataframe: every column needs an entry in both bound mappings
    Returns:
       Dataframe: the same object, with out-of-range rows set to NaN
    """
    for name in df.columns:
        values = df[name]
        too_high = values.gt(column_high[name])
        too_low = values.lt(column_low[name])
        # Invalidate the complete row, not only the offending cell.
        df.loc[too_high | too_low, :] = np.nan
    return df

def buildWindows(df, windowsize,feature):
    """Slide a fixed-size window over ``df`` and collect inputs and targets.

    A window of ``windowsize`` consecutive rows is taken at every start row.
    The first ``feature`` columns of the window form the input block;
    windows whose input block contains duplicated rows are skipped. The
    targets are the remaining columns, taken from offset ``past`` up to the
    end of the window.

    ``past`` is not a parameter: it is read from module scope.
    NOTE(review): confirm ``past`` is defined before this function is called.

    Args:
       df(dataframe):Features and targets
       windowsize(int):Window sizes
       feature(int): Number of features (leading columns used as input)
    Returns:
       numpy: X (stacked input windows), Y (targets flattened to one column)
    """
    inputs, targets = [], []
    last_start = df.shape[0] - windowsize
    for start in range(last_start + 1):
        stop = start + windowsize
        window = df.iloc[start:stop, 0:feature]
        if window.duplicated().any():
            # Repeated feature rows inside the window: skip it entirely.
            continue
        targets.append(np.array(df.iloc[start + past:stop, feature:]))
        inputs.append(np.array(window))
    targets = np.array(targets).reshape(-1, 1)
    return np.array(inputs), np.array(targets)

def splitData(X,Y,val_size,mode="normal"):
    """Train/Validation data split

    Args:
       X(numpy):features
       Y(numpy):targets
       val_size(float): validation sizes(ratio)
       mode(str): "normal" keeps the original order and uses the trailing
           ``val_size`` share as validation data (default); "random"
           shuffles with scikit-learn's ``train_test_split`` and a fixed
           seed of 42
    Returns:
       numpy: X_train, Y_train, X_val, Y_val
    Raises:
       ValueError: if ``mode`` is neither "normal" nor "random"
    """
    if mode == "random":
        # Imported here so the ordered split works without scikit-learn.
        from sklearn.model_selection import train_test_split
        X_train, X_val, Y_train, Y_val = train_test_split(
            X, Y, test_size=val_size, random_state=42)
    elif mode == "normal":
        # The cut points are computed per array because X and Y are sliced
        # independently of each other.
        x_cut = int(X.shape[0] * (1 - val_size))
        y_cut = int(Y.shape[0] * (1 - val_size))
        X_train, X_val = X[:x_cut], X[x_cut:]
        Y_train, Y_val = Y[:y_cut], Y[y_cut:]
    else:
        raise ValueError(
            "mode must be 'normal' or 'random', got {!r}".format(mode))
    return X_train, Y_train, X_val, Y_val

def training_set_prepare(datapath,y_num,featureNum,WindowsSize,futureDay):
    """Load a csv file and turn it into windowed train/validation arrays.

    Steps: read the file, hand its ``describe()`` summary to ``new_up_low``,
    blank out-of-range rows, min-max normalize, add the one-step-ahead
    target column ``Y_+1``, build sliding windows and split them in their
    original order (last 20% for validation).

    Args:
       datapath(str): csv file passed to ``readdata``
       y_num(int): Number of targets (currently not used in the body)
       featureNum(int): Number of features (leading columns of each window)
       WindowsSize(int): number of rows per window
       futureDay: currently not used in the body

    Returns:
       numpy: X_train, Y_train, X_val, Y_val
    """

    training_data=readdata(datapath)
    new_training_set_describe=training_data.describe().T
    # new_up_low is defined outside this block.
    # NOTE(review): presumably it refreshes the bounds used by
    # up_low_check from the summary statistics - confirm.
    new_up_low(new_training_set_describe)
    training_data_up_low_check=up_low_check(training_data)
    training_set_norm=data_norm(training_data_up_low_check)
    # Target for each row is the next row's Y (the last row gets NaN).
    training_set_norm["Y_+1"]=training_set_norm["Y"].shift(-1)

    X_train_W, Y_train_W = buildWindows(training_set_norm, WindowsSize,featureNum)
    # The split mode is passed explicitly; the ordered ("normal") split
    # keeps the windows in their time order.
    X_train, Y_train, X_val, Y_val = splitData(X_train_W, Y_train_W, 0.2, "normal")
    return X_train, Y_train, X_val, Y_val

def data_plot(rawdata):
    """plot the data in time series

    Draws one scatter subplot per column, stacked vertically, with the
    frame's index on the x axis, then shows the figure.

    Args:
       rawdata(dataframe): one subplot is drawn for each column
    Returns:
       None
    """
    column_name=rawdata.columns
    column_num=len(column_name)
    plt.style.use('ggplot')
    # Figure height grows with the number of columns (10 units per subplot).
    plt.figure(figsize=(50,column_num*10)).patch.set_facecolor('white')
    for idx, i in enumerate(column_name, start=1):
        plt.subplot(column_num,1,idx)
        plt.title(i,fontsize=50)
        # The label gives plt.legend() an artist to show; without one the
        # call only emits a warning and draws nothing.
        plt.scatter(rawdata.index,rawdata[i],s=5,c="#56B4E9",label=i)
        plt.yticks(fontsize=40)
        plt.xticks(fontsize=40,rotation=45)
        plt.legend()
    plt.tight_layout()
    plt.show()
    
#soft sensor 
def buildLSTMModel(X_shape,Y_shape):
    """Build LSTM model

    Two stacked LSTM layers (64 then 32 units), 10% dropout and a linear
    dense output layer, compiled with MSE loss and the Adam optimizer.
    The model summary is printed as a side effect.

    Args:
       X_shape(tuple): shape of the input array; index 1 is used as the
           number of timesteps and index 2 as the number of features
       Y_shape(int): number of units of the output layer
    Returns:
       model(keras model): compiled model
    """
    model = Sequential()
    # input_shape=(timesteps, features) replaces the deprecated
    # input_length / input_dim keyword pair.
    model.add(LSTM(units = 64, activation='tanh',recurrent_initializer='orthogonal',
                   return_sequences = True, input_shape=(X_shape[1], X_shape[2])))
    model.add(LSTM(units = 32,activation='tanh'))
    model.add(Dropout(0.1))
    model.add(Dense(units=Y_shape, activation="linear", name="output"))
    model.compile(loss="mse", optimizer="adam",metrics=['mse'])
    model.summary()
    return model

def show_train_history(train_history,model_name):
    """plot training report

    Plots the training and validation MSE per epoch, annotates the final
    values, saves the figure as ``Training Report--<model_name>.jpg`` in
    the current directory and shows it.

    Args:
       train_history(training history): object whose ``history`` dict holds
           the per-epoch MSE metric and its ``val_`` counterpart
       model_name(str): used in the plot title and the file name
    Returns:
       None
    Raises:
       KeyError: if the history holds neither ``mean_squared_error`` nor
           ``mse`` (and the matching ``val_`` entry)
    """
    history = train_history.history
    # Depending on the Keras version a metric compiled as 'mse' is recorded
    # under 'mean_squared_error' or under 'mse'; accept either.
    metric = 'mean_squared_error' if 'mean_squared_error' in history else 'mse'
    train_mse = history[metric]
    val_mse = history['val_' + metric]

    plt.figure(figsize=(8,5))
    plt.plot(train_mse, label='train')
    plt.plot(val_mse, label='validation')
    plt.xlabel('Epoch')
    plt.ylabel('MSE(Loss)')
    plt.title('Training Report--{}'.format(model_name),fontsize=15)
    # The annotation is placed at fixed data coordinates (x=1, y=0.00026).
    plt.text(1,0.00026,"train_loss:{}  val_loss:{}".format(round(train_mse[-1], 5),
                                                 round(val_mse[-1], 5)))
    plt.legend()
    plt.savefig('Training Report--{}.jpg'.format(model_name))
    plt.show()
    
def soft_sensor_train(X_train, Y_train, X_val, Y_val,modle_name):
    """Train the LSTM soft sensor, save it and plot the training report.

    Builds the model from the training array shapes, fits it for up to 50
    epochs (batch size 100) with early stopping and learning-rate reduction
    on ``val_loss``, saves it as ``<modle_name>_.h5`` and plots the history.

    Args:
       X_train(numpy): training inputs; its shape is passed to the builder
       Y_train(numpy): training targets; ``shape[1]`` sets the output units
       X_val(numpy): validation inputs
       Y_val(numpy): validation targets
       modle_name(str): used for the saved file name and the report title
    Returns:
       None
    """
    model = buildLSTMModel(X_train.shape,Y_train.shape[1])
    # NOTE(review): early stopping (patience 5) can end training before the
    # learning-rate reduction (patience 10) ever triggers - confirm the
    # two patience values are intended.
    callback = EarlyStopping(monitor="val_loss", patience=5, verbose=1, mode="auto")
    reduce_lr = ReduceLROnPlateau(factor=0.2,
                                  min_lr=1e-12,
                                  monitor='val_loss',
                                  patience=10,
                                  verbose=1)
    # Fit on the Y_train argument the model was sized from.
    history=model.fit(X_train, Y_train, epochs=50, batch_size=100, validation_data=(X_val, Y_val), callbacks=[callback,reduce_lr])
    model.save("{}_.h5".format(modle_name))
    show_train_history(history,"{}".format(modle_name))