In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL
import pywt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler as MMS
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
import tensorflow as tf
from keras.optimizers import SGD, Adam
from tensorflow import keras
from keras.layers import Dense, LSTM, Dropout
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, cross_val_score
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.metrics import RootMeanSquaredError, MeanSquaredError

In [4]:
df = pd.read_csv("Data_for_multivariate_LSTM.csv", parse_dates=["Time"])
df.head(5)

Unnamed: 0,Time,BO1016_pressure,BO1017_pressure,BO1029_pressure,NB1017_pressure,WS8007_pressure,NMN BOO Pressure
0,2023-09-03 00:00:00,21.488,20.313,13.168,20.039,12.678,21.444
1,2023-09-03 00:15:00,21.864,20.885,13.504,20.619,12.709,21.264
2,2023-09-03 00:30:00,22.005,20.924,13.285,20.713,12.709,20.787
3,2023-09-03 00:45:00,22.153,21.214,13.379,20.995,12.709,21.076
4,2023-09-03 01:00:00,22.373,21.386,13.191,21.19,12.709,20.787


In [5]:
new_index = pd.date_range(start = df["Time"].min(), end = df["Time"].max(), freq = "5T" )
new_df = pd.DataFrame(new_index, columns=['Time'])
new_df.head(5)

Unnamed: 0,Time
0,2023-09-03 00:00:00
1,2023-09-03 00:05:00
2,2023-09-03 00:10:00
3,2023-09-03 00:15:00
4,2023-09-03 00:20:00


In [6]:
# Kết hợp DataFrame mới với DataFrame gốc
merged_df = pd.merge(new_df, df, how='left', on="Time")
feature = merged_df.drop("Time", axis = 1).columns
# Nội suy tuyến tính trên các khoảng thời gian cụ thể
merged_df[feature] = merged_df[feature].interpolate(method='linear')
# merged_df[feature] = merged_df[feature].interpolate(method='linear interp')
merged_df.to_csv("Data_for_multivariate_LSTM_5T.csv")
print(merged_df.columns)

Index(['Time', 'BO1016_pressure', 'BO1017_pressure', 'BO1029_pressure',
       'NB1017_pressure', 'WS8007_pressure', 'NMN BOO Pressure'],
      dtype='object')


In [7]:
merged_df.set_index("Time", inplace=True) #set index

state_ = merged_df
features = state_.columns
scaler =MMS() #scale data
state = scaler.fit_transform(state_[features])
state = pd.DataFrame(columns=features, data=state, index = merged_df.index)
print(state.head())

merged_df["NMN BOO Pressure"] = scaler.fit_transform(merged_df[["NMN BOO Pressure"]])
control = merged_df["NMN BOO Pressure"]

                     BO1016_pressure  BO1017_pressure  BO1029_pressure  \
Time                                                                     
2023-09-03 00:00:00         0.578953         0.629499         0.195371   
2023-09-03 00:05:00         0.582469         0.635414         0.198729   
2023-09-03 00:10:00         0.585984         0.641330         0.202087   
2023-09-03 00:15:00         0.589499         0.647245         0.205445   
2023-09-03 00:20:00         0.590817         0.647648         0.203256   

                     NB1017_pressure  WS8007_pressure  NMN BOO Pressure  
Time                                                                     
2023-09-03 00:00:00         0.610759         0.233757          0.203199  
2023-09-03 00:05:00         0.616939         0.241474          0.201544  
2023-09-03 00:10:00         0.623119         0.249191          0.199890  
2023-09-03 00:15:00         0.629299         0.256908          0.198235  
2023-09-03 00:20:00         0.630301 

In [8]:
X = []
Y = []

# Loop qua các hàng của dataframe để lấy các cửa sổ thời gian t-5 đến t và giá trị tại t+1 cho Y
for i in range(5, len(df) - 1):
    X.append(state.iloc[i-5:i].values)  # Lấy cửa sổ thời gian t-5 đến < t
    Y.append(control.iloc[i])  # Lấy giá trị t của cột cuối cùng

X = np.array(X)
Y = np.array(Y)
X_state_tr, X_state_te, y_state_tr, y_state_te = train_test_split(X, Y, test_size = 0.25, random_state = 25)
X_state_tr.shape , y_state_tr.shape

((21917, 5, 6), (21917,))

In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
# Define the optimizer
opt1 = Adam(learning_rate=0.0005)
ls = 'mae'
# Encoder
encoder_inputs = Input(shape=(5, 6))
encoder_lstm = LSTM(64, recurrent_activation='tanh', return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)
encoder_states = [state_h, state_c]

# Decoder
decoder_inputs = Input(shape=(None, 6))
decoder_lstm = LSTM(64, recurrent_activation='tanh', return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense_1 = Dense(32, activation='elu')
decoder_outputs = decoder_dense_1(decoder_outputs)
decoder_dropout_1 = Dropout(0.1)(decoder_outputs)
decoder_dense_2 = Dense(16, activation='elu')
decoder_outputs = decoder_dense_2(decoder_dropout_1)
decoder_dense_3 = Dense(8, activation='elu')
decoder_outputs = decoder_dense_3(decoder_outputs)
decoder_dense_4 = Dense(1)
decoder_outputs = decoder_dense_4(decoder_outputs)
# Create the model
model1 = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile the model
model1.compile(optimizer=opt1, loss=ls)
# Prepare decoder input data (initial zeros for the first prediction step)
decoder_input_data = np.zeros((X_state_tr.shape[0], 1, 6))

# Fit the model
apply1 = model1.fit([X_state_tr, decoder_input_data], y_state_tr, batch_size=1024, epochs=200, validation_split=0.2)

# Prepare decoder input data for testing
decoder_input_test_data = np.zeros((X_state_te.shape[0], 1, 6))

# Evaluate the model
score1 = model1.evaluate([X_state_te, decoder_input_test_data], y_state_te)

NameError: name 'np' is not defined

In [11]:
# Etract data from dataframe and rescale 
from keras.utils import to_categorical
BOO_df = merged_df
Y = np.array(BOO_df['NMN BOO Pressure'].values)
X = np.array(BOO_df[['BO1016_pressure' , 'BO1017_pressure' , 'BO1029_pressure' ,'NB1017_pressure' ,'WS8007_pressure']])

# Get dimensions of input and output 
dimof_output = int(np.max(Y) + 1)
dimof_input = X.shape[1]
print('dimof_input: ', dimof_input)
print(np.max(Y))
print('dimof_output: ', dimof_output)

# Scale/whiten the X data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Set y as categorical data
Y = to_categorical(Y, dimof_output)
# --------------------------------------------------
def create_dataset(X, Y, **options):
    """Convert an array of X, Y values into a dataset matrix for and LSTM"""
    
    look_back = options.pop('look_back', None)
    dataX, dataY = [], []
    for i in range(len(X) - look_back - 1):
        a = X[i:(i+look_back)]
        dataX.append(a)
        dataY.append(Y[i + look_back])
    return np.array(dataX), np.array(dataY)

def train_test_split_sequential(X, Y, **options):
    """Splits data into train test sets, based on a fraction test_size samples
    from the end of the timeseries"""
    
    test_size = options.pop('test_size', None)
    if test_size is None:
        test_size = 0.25
        
    n_sample = len(Y)
    n_test = int(n_sample * test_size)
        
    X_train = X[:-n_test]
    Y_train = Y[:-n_test]

    X_test = X[-n_test:]
    Y_test = Y[-n_test:]
    
    return X_train, X_test, Y_train, Y_test

# Predictions will be based on look_back minutes of data:
look_back = 12*24
X_all, Y_all = create_dataset(X, Y, look_back=look_back)

X_train, X_test, Y_train, Y_test = train_test_split_sequential(X_all, Y_all, test_size=0.25)

print('X training data shape = ', X_train.shape)
print('Y training data shape', Y_train.shape)

print('X test data shape = ', X_test.shape)
print('Y test data shape', Y_test.shape)

dimof_input:  5
1.0
dimof_output:  2
X training data shape =  (65547, 288, 5)
Y training data shape (65547, 2)
X test data shape =  (21849, 288, 5)
Y test data shape (21849, 2)


In [12]:
Y

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [13]:
# Define the parameters
batch_size = 32
dropout = 0.4
look_back = 12*24 # Based on your dataset creation
dimof_input = 5  # Number of features
dimof_output = 2  # Number of target values
# Create the LSTM network
model = Sequential()
model.add(LSTM(units=dimof_output, input_shape=(look_back, dimof_input)))
model.add(Dropout(dropout))
model.add(Dense(dimof_output, activation='softmax'))

# Compile the model
model.compile(loss='mse', optimizer='rmsprop')

# Summary of the model
model.summary()
# -----------------------------------------------------------
num_epoch = 32


# Define early stopping callback
# earlyStopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, verbose=0, mode='auto')

# Fit the model
history = model.fit(
    X_train, Y_train,
    validation_data=(X_test, Y_test),
    epochs=num_epoch,  # corrected from nb_epoch to epochs
    batch_size=batch_size
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 2)                 64        
                                                                 
 dropout_1 (Dropout)         (None, 2)                 0         
                                                                 
 dense_4 (Dense)             (None, 2)                 6         
                                                                 
Total params: 70 (280.00 Byte)
Trainable params: 70 (280.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch

In [16]:
import joblib
from tensorflow.keras.models import model_from_json

model_json = model.to_json()
with open("modelK.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("modelK_weights.h5")

# Lưu siêu tham số và các thông tin khác bằng joblib
model_info = {
    "modelK_json_path": "modelK.json",
    "modelK_weights_path": "modelK_weights.h5",
    "optimizer": {
        "name": "rmsprop",
        "learning_rate": 0.001
    },
    "loss": 'mae'
}

joblib.dump(model_info, 'modelK_info.pkl')

['modelK_info.pkl']