In [117]:

import numpy as np
from sklearn.preprocessing import MinMaxScaler, RobustScaler
scaler = MinMaxScaler(feature_range=(0, 1))
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import SimpleRNN, LSTM, TimeDistributed, Input
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
from keras.saving import register_keras_serializable

def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(0, len(dataset)-2 *look_back+1, look_back):
        a = dataset[i:(i+look_back)]
        dataX.append(a)
        dataY.append(dataset[i + look_back: i+ 2*look_back])
    return np.array(dataX), np.array(dataY)

def exponential_moving_average(data, span):
    return data.ewm(span=span, adjust=False).mean()

def read_data(file_path):
    from pandas import read_csv
    series_influ_A_df = read_csv(file_path, index_col=0, engine='python')
    series_influ_A_df = series_influ_A_df.rename(columns= {"Influenza A - All types of surveillance": "case"})
    series_influ_A_df = series_influ_A_df[["case", "temp", "tempmax", "dew","windspeed"]]
    return series_influ_A_df.dropna()

def prepare_data(series, look_back, scaler, is_ema = False):
    if is_ema:
        span = 52  # Bạn có thể điều chỉnh độ dài span tùy ý
        series['case'] = exponential_moving_average(series['case'], span)
    series = series.astype('float32')
    series = series.values
    dataset = scaler.fit_transform(series)

    rest = len(dataset) % look_back
    dataset = dataset[rest:, :]
    trainsize = len(dataset) - look_back
    train = dataset[:trainsize, :]
    test = dataset[trainsize - look_back:, :]

    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    return trainX, trainY, testX, testY

def forecast(input, model):
    predicted = model.predict(input, verbose=0)
    return predicted

In [118]:
@register_keras_serializable()
class MyLSTM (Sequential):
    def __init__(self, look_back, dense_units =[],unit=64, optimizer='adam',name='lstm'):
        super().__init__(name=name)
        self.look_back = look_back
        self.add(Input(shape=(look_back,5)))
        self.add(LSTM(units=unit, activation='relu', return_sequences=True))
        for unit in dense_units:
            self.add(Dense(units=unit, activation='relu'))
        self.add(TimeDistributed(Dense(units=5, activation='sigmoid' )))
        self.compile(optimizer=optimizer, loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError()])


In [119]:
df = read_data('../temp_data/influA_vietnam_last_10_days.csv')
look_back =15
trainX, trainY, testX, testY = prepare_data(df, look_back, scaler, is_ema=True)

In [120]:
model = MyLSTM(look_back=look_back, dense_units=[32,16])

In [121]:
model.summary()

In [122]:
# history = model.fit(trainX, trainY, epochs=200, batch_size=1, verbose=2)

In [123]:
# testY_hat = forecast(testX, model)

In [124]:
# y_hat_inverse = np.expand_dims(scaler.inverse_transform(testY_hat[0]), axis=0)
# y_inverse = np.expand_dims(scaler.inverse_transform(testY[0]), axis=0)


In [125]:
def plot(testY, forecasts):
    import matplotlib.pyplot as plt
    forecastsPlot = forecasts[:,:,0].reshape(-1)
    testPlot = testY[:,:,0].reshape(-1)
    plt.plot(testPlot, "-y", label="actual", marker= '.')
    plt.plot(forecastsPlot, color = 'green', label="forecast")
    plt.ylabel("Number of infections")
    plt.legend(["actual", "forecast"])
    plt.show()


In [126]:
# plot(y_inverse, y_hat_inverse)
# print(model.predict(trainX).shape)
# print(trainY.shape)
# forecasts = model.predict(trainX)
# plot(trainY, forecasts)

In [127]:
import itertools
import tensorflow as tf
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

def LSTM_HyperParameter_Tuning(config, df, scaler):
    
    n_neurons, n_batch_sizes, dropouts, look_backs = config
    possible_combinations = list(itertools.product(n_neurons, n_batch_sizes, dropouts, look_backs))
    
    print(possible_combinations)
    print('\n')
    
    hist = []
    for i in range(0, len(possible_combinations)):
        print(f'{i+1}th combination: \n')
        print('--------------------------------------------------------------------')
        n_neurons, n_batch_size, dropout, look_back = possible_combinations[i]

        trainX, trainY, testX, testY = prepare_data(df, look_back, scaler, is_ema=True)
        model = MyLSTM(look_back=look_back, dense_units=n_neurons[1:], unit=n_neurons[0])

        es = EarlyStopping(monitor='loss', mode='min', verbose=1, patience=5)
        '''''
        From the mentioned article above --> If a validation dataset is specified to the fit() function via the validation_data or v
        alidation_split arguments,then the loss on the validation dataset will be made available via the name “val_loss.”
        '''''
        file_path = '../model/best_lstm_many_to_many_model.keras'

        mc = ModelCheckpoint(file_path, monitor='loss', mode='min', verbose=1, save_best_only=True)

        '''''
        cb = Callback(...)  # First, callbacks must be instantiated.
        cb_list = [cb, ...]  # Then, one or more callbacks that you intend to use must be added to a Python list.
        model.fit(..., callbacks=cb_list)  # Finally, the list of callbacks is provided to the callback argument when fitting the model.
        '''''

        model.fit(trainX, trainY,batch_size=n_batch_size, callbacks=[es, mc], verbose=0, epochs=200)
        train_accuracy = model.evaluate(trainX, trainY, verbose=0)
        test_accuracy = model.evaluate(testX, testY, verbose=0)
        hist.append(list((n_neurons, n_batch_size, dropout,look_back,
                          train_accuracy, test_accuracy)))
        # print(f'{str(i)}-th combination = {possible_combinations[i]} \n train accuracy: {train_accuracy}')
        
        # print('--------------------------------------------------------------------')
        # print('--------------------------------------------------------------------')
        # print('--------------------------------------------------------------------')
        # print('--------------------------------------------------------------------')
         
    return hist

In [128]:
config = [[[64,32,16], [32,16], [64], [32]], [8, 16, 32], [0.2],[12,15,17]]  
hist = LSTM_HyperParameter_Tuning(config, df, scaler)

[([64, 32, 16], 8, 0.2, 12), ([64, 32, 16], 8, 0.2, 15), ([64, 32, 16], 8, 0.2, 17), ([64, 32, 16], 16, 0.2, 12), ([64, 32, 16], 16, 0.2, 15), ([64, 32, 16], 16, 0.2, 17), ([64, 32, 16], 32, 0.2, 12), ([64, 32, 16], 32, 0.2, 15), ([64, 32, 16], 32, 0.2, 17), ([32, 16], 8, 0.2, 12), ([32, 16], 8, 0.2, 15), ([32, 16], 8, 0.2, 17), ([32, 16], 16, 0.2, 12), ([32, 16], 16, 0.2, 15), ([32, 16], 16, 0.2, 17), ([32, 16], 32, 0.2, 12), ([32, 16], 32, 0.2, 15), ([32, 16], 32, 0.2, 17), ([64], 8, 0.2, 12), ([64], 8, 0.2, 15), ([64], 8, 0.2, 17), ([64], 16, 0.2, 12), ([64], 16, 0.2, 15), ([64], 16, 0.2, 17), ([64], 32, 0.2, 12), ([64], 32, 0.2, 15), ([64], 32, 0.2, 17), ([32], 8, 0.2, 12), ([32], 8, 0.2, 15), ([32], 8, 0.2, 17), ([32], 16, 0.2, 12), ([32], 16, 0.2, 15), ([32], 16, 0.2, 17), ([32], 32, 0.2, 12), ([32], 32, 0.2, 15), ([32], 32, 0.2, 17)]


1th combination: 

--------------------------------------------------------------------

Epoch 1: loss improved from inf to 0.06324, saving model

In [129]:
import pandas as pd
hist = pd.DataFrame(hist)
hist = hist.sort_values(by=[4], ascending=True)
hist

Unnamed: 0,0,1,2,3,4,5
11,"[32, 16]",8,0.2,17,"[0.01380903646349907, 0.11751184612512589]","[0.01786300726234913, 0.13365256786346436]"
8,"[64, 32, 16]",32,0.2,17,"[0.014267965219914913, 0.11944859474897385]","[0.021092969924211502, 0.14523419737815857]"
5,"[64, 32, 16]",16,0.2,17,"[0.015939975157380104, 0.1262536197900772]","[0.016859373077750206, 0.12984365224838257]"
14,"[32, 16]",16,0.2,17,"[0.016429759562015533, 0.12817862629890442]","[0.019596202298998833, 0.13998644053936005]"
19,[64],8,0.2,15,"[0.01696666330099106, 0.13025614619255066]","[0.021155333146452904, 0.1454487144947052]"
16,"[32, 16]",32,0.2,15,"[0.017079371958971024, 0.13068807125091553]","[0.02274533174932003, 0.15081554651260376]"
20,[64],8,0.2,17,"[0.017230145633220673, 0.1312636435031891]","[0.01823348179459572, 0.13503141701221466]"
6,"[64, 32, 16]",32,0.2,12,"[0.017268775030970573, 0.13141071796417236]","[0.024304812774062157, 0.1559000164270401]"
7,"[64, 32, 16]",32,0.2,15,"[0.017604876309633255, 0.13268336653709412]","[0.01915542595088482, 0.13840313255786896]"
0,"[64, 32, 16]",8,0.2,12,"[0.017677435651421547, 0.13295651972293854]","[0.029137611389160156, 0.170697420835495]"


In [132]:
from keras.models import load_model

model = load_model("../model/best_lstm_many_to_many_model.keras", custom_objects={'MyLSTM': MyLSTM})

trainX, trainY, testX, testY = prepare_data(df, 17, scaler, is_ema=True)
model.evaluate(testX, testY)

TypeError: <class '__main__.MyLSTM'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.

config={'module': None, 'class_name': 'MyLSTM', 'config': {'name': 'lstm', 'trainable': True, 'dtype': 'float32', 'layers': [{'module': 'keras.layers', 'class_name': 'InputLayer', 'config': {'batch_shape': [None, 17, 5], 'dtype': 'float32', 'sparse': False, 'name': 'input_layer_78'}, 'registered_name': None}, {'module': 'keras.layers', 'class_name': 'LSTM', 'config': {'name': 'lstm_78', 'trainable': True, 'dtype': 'float32', 'return_sequences': True, 'return_state': False, 'go_backwards': False, 'stateful': False, 'unroll': False, 'zero_output_for_mask': False, 'units': 32, 'activation': 'relu', 'recurrent_activation': 'sigmoid', 'use_bias': True, 'kernel_initializer': {'module': 'keras.initializers', 'class_name': 'GlorotUniform', 'config': {'seed': None}, 'registered_name': None, 'shared_object_id': 1718718783808}, 'recurrent_initializer': {'module': 'keras.initializers', 'class_name': 'OrthogonalInitializer', 'config': {'gain': 1.0, 'seed': None}, 'registered_name': None, 'shared_object_id': 1718718788080}, 'bias_initializer': {'module': 'keras.initializers', 'class_name': 'Zeros', 'config': {}, 'registered_name': None, 'shared_object_id': 1718518019216}, 'unit_forget_bias': True, 'kernel_regularizer': None, 'recurrent_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'recurrent_constraint': None, 'bias_constraint': None, 'dropout': 0.0, 'recurrent_dropout': 0.0, 'seed': None}, 'registered_name': None, 'build_config': {'input_shape': [None, 17, 5]}}, {'module': 'keras.layers', 'class_name': 'TimeDistributed', 'config': {'name': 'time_distributed_78', 'trainable': True, 'dtype': 'float32', 'layer': {'module': 'keras.layers', 'class_name': 'Dense', 'config': {'name': 'dense_144', 'trainable': True, 'dtype': 'float32', 'units': 5, 'activation': 'sigmoid', 'use_bias': True, 'kernel_initializer': {'module': 'keras.initializers', 'class_name': 'GlorotUniform', 'config': {'seed': None}, 'registered_name': None}, 'bias_initializer': {'module': 'keras.initializers', 'class_name': 'Zeros', 'config': {}, 'registered_name': None}, 'kernel_regularizer': None, 'bias_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}, 'registered_name': None, 'build_config': {'input_shape': [None, 32]}}}, 'registered_name': None, 'build_config': {'input_shape': [None, 17, 32]}}], 'build_input_shape': [None, 17, 5]}, 'registered_name': 'Custom>MyLSTM', 'build_config': {'input_shape': [None, 17, 5]}, 'compile_config': {'optimizer': 'adam', 'loss': 'mse', 'loss_weights': None, 'metrics': [{'module': 'keras.metrics', 'class_name': 'RootMeanSquaredError', 'config': {'name': 'root_mean_squared_error', 'dtype': 'float32'}, 'registered_name': None}], 'weighted_metrics': None, 'run_eagerly': False, 'steps_per_execution': 1, 'jit_compile': False}}.

Exception encountered: MyLSTM.__init__() missing 1 required positional argument: 'look_back'