# 병합된 데이터로 Attention_LSTM 훈련

In [2]:
import os
# Set before TensorFlow is imported further down in this cell.
# Presumably this pins the run to the GPU with index 1 — confirm on the target machine.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# One seed shared by every random number generator seeded in this cell.
seed_value = 743
print("Train with random seed", seed_value)

import os
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so assigning it
# here likely only affects child processes, not this session — confirm.
os.environ['PYTHONHASHSEED'] = str(seed_value)
import random
random.seed(seed_value)
import numpy as np
np.random.seed(seed_value)
import tensorflow as tf
tf.random.set_seed(seed_value)
from tensorflow.keras import backend as K

Train with random seed 743


In [3]:
import warnings
# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')
import pickle
import joblib 
import pandas as pd
from tqdm import trange
import matplotlib.pyplot as plt

# Wildcard import: this is where `Model`, used when the network is assembled,
# comes from.
from tensorflow.keras.models import *
from tensorflow.keras.layers import Lambda, RepeatVector
from tensorflow.keras.layers import Input, multiply
from tensorflow.keras.layers import Dense, LSTM, Dropout, Flatten
from tensorflow.keras import regularizers

## 평가 지표 : SMAPE

In [6]:
# Define the function to return the SMAPE value
def calculate_smape(actual, predicted) -> float:
    """Return the symmetric mean absolute percentage error (SMAPE) in percent.

    Computes ``mean(|predicted - actual| / ((|predicted| + |actual|) / 2)) * 100``
    and rounds the result to two decimals.

    Args:
        actual: Ground-truth values (NumPy array or any array-like).
        predicted: Predicted values, broadcastable against ``actual``.

    Returns:
        The SMAPE value, rounded to two decimal places.
    """
    # Accept lists, tuples, Series, etc. by converting both inputs up front;
    # float dtype avoids integer wrap-around in the subtraction.
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    abs_error = np.abs(predicted - actual)
    scale = (np.abs(predicted) + np.abs(actual)) / 2

    # When both values are zero the term is 0/0; count it as zero error so a
    # single such pair does not turn the whole score into NaN.
    ratio = np.divide(
        abs_error,
        scale,
        out=np.zeros_like(abs_error),
        where=scale != 0,
    )

    return round(np.mean(ratio) * 100, 2)

## 훈련할 데이터 load, 입력에 맞게 변환(window_size=24)

In [7]:
# Directory holding the pre-split train/valid/test CSV files read below.
# NOTE(review): the target scaler is later loaded from '../Merged_Data/Scaler/',
# a different directory — confirm it was fitted on this same data.
data_path = '../Merged_Data(droped)'

In [8]:
# Read the six split files from `data_path` in one pass; the order of the file
# names matches the order of the target variables on the left-hand side.
(
    train_x_df, train_y_df,
    test_x_df, test_y_df,
    valid_x_df, valid_y_df,
) = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in (
        'train_x.csv', 'train_y.csv',
        'test_x.csv', 'test_y.csv',
        'valid_x.csv', 'valid_y.csv',
    )
)

In [9]:
def create_dataset(X, y, window_size=24):
    """Cut aligned feature/target arrays into sliding-window samples.

    Sample ``i`` is rows ``i .. i + window_size - 1`` of ``X``; its target is
    column 0 of ``y`` at the last row of that window.

    Args:
        X: 2-D array-like of features, one row per time step.
        y: 2-D array-like of targets with the same number of rows as ``X``;
            only column 0 is used.
        window_size: Number of consecutive rows per sample (at least 1).

    Returns:
        Tuple ``(data_x, data_y)`` with shapes
        ``(n, window_size, X.shape[1])`` and ``(n,)``, where
        ``n = max(len(X) - window_size + 1, 0)``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length or ``window_size < 1``.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if len(X) != len(y):
        raise ValueError(
            "X and y must have the same number of rows, got %d and %d"
            % (len(X), len(y))
        )
    if window_size < 1:
        raise ValueError("window_size must be >= 1, got %r" % (window_size,))

    n_windows = len(X) - window_size + 1
    if n_windows <= 0:
        # Too few rows for a single window: return empty arrays that still
        # carry the window/feature dimensions so downstream shape checks work.
        return (
            np.empty((0, window_size) + X.shape[1:], dtype=X.dtype),
            np.empty((0,), dtype=y.dtype),
        )

    data_x = np.array(
        [X[start:start + window_size, :] for start in range(n_windows)]
    )
    # The target of window `start` sits at row `start + window_size - 1`.
    data_y = np.array(y[window_size - 1:, 0])
    return data_x, data_y

In [10]:
# Turn each split's feature/target tables into windowed NumPy arrays, using
# create_dataset's default window size.
train_X, train_y = create_dataset(X=train_x_df.values, y=train_y_df.values)
test_X, test_y = create_dataset(X=test_x_df.values, y=test_y_df.values)
valid_X, valid_y = create_dataset(X=valid_x_df.values, y=valid_y_df.values)

In [11]:
# Sanity check: print the windowed input and target shapes for each split.
print(train_X.shape, train_y.shape)
print(valid_X.shape, valid_y.shape)
print(test_X.shape, test_y.shape)

(55204, 24, 5) (55204,)
(3045, 24, 5) (3045,)
(3046, 24, 5) (3046,)


## Attention_LSTM 모델 정의

In [12]:
def attention_3d_block(inputs, input_dim, single_attention_vector):
    """Re-weight the features of a sequence tensor with softmax attention.

    Args:
        inputs: Keras tensor whose axis 1 is the time axis; called below with
            shape (batch, time_steps, features).
        input_dim: Number of units of the attention Dense layer. It must be
            compatible with the last axis of ``inputs`` for the element-wise
            product (presumably equal to the feature count).
        single_attention_vector: If true, the attention weights are averaged
            over the time axis and the same vector is reused at every step.

    Returns:
        The element-wise product of ``inputs`` and the attention weights.
    """
    n_steps = int(inputs.shape[1])

    # Softmax-activated Dense layer produces the attention weights:
    # (batch, time_steps, input_dim).
    weights = Dense(input_dim, activation='softmax', name='attention_vec')(inputs)

    if single_attention_vector:
        # Average over the time axis -> (batch, input_dim) ...
        weights = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(weights)
        # ... then repeat for every step -> (batch, time_steps, input_dim).
        weights = RepeatVector(n_steps)(weights)

    # Apply the attention weights to the inputs.
    return multiply([inputs, weights], name='attention_mul')

In [13]:
def model_attention_applied_before_lstm(batch_size, time_step, feature_num, single_attention_vector):
    """Build the attention-then-LSTM regression model (not compiled).

    Layers, in order: feature attention block, LSTM with 6 units returning
    the full sequence, Dropout(0.2), Flatten, an L2-regularised Dense(10)
    and a single linear output unit.

    Args:
        batch_size: Accepted for interface compatibility; not used when
            building the graph.
        time_step: Length of each input window.
        feature_num: Number of features per time step.
        single_attention_vector: Forwarded to ``attention_3d_block``.

    Returns:
        A Keras ``Model`` mapping (time_step, feature_num) inputs to one value.
    """
    model_input = Input(shape=(time_step, feature_num))

    # Attention is applied to the raw inputs, before the recurrent layer.
    attended = attention_3d_block(model_input, feature_num, single_attention_vector)

    sequence = LSTM(
        6,
        activation='tanh',
        stateful=False,
        return_sequences=True,
        kernel_initializer='he_normal',
    )(attended)
    sequence = Dropout(0.2)(sequence)

    # Flatten the per-step LSTM outputs into one vector for the dense head.
    flat = Flatten()(sequence)
    hidden = Dense(
        10,
        activation='linear',
        kernel_regularizer=regularizers.l2(0.01),
        activity_regularizer=regularizers.l1(0.),
    )(flat)
    prediction = Dense(1, activation='linear', kernel_initializer='he_normal')(hidden)

    return Model(inputs=[model_input], outputs=prediction)

## 하이퍼 파라미터 정의

In [14]:
# Training and model hyper-parameters.
batch_size = 4
look_back = 24      # window length fed to the model
feature_num = 5     # features per time step
SINGLE_ATTENTION_VECTOR = True

# Build the network and compile it with MSE loss and the Adam optimizer.
model = model_attention_applied_before_lstm(
    batch_size=batch_size,
    time_step=look_back,
    feature_num=feature_num,
    single_attention_vector=SINGLE_ATTENTION_VECTOR,
)
model.compile(optimizer='adam', loss='mean_squared_error')

## 모델 훈련(epoch=100)

In [15]:
# Train the model for 100 epochs, evaluating on the validation split.
# The object returned by fit() is kept in `history`.
history = model.fit(
    x=train_X,
    y=train_y,
    epochs=100,
    batch_size=batch_size,
    validation_data=(valid_X, valid_y),
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

## 모델 저장, test set 예측 결과 확인

In [32]:
# Save the trained model; the captured log shows it written as a directory
# with an `assets` folder.
# NOTE(review): the '.pt' suffix is unusual for a Keras model — confirm the
# installed Keras version accepts it.
model.save('./Saved_Model/Attention_LSTM(epoch=100).pt')
# Predict on the test windows; the second positional argument is the batch size.
test_predict = model.predict(test_X, batch_size)



INFO:tensorflow:Assets written to: ./Saved_Model/Attention_LSTM(epoch=100).pt\assets


INFO:tensorflow:Assets written to: ./Saved_Model/Attention_LSTM(epoch=100).pt\assets


## scaler를 불러와서 inverse transform 적용

In [37]:
# Load the saved target scaler (presumably fitted on the PM10 column — confirm).
# joblib.load unpickles the file, so only load scaler files from a trusted source.
scaler = joblib.load('../Merged_Data/Scaler/Y_pm10.pkl')
# Reshape the 1-D targets into a single column before undoing the scaling.
inv_test_y = scaler.inverse_transform(test_y.reshape(-1, 1))
# Predictions are passed through exactly as returned by model.predict.
inv_test_predict = scaler.inverse_transform(test_predict)

## 성능 평가 : SMAPE

In [43]:
# SMAPE of the test predictions, computed on the inverse-transformed values.
test_smape = calculate_smape(inv_test_y, inv_test_predict)
print("Test SMAPE: %.2f" % test_smape)

Train SMAPE: 22.31
Valid SMAPE: 30.70
Test SMAPE: 16.02


## 결과 시각화

In [None]:
def vizualization_self(train_term,name,test_y,pred_y):
    """Plot the first ``train_term`` actual and predicted values on one figure.

    Args:
        train_term: Number of leading points to draw from each series.
        name: Text inserted into the plot title.
        test_y: Actual target values (drawn as a solid red line).
        pred_y: Predicted values (drawn as a dashed blue line).

    Returns:
        None. The figure is created on the current matplotlib backend.
    """
    plt.figure(figsize=(32, 16))

    x_axis = np.arange(train_term)
    plt.plot(x_axis, test_y[:train_term], color='red', ls='-', lw=3, label='Raw Data')
    # The prediction curve needs its own legend label; sharing 'Raw Data'
    # makes the two lines indistinguishable in the legend.
    plt.plot(x_axis, pred_y[:train_term], color='blue', ls='--', lw=3, label='Prediction')

    plt.xlabel('[Time]', fontsize=25, fontweight='bold')
    plt.ylabel('[PM10]', fontsize=25, fontweight='bold')
    plt.title('Prediction Visualization({:s})'.format(name), fontsize=30, weight='bold')
    plt.xticks(fontsize=15, fontweight='bold')
    plt.yticks(fontsize=15, fontweight='bold')
    plt.grid(True)
    plt.legend()

In [None]:
# Plot the whole test range. The title names the Attention-LSTM that this
# notebook trains and saves, rather than 'CNN-LSTM'.
vizualization_self(len(inv_test_y),'Attention-LSTM,epoch=100',inv_test_y,inv_test_predict)

In [None]:
# Plot only the first 200 test points. The title names the Attention-LSTM
# that this notebook trains and saves, rather than 'CNN-LSTM'.
vizualization_self(200,'Attention-LSTM,epoch=100',inv_test_y,inv_test_predict)