# 1. Import Libraries

In [None]:
import os
import pandas as pd
import numpy as np
from pandas import read_csv
from datetime import datetime
from pandas import DataFrame
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold, train_test_split, TimeSeriesSplit
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam
from keras.models import load_model
from google.colab import drive
# Mount Google Drive at /content/drive so the dataset and the saved-model
# paths used later in this notebook (under /content/drive/MyDrive) resolve.
drive.mount('/content/drive')

# 2. Define Functions

In [None]:
def dataset_builder(df, y_index, history_points):
    """Build sliding-window inputs and next-step targets from a DataFrame.

    Every column of ``df`` is min-max scaled to the 0~1 range. Each sample's
    input is a window of ``history_points`` consecutive scaled rows, and its
    target is the scaled value of column ``y_index`` in the row immediately
    after that window.

    Args:
        df: DataFrame holding only numeric columns. Rows are presumably in
            chronological order (TODO: confirm against the data source).
        y_index: Positional index of the column to predict.
        history_points: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(x_set, y_set, y_set_unnorm, y_set_normalizer)``:
            x_set: array (n_samples, history_points, n_columns) of scaled
                input windows.
            y_set: array (n_samples, 1) of scaled targets.
            y_set_unnorm: array (n_samples, 1) of targets in original units.
            y_set_normalizer: MinMaxScaler fitted on the whole target column;
                its ``inverse_transform`` maps ``y_set`` (or predictions in
                the same scale) back to original units.

    Raises:
        ValueError: If ``history_points`` is negative or ``df`` does not have
            more than ``history_points`` rows.
    """
    arr = df.to_numpy()
    n_samples = len(arr) - history_points
    if history_points < 0 or n_samples <= 0:
        raise ValueError(
            'df must have more rows than history_points '
            f'(rows={len(arr)}, history_points={history_points})'
        )

    # Scale every column into the 0~1 range.
    # NOTE: the scaler is fit on all rows of df, i.e. before any train/test
    # split is made, so later hold-out rows contribute to the min/max.
    arr_normalizer = preprocessing.MinMaxScaler()
    arr_normalized = arr_normalizer.fit_transform(arr)

    # Inputs: for each sample, stack `history_points` consecutive rows
    # (all variables) -> one window per sample.
    x_set = np.array(
        [arr_normalized[i:i + history_points] for i in range(n_samples)]
    )
    print('x_set: ', x_set.shape)

    # Targets: the value of column `y_index` right after each window.
    # Indexing with a list keeps the column axis, giving an (N, 1) array.
    y_set = arr_normalized[history_points:, [y_index]]
    print('y_set: ', y_set.shape)

    y_set_unnorm = arr[history_points:, [y_index]]

    # Fit the target scaler on the FULL column so it uses exactly the same
    # min/max that produced y_set above. Fitting only on the rows after the
    # first window would give a different scale whenever the column's
    # extreme value lies in the first `history_points` rows.
    y_set_normalizer = preprocessing.MinMaxScaler()
    y_set_normalizer.fit(arr[:, [y_index]])

    # Input X : the variables over the preceding window
    #           (e.g. from T = -50 to T = -1 when history_points is 50)
    # Output y: the variable to predict at T = 0

    assert x_set.shape[0] == y_set.shape[0]
    return x_set, y_set, y_set_unnorm, y_set_normalizer

def split_train_test(x_set, y_set, y_set_unnorm, n_test):
    """Hold out the last ``n_test`` samples as the test set.

    The split is positional (no shuffling): everything before the last
    ``n_test`` samples is training data, the remainder is test data.

    Args:
        x_set: Input samples, indexable along the first axis and exposing
            ``.shape`` (e.g. a NumPy array).
        y_set: Targets aligned with ``x_set``.
        y_set_unnorm: Targets in original units aligned with ``x_set``.
        n_test: Number of trailing samples to hold out. ``0`` keeps every
            sample in the training set; a value larger than the number of
            samples puts every sample in the test set.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test, y_set_unnorm_test)``.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError(f'n_test must be non-negative, got {n_test}')

    def _cut(arr):
        # Explicit start index of the test tail. Slicing with `-n_test`
        # breaks for n_test == 0 (`arr[:-0]` is empty, `arr[-0:]` is all).
        return max(len(arr) - n_test, 0)

    x_cut = _cut(x_set)
    y_cut = _cut(y_set)

    x_train, x_test = x_set[:x_cut], x_set[x_cut:]
    y_train, y_test = y_set[:y_cut], y_set[y_cut:]

    y_set_unnorm_test = y_set_unnorm[_cut(y_set_unnorm):]

    print('x_train: ', x_train.shape)
    print('x_test: ', x_test.shape)
    print('y_train: ', y_train.shape)
    print('y_test: ', y_test.shape)

    return x_train, y_train, x_test, y_test, y_set_unnorm_test

def model_builder(x_train):
    """Build and compile a stacked-LSTM regressor for next-step prediction.

    Architecture: LSTM(64, full sequence) -> LSTM(32, last step) ->
    Dropout(0.2) -> Dense(16, relu) -> Dense(1, linear), compiled with
    mean-squared-error loss and the Adam optimizer. The model summary is
    printed as a side effect.

    Args:
        x_train: 3-D array of training inputs; only its shape is used, with
            axis 1 as the time-step count and axis 2 as the feature count.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    model = keras.Sequential()
    model.add(layers.Input(batch_shape=(None, x_train.shape[1], x_train.shape[2])))
    # First LSTM returns the whole sequence so the second LSTM can consume it.
    model.add(layers.LSTM(64, return_sequences=True))
    model.add(layers.LSTM(32, return_sequences=False))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(16, activation='relu'))
    # Regression head: a single linear unit. Softmax normalizes across the
    # units of the layer, so with one unit it would always output 1.0 and the
    # network could never fit the targets.
    model.add(layers.Dense(1, activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.summary()
    return model

# 3. Import Dataset and Preprocessing
    Note: change the file path in the cell below to match the location of your dataset.

In [None]:
# Read the raw spreadsheet from the mounted Drive folder.
df = pd.read_excel(
    '/content/drive/MyDrive/Colab Notebooks/predict_220720_1.xlsx'
)

# Turn it into 50-row input windows with column 0 as the prediction target.
(x_set, y_set, y_set_unnorm, y_set_normalizer) = dataset_builder(
    df,
    y_index=0,
    history_points=50,
)

In [None]:
# Number of trailing samples held out for the final test set.
n_test = 24

(x_train, y_train, x_test, y_test, y_set_unnorm_test) = split_train_test(
    x_set,
    y_set,
    y_set_unnorm,
    n_test,
)

# 4. Build Model and Training

In [None]:
# Build and compile the network; model_builder only reads x_train's shape
# (time steps, features) to size the input layer.
model = model_builder(x_train)

In [None]:
# Stop a fit() call once val_loss has not improved for 15 consecutive epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=15)

model_path = '/content/drive/MyDrive/Colab Notebooks/saved_models'
# Make sure the target folder exists; otherwise the first checkpoint write
# fails in the middle of training.
os.makedirs(model_path, exist_ok=True)
filename = os.path.join(model_path, 'tmp_checkpoint.h5')

# Write the model to `filename` whenever val_loss improves.
checkpoint = ModelCheckpoint(filepath=filename,
                             save_best_only=True,
                             monitor='val_loss',
                             verbose=1)

In [None]:
# Time-ordered cross-validation: each fold validates on samples that come
# after the samples it trains on.
tscv = TimeSeriesSplit(gap=0, max_train_size=None, n_splits=10, test_size=None)

# Validation RMSE of each fold, in the same (scaled) units as y_train.
rmse = []
for train_index, val_index in tscv.split(x_train):
    print("TRAIN:", train_index, "VAL:", val_index)
    x_train_train, x_train_val = x_train[train_index], x_train[val_index]
    y_train_train, y_train_val = y_train[train_index], y_train[val_index]

    # NOTE: the same model and callback objects are reused for every fold,
    # so each fit() continues from the weights left by the previous fold.
    with tf.device("/device:GPU:0"):
        history = model.fit(x_train_train,
                            y_train_train,
                            epochs=200,
                            batch_size=10,
                            shuffle=False,  # keep chronological batch order
                            validation_data=(x_train_val, y_train_val),
                            callbacks=[checkpoint, early_stop],
                            verbose=1)

    # evaluate() needs the targets to compute the loss. The model is compiled
    # with mean squared error only, so the result is a scalar MSE; take the
    # square root to store an actual RMSE.
    val_mse = model.evaluate(x_train_val, y_train_val)
    rmse.append(float(np.sqrt(val_mse)))

    # To keep a per-fold snapshot as well, save the model here, e.g.
    # model.save(os.path.join(model_path, 'model_<fold>.h5')).

In [None]:
# Display the list of per-fold values collected by the cross-validation loop.
rmse

# 5. Prediction and Plot

In [None]:
# Reload the model stored at the checkpoint path and predict the held-out
# test windows.
model = keras.models.load_model(filename)
pred = model.predict(x_test)
print(pred.shape, y_test.shape)

# Both plotted series start with the training targets; they differ only in
# the tail (actual test targets vs. model predictions).
y_true_plot, y_pred_plot = (
    np.concatenate((y_train, tail), axis=0) for tail in (y_test, pred)
)

In [None]:
# Overlay the actual and predicted series on a single 12x9 inch figure.
plt.figure(figsize=(12, 9))
for series, series_label in ((y_true_plot, 'actual'),
                             (y_pred_plot, 'prediction')):
    plt.plot(series, label=series_label)
plt.legend()
plt.show()