## 1. Import Packages.

In [None]:
from preprocess_utils import (load_data, 
                              datetime_convert, 
                              data_split, 
                              Normalize)

from model_utils import (ARIMA_Model, 
                        split_sequence,
                        split_sequence_only_y, 
                        LSTM_model)

from visualize_utils import (timeseries_plotting, 
                             predicted_plotting)

import pandas as pd
import json
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

## 2. Load Parameters

In [None]:
## Load the run configuration.
# 'parameters.params' is parsed as JSON; the parsed object is kept in `config`.
with open('parameters.params', 'r') as cfg:
    config = json.loads(cfg.read())

In [None]:
# Pull each top-level section of the configuration into its own variable.
(main_parameters,
 arima_parameters,
 xgboost_parameters,
 rnn_parameters,
 lstm_parameters) = (
    config[section]
    for section in (
        'main_parameters',
        'arima_parameters',
        'xgboost_parameters',
        'rnn_parameters',
        'lstm_parameters',
    )
)


## 3. Load data.

### (1) Load a data file.

In [None]:
# Read the dataset from the path configured under 'data_dir'; the file is
# requested with the EUC-KR encoding.
data = load_data(
    main_parameters['data_dir'],
    encoding='euc-kr',
)
# Notebook preview (presumably `data` is a pandas DataFrame - confirm in load_data).
data.head()

In [None]:
# Column holding the series values.
value_column = main_parameters['value_column']
# 'time_column' is a sequence: element 0 is the column name; element 1 is used
# elsewhere in this file as the datetime format.
time_column = main_parameters['time_column'][0]

### (2) Date to Datetime and Sorting

In [None]:
# Convert the time column using the format stored next to its name in the
# configuration, then order the rows by time and renumber the index from 0.
data = datetime_convert(data, time_column, format=main_parameters['time_column'][1])
data = data.sort_values(by=time_column)
data = data.reset_index(drop=True)
data.head()

### (3) Original Timeseries Plot

In [None]:
# Pull out the two columns that make up the raw series: timestamps and values.
times = data[time_column]
values = data[value_column]

In [None]:
# Plot the whole series (values against time); this runs before any train/test split.
timeseries_plotting(times, values)


### (4) Train, Test Split

#### - 비율 기준 (0.8 : 0.2)

In [None]:
# Split by proportion, passing 0.8 as `proportion`.
# NOTE: a later cell in this file calls data_split again (by date) and
# reassigns train/test, so only the split executed last is in effect.
train, test = data_split(data, time_column, by='proportion', proportion=0.8)
test.head()


#### - 날짜 기준 ('2020-01-01')

In [None]:
# Split by date, passing '2020-01-01' as the separator.
# This reassigns train/test, replacing the proportion-based split made
# earlier in this file.
train, test = data_split(data, time_column, by='time', time_sep='2020-01-01')
test.head()


## 4. ARIMA

### (1) ARIMA Fitting (Best Diff.)

In [None]:
# Wrap the training values in the project's ARIMA helper.
arima_model = ARIMA_Model(train[value_column])

# Let the helper estimate the differencing term, then build the model with
# that value and with the seasonal option switched off.
best_diffs = arima_model.estimate_diff()
arima_model.build_model(diff=best_diffs, seasonal=False)

### (2) ARIMA Model Summary

In [None]:
# Show the summary of the model built above (content is defined by ARIMA_Model.summary).
arima_model.summary()

### (3) ARIMA Model diagnostics

In [None]:
# Draw the diagnostic plots for the model built above (defined by ARIMA_Model.plot_diagnostics).
arima_model.plot_diagnostics()

### (4) Test Prediction

In [None]:
# Forecast as many steps as there are rows in the test split. The result is
# unpacked as (prediction, upper bound, lower bound), following the target names.
# NOTE(review): `time_test` receives the test *value* column, not the time
# column - confirm against ARIMA_Model.predict that this is intended.
pred, pred_upper, pred_lower = arima_model.predict(
    len(test[time_column]),
    time_test=test[value_column],
)

### (5) Prediction Visualization

In [None]:
# Plot the training series, the ARIMA forecast with its [lower, upper] bounds,
# and the actual test values over the test dates.
predicted_plotting(
    train_date=train[time_column],
    train_value=train[value_column],
    predicted_date=test[time_column],
    predicted_value=pred,
    predicted_conf=[pred_lower, pred_upper],
    ground_truth=test[value_column],
)

### (6) Performance Metrics

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# scikit-learn metrics take (y_true, y_pred). MSE is symmetric, but R2 is not:
# it normalises by the variance of the first argument, so the ground truth
# must be passed first for the score to be correct.
print("Test MSE score: {}".format(mean_squared_error(test[value_column], pred)))
print("Test R2 score: {}".format(r2_score(test[value_column], pred)))

## 6. LSTM

### (1) Options Setting

In [None]:
# --- Windowing / architecture ---
# Passed as `n_steps` to the sequence-splitting helpers and as `nsteps` to LSTM_model.
nsteps = 5
# Passed as `lstm_layers` to LSTM_model (presumably one unit count per layer - confirm).
lstm_layers = [50, 50]

# --- Training ---
# NOTE(review): these values are hard-coded although `lstm_parameters` is read
# from the configuration earlier in this file; 0.1 is also far above Adam's
# usual default step size - confirm both are intended.
learning_rate = 0.1
batch_size = 16
epochs = 100

### (2) LSTM Dataset 구성

#### - 단일 y만 가지고 분석

In [None]:
# Feature count for the target-only variant; used below as the last axis of
# the reshape and as the `nfeatures` argument of LSTM_model.
nfeatures = 1

In [None]:
# Build (X, y) pairs from the target column alone, separately for the train
# and test splits, using the same window length for both.
(train_X, train_y) = split_sequence_only_y(train[value_column], n_steps=nsteps)
(test_X, test_y) = split_sequence_only_y(test[value_column], n_steps=nsteps)

#### - x feature 추가 사용

In [None]:
# Variant with extra input columns. This reassigns nfeatures and the
# train/test arrays, replacing the target-only dataset built earlier in this file.
features = ['거래량', '거래대금', '상장시가총액']
nfeatures = 3  # matches the number of entries in `features`

(train_X, train_y) = split_sequence(train.loc[:, features], train[value_column], n_steps=nsteps)
(test_X, test_y) = split_sequence(test.loc[:, features], test[value_column], n_steps=nsteps)


#### - 차원 재구성

In [None]:
# Keep the first two axes as they are and force the last axis to `nfeatures`,
# giving 3-D inputs for both splits.
train_X = train_X.reshape((*train_X.shape[:2], nfeatures))
test_X = test_X.reshape((*test_X.shape[:2], nfeatures))

### (3) LSTM 모델 구성 (2 LSTM layers)

In [None]:
# Build the network from the options set above.
model = LSTM_model(
    lstm_layers=lstm_layers,
    nsteps=nsteps,
    nfeatures=nfeatures,
)

### (4) LSTM 모델 학습

In [None]:
# All three callbacks watch the validation loss ('val_loss', lower is better).
# NOTE(review): validation_data below is the test split, so early stopping,
# checkpoint selection and LR scheduling are all driven by the data that is
# later used for evaluation - consider holding out a separate validation set.
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
# Keeps only the best weights (by val_loss) at ./result/best_model.h5.
mcp_save = ModelCheckpoint('./result/best_model.h5', save_best_only=True, save_weights_only=True, monitor='val_loss', mode='min')
# `min_delta` is the current name of the argument formerly called `epsilon`.
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, min_delta=1e-4, mode='min')
# `learning_rate` replaces the deprecated `lr` keyword of the optimizer.
optim = tf.keras.optimizers.Adam(learning_rate=learning_rate)

model.compile(optimizer=optim, loss='mse')
model.fit(train_X, train_y, epochs=epochs, batch_size=batch_size, callbacks=[earlyStopping, mcp_save, reduce_lr_loss], validation_data=(test_X, test_y))

### (5) 예측

In [None]:
# Predict on the test windows with the model as it stands after fit().
# NOTE(review): the best-weights checkpoint written during training is not
# reloaded anywhere in the visible code - confirm whether that is intended.
yhat = model.predict(test_X, verbose=0)
# Quick look at the first three predictions.
print(yhat[:3])

### (6) 시각화

In [None]:
# Plot the LSTM predictions against the actual test values.
# The first `nsteps` test rows are skipped so dates and ground truth line up
# with the predictions; the offset follows `nsteps` instead of a hard-coded 5
# so the plot stays aligned if the window length changes.
# (Assumes the split helper yields one sample per row after the first
# `nsteps` rows - confirm in model_utils.)
predicted_plotting(train_date = train[time_column], 
                   train_value = train[value_column], 
                   predicted_date = test[time_column][nsteps:], 
                   predicted_value = yhat[:, 0],
                   ground_truth = test[value_column][nsteps:])

In [None]:
# Same plot, but showing only the last two training points so the test
# period fills the figure.
# The first `nsteps` test rows are skipped so dates and ground truth line up
# with the predictions; the offset follows `nsteps` instead of a hard-coded 5.
# (Assumes the split helper yields one sample per row after the first
# `nsteps` rows - confirm in model_utils.)
predicted_plotting(train_date = train[time_column][-2:], 
                   train_value = train[value_column][-2:], 
                   predicted_date = test[time_column][nsteps:], 
                   predicted_value = yhat[:, 0],
                   ground_truth = test[value_column][nsteps:])