<a href="https://colab.research.google.com/github/minhyeok2487/Deep_Learning/blob/main/LSTM%EC%9D%84_%ED%99%9C%EC%9A%A9%ED%95%9C_NC%EC%86%8C%ED%94%84%ED%8A%B8_%EC%A3%BC%EA%B0%80%EC%98%88%EC%B8%A1/LSTM%EC%9D%84_%ED%99%9C%EC%9A%A9%ED%95%9C_NC%EC%86%8C%ED%94%84%ED%8A%B8_%EC%A3%BC%EA%B0%80%EC%98%88%EC%B8%A1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LSTM을 활용한 NC소프트 주가예측

## 1. 필요한 모듈 Import

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os

%matplotlib inline
warnings.filterwarnings('ignore')

**주가 데이터 관리 패키지 FinanceDataReader**
- [GitHub Repo](https://github.com/FinanceData/FinanceDataReader)

In [None]:
!pip install finance-datareader

In [None]:
import FinanceDataReader as fdr

## 2. 데이터 Load

In [None]:
# NCSoft stock code: 036570
STOCK_CODE = '036570'

In [None]:
# Fetch the price data for STOCK_CODE through FinanceDataReader.
stock = fdr.DataReader(STOCK_CODE)

In [None]:
# Display the loaded frame.
stock

## 3. 데이터 EDA

### 1) 연도, 월, 일 나누기

In [None]:
# Break the date index into separate Year / Month / Day columns.
stock = stock.assign(
    Year=stock.index.year,
    Month=stock.index.month,
    Day=stock.index.day,
)

In [None]:
# Display the frame again to check the new Year / Month / Day columns.
stock

In [None]:
# Group by year and month together, then average every column per group.
year_month_groups = stock.groupby(['Year', 'Month'])
year_month_groups.mean()

### 2) 종가 데이터 그래프

In [None]:
# Line chart of the closing price over the whole loaded period.
fig, ax = plt.subplots(figsize=(16, 9))
ax.plot(stock['Close'])
ax.set_xlabel('Date')
ax.set_ylabel('Price')
plt.show()

In [None]:
# [start, end) year bounds for each panel; rows dated in the end year are excluded.
time_steps = [['2012', '2014'],
              ['2014', '2016'],
              ['2016', '2018'],
              ['2018', '2021']]

fig, axes = plt.subplots(2, 2)
fig.set_size_inches(20, 12)

# One panel per period, filled row by row across the 2x2 grid.
for ax, (start, end) in zip(axes.flat, time_steps):
    # Named `period` so the slice is not mistaken for the modelling frame `df`.
    period = stock.loc[(stock.index >= start) & (stock.index < end)]
    sns.lineplot(y=period['Close'], x=period.index, ax=ax)
    ax.set_title(f'{start}~{end}')
    ax.set_xlabel('time')
    ax.set_ylabel('price')
plt.tight_layout()
plt.show()

### 3) 정규화. MinMaxScaler 전처리

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale the price and volume columns.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# further down, so test-period minima/maxima influence the scaling. Confirm
# this is acceptable for the evaluation.
scale_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
scaler = MinMaxScaler().fit(stock[scale_cols])
scaled = scaler.transform(stock[scale_cols])

In [None]:
# Sanity check: overall minimum and maximum of the scaled array
# (expected 0.0 and 1.0 with MinMaxScaler's default feature_range).
scaled.min(), scaled.max()

### 4) 데이터 프레임 생성

In [None]:
# Wrap the scaled array in a DataFrame that reuses the scaled column names.
# No index is passed, so the rows get a default integer index.
df = pd.DataFrame(scaled, columns=scale_cols)
df

## 4. 모델 생성

### 1) Train 데이터, Test 데이터 분할

In [None]:
from sklearn.model_selection import train_test_split

# Features are every scaled column except 'Close'; 'Close' itself is the target.
# `columns=` replaces the positional axis argument (`df.drop('Close', 1)`),
# which pandas 2.0 no longer accepts.
# shuffle=False keeps the original row order, so the last 20% of rows form the
# test set (random_state has no effect without shuffling).
x_train, x_test, y_train, y_test = train_test_split(df.drop(columns='Close'),
                                                    df['Close'],
                                                    test_size=0.2,
                                                    random_state=0,
                                                    shuffle=False)

In [None]:
# Shapes of the training features and training target.
x_train.shape, y_train.shape

In [None]:
# Shapes of the test features and test target.
x_test.shape, y_test.shape

In [None]:
# Display the training features.
x_train

### 2) Windowed_dataset 함수 생성

In [None]:
import tensorflow as tf


def windowed_dataset(series, window_size, batch_size, shuffle, shuffle_buffer=1000):
    """Turn a 1-D series into batched (input window, next value) pairs.

    Each example uses `window_size` consecutive values as the input and the
    value immediately following them as the target. Windows slide forward one
    step at a time, and incomplete windows at the end are dropped.

    Args:
        series: 1-D sequence of values (anything `tf.expand_dims` accepts).
        window_size: Number of consecutive values in each input window.
        batch_size: Number of examples per batch.
        shuffle: If truthy, shuffle the windows before batching.
        shuffle_buffer: Buffer size passed to `Dataset.shuffle`; only used
            when `shuffle` is truthy. Defaults to 1000.

    Returns:
        A `tf.data.Dataset` yielding `(x, y)` batches, where `x` has shape
        `(batch, window_size, 1)` and `y` has shape `(batch, 1)`. The final
        batch may hold fewer than `batch_size` examples.
    """
    # Add a trailing feature axis so every time step is a length-1 vector.
    series = tf.expand_dims(series, axis=-1)
    ds = tf.data.Dataset.from_tensor_slices(series)
    # window_size inputs plus the one value that follows them as the target.
    ds = ds.window(window_size + 1, shift=1, stride=1, drop_remainder=True)
    # Each window is itself a dataset; batch it into a single tensor.
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    if shuffle:
        ds = ds.shuffle(shuffle_buffer)
    # Split every window into (all but the last step, the last step).
    ds = ds.map(lambda w: (w[:-1], w[-1]))
    return ds.batch(batch_size).prefetch(1)

### 3) 하이퍼 파라미터 정의

In [None]:
# Number of consecutive past values the model sees for each prediction.
WINDOW_SIZE=20
# Number of windowed examples per batch.
BATCH_SIZE=32

In [None]:
# Window the scaled 'Close' targets; only the training windows are shuffled.
train_data = windowed_dataset(
    series=y_train,
    window_size=WINDOW_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_data = windowed_dataset(
    series=y_test,
    window_size=WINDOW_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
# Peek at one batch to confirm the tensor shapes produced by windowed_dataset.
# The target batch is 2-D (one value per example), so its label lists only
# batch_size and the feature count.
for x_batch, y_batch in train_data.take(1):
    print(f'데이터셋(X) 구성(batch_size, window_size, feature갯수): {x_batch.shape}')
    print(f'데이터셋(Y) 구성(batch_size, feature갯수): {y_batch.shape}')

### 4) 모델 구현

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, Lambda
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


# Causal Conv1D front end -> LSTM -> two Dense layers -> fixed x20 output scaling.
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=5, padding='causal', activation='relu',
                 input_shape=[WINDOW_SIZE, 1]))
model.add(LSTM(16))
model.add(Dense(16, activation='relu'))
model.add(Dense(1))
# Scale up the raw output by a factor of 20.
# NOTE(review): the targets fed to this model are MinMax-scaled, so confirm the
# x20 factor is intentional.
model.add(Lambda(lambda x: x*20))

In [None]:
# Huber loss optimised with Adam; MAE is tracked as an additional metric.
# `learning_rate` is the supported keyword. The `lr` alias is deprecated in
# TF 2.x Keras and rejected by newer Keras releases.
loss = Huber()
adam = Adam(learning_rate=0.0005)
model.compile(optimizer=adam, loss=loss, metrics=['mae'])

In [None]:
# Stop training once val_loss has not improved for 10 consecutive epochs.
earlystopping = EarlyStopping(monitor='val_loss', patience=10)
# Keras 3 requires weights-only checkpoint paths to end in '.weights.h5'.
# TF 2.x Keras accepts the same name and writes the weights in HDF5 format.
filename = 'tmp_checkpoint.weights.h5'
# Keep only the weights of the epoch with the best val_loss seen so far.
checkpoint = ModelCheckpoint(filepath=filename,
                             save_best_only=True,
                             save_weights_only=True,
                             monitor='val_loss',
                             verbose=1)

In [None]:
# Train for up to 50 epochs; the windowed test split doubles as validation data
# for early stopping and checkpointing.
model.fit(
    train_data,
    validation_data=test_data,
    epochs=50,
    callbacks=[earlystopping, checkpoint],
)

### 5) 모델 시각화

In [None]:
# Restore the weights written by the checkpoint callback (best val_loss epoch).
model.load_weights(filename)

In [None]:
# Predict the next value for every window in the test dataset.
pred = model.predict(test_data)

In [None]:
# The first WINDOW_SIZE targets only serve as input to the first window, so the
# predictions line up with y_test from index WINDOW_SIZE onward.
plt.figure(figsize=(12, 9))
plt.plot(np.asarray(y_test)[WINDOW_SIZE:], label='actual')
plt.plot(pred, label='prediction')
plt.legend()
plt.show()