# Stock 

## 주가 예측 LSTM

---

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline


### 파일 로드

In [None]:
# Read the price CSV at data/samsung.csv (decoded as UTF-8) into a DataFrame.
df_price = pd.read_csv('data/samsung.csv', encoding='utf8')
# Last expression of the cell, so the notebook renders the frame.
df_price

In [None]:
# Summary statistics for the loaded frame.
df_price.describe()

### 날짜 형식 변경

In [None]:
# Inspect the Python type of the first '일자' (date) value before converting it.
type(df_price['일자'][0])

In [None]:
# Parse the '일자' (date) column as YYYYMMDD values into pandas datetimes.
df_price['일자'] = pd.to_datetime(df_price['일자'], format='%Y%m%d')
# Render the frame to confirm the conversion.
df_price

### 날짜 컬럼 생성

In [None]:
# Break the parsed date into separate year ('연도'), month ('월') and
# day ('일') columns.
date_parts = df_price['일자'].dt
df_price['연도'] = date_parts.year
df_price['월'] = date_parts.month
df_price['일'] = date_parts.day

In [None]:
# Render the frame, now including the year/month/day columns.
df_price

In [None]:
# Keep only the rows from 1990 onward.
df = df_price[df_price['연도'] >= 1990]

# Line chart of the closing price ('종가') over the date ('일자').
plt.figure(figsize=(12, 8))
sns.lineplot(data=df, x='일자', y='종가')
plt.xlabel('time')
plt.ylabel('price')
plt.show()

### Normalization

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Columns to normalize: open, high, low, close, volume.
scale_cols = ['시가', '고가', '저가', '종가', '거래량']

In [None]:
# Min-max scale the selected columns and wrap the resulting array in a new
# DataFrame that carries the same column names (its row index starts again
# from 0).
# NOTE(review): the scaler is fit on every row of `df`, including the rows
# that are later held out as the test split - consider fitting on the
# training rows only.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df[scale_cols])
df_scaled = pd.DataFrame(scaled_values, columns=scale_cols)
df_scaled

### Split Data: Train + Validation + Test

In [None]:
from sklearn.model_selection import train_test_split

#### Data type 수정 함수: DataFrame --> array

In [None]:
def make_dataset(data, label, window_size=20):
    """Build sliding-window samples from row-aligned features and labels.

    Each sample is ``window_size`` consecutive rows of ``data``; its target
    is the entry of ``label`` at the position right after that window.

    Args:
        data: pandas object holding the features, sliced by position
            through ``.iloc``.
        label: pandas object holding the targets, in the same row order
            as ``data``.
        window_size: number of consecutive rows in each sample.

    Returns:
        A ``(features, labels)`` tuple of numpy arrays, each with
        ``len(data) - window_size`` entries along the first axis.
    """
    starts = range(len(data) - window_size)
    windows = [np.array(data.iloc[s:s + window_size]) for s in starts]
    targets = [np.array(label.iloc[s + window_size]) for s in starts]
    return np.array(windows), np.array(targets)

#### 컬럼 설정: 독립변수, 종속변수

In [None]:
# Model inputs: open, high, low, volume.
feature_cols = ['시가', '고가', '저가', '거래량']
# Prediction target: closing price.
label_cols   = ['종가']

#### Data set: Train + Test

In [None]:
# TEST_SIZE: number of trailing rows held out as the test split.
# WIN_SIZE: number of consecutive rows in each input window.
TEST_SIZE = 200
WIN_SIZE = 20

# Positional split: everything but the last TEST_SIZE rows is training data.
train = df_scaled.iloc[:-TEST_SIZE]
test = df_scaled.iloc[-TEST_SIZE:]

#### Test data set 생성

In [None]:
# Build windowed samples and next-row labels from the held-out test rows.
x_test, y_test = make_dataset( test[feature_cols], test[label_cols] , WIN_SIZE)

# Show both shapes; the line below records the output of an earlier run.
x_test.shape, y_test.shape
# ((180, 20, 4), (180, 1))

#### Train, Validation data set 생성

In [None]:
# Turn the training rows into sliding windows with next-row labels.
train_feature, train_label = make_dataset(train[feature_cols], train[label_cols], WIN_SIZE)

# Split chronologically instead of at random. Consecutive windows share all
# but one row, so a shuffled split would place near-copies of training
# windows in the validation set and make val_loss (which drives early
# stopping and checkpoint selection) look better than it is. With
# shuffle=False the last 20% of windows form the validation set and the
# split is reproducible from run to run.
x_train, x_valid, y_train, y_valid = train_test_split(
    train_feature, train_label, test_size=0.2, shuffle=False
)

# Show both shapes; the line below records the output of an earlier run
# (the split sizes do not depend on shuffling).
x_train.shape, x_valid.shape
# ((6086, 20, 4), (1522, 20, 4))

### LSTM

In [None]:
#!pip install keras

In [None]:
#!pip install tensorflow

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import LSTM

### 모델 생성

In [None]:
# One LSTM layer with 16 units reading (window length, feature count) inputs,
# followed by a Dense(1) output layer.
window_shape = (train_feature.shape[1], train_feature.shape[2])

model = Sequential([
    LSTM(16, input_shape=window_shape, activation='relu', return_sequences=False),
    Dense(1),
])

### Training

In [None]:
%%time
# File that receives the best checkpoint written during training.
filename = './data/tmp_checkpoint.h5'

# Stop once val_loss has gone 5 epochs without improving, and write a
# checkpoint only when val_loss improves.
early_stop = EarlyStopping(monitor='val_loss', patience=5)
checkpoint = ModelCheckpoint(
    filename,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='auto',
)

model.compile(loss='mean_squared_error', optimizer='adam')

# An earlier version of this cell used epochs=200; 20 keeps the run short.
history = model.fit(
    x_train,
    y_train,
    epochs=20,
    batch_size=16,
    validation_data=(x_valid, y_valid),
    callbacks=[early_stop, checkpoint],
)

### weight 로딩

In [None]:
# Restore the weights from the checkpoint file written during training.
model.load_weights(filename)

### Prediction

In [None]:
# Predict the (scaled) closing price for every test window.
pred = model.predict(x_test)

### Visualization

In [None]:
# Overlay the predictions on the actual test labels; both series are in the
# scaled units produced by the normalization step.
fig, ax = plt.subplots(figsize=(12, 9))
ax.plot(y_test, label='actual')
ax.plot(pred, label='prediction')
ax.legend()
plt.show()

---

In [None]:
# End of file