In [34]:

%matplotlib inline
import os
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import SimpleRNN

from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import math
from sklearn.metrics import mean_squared_error
import FinanceDataReader as fdr

In [45]:
pandf = fdr.DataReader("HSI","2009-01-01","2019-01-01")


In [46]:
#특성 추가 ------------------------------------------
    #이동평균선
def get_MA(df):
    MA_26=df["Close"].rolling(26).mean()
    MA_52=df["Close"].rolling(52).mean()
    df=df.assign(MA_26=MA_26,MA_52=MA_52).dropna()
    
    return df

    
    #스토캐스틱
def get_stochastic(df, n=15, m=5, t=3):
    # n일중 최고가
    ndays_high = df.High.rolling(window=n, min_periods=1).max()
    # n일중 최저가
    ndays_low = df.Low.rolling(window=n, min_periods=1).min()
 
    # Fast%K 계산
    kdj_k = ((df.Close - ndays_low) / (ndays_high - ndays_low))*100
    # Fast%D (=Slow%K) 계산
    kdj_d = kdj_k.ewm(span=m).mean()
    # Slow%D 계산
    kdj_j = kdj_d.ewm(span=t).mean()
 
    # dataframe에 컬럼 추가
    df = df.assign(kdj_k=kdj_k, kdj_d=kdj_d, kdj_j=kdj_j).dropna()
    
    return df
   
    #시간
def get_time(df):
    time=np.linspace(0,10,len(df),endpoint=False).reshape(-1,1)
    df=df.assign(time=time)
    
    return df

In [47]:
df=pandf
df=get_time(df)
df=get_stochastic(df)
df=get_MA(df)
# convert nparray

nparr = df.values
nparr.astype('float32')
print(nparr)

[[1.31309200e+04 1.32051700e+04 1.32051700e+04 ... 7.21837671e+01
  1.26547212e+04 1.31545973e+04]
 [1.28335100e+04 1.31579000e+04 1.31579000e+04 ... 7.62654446e+01
  1.26395369e+04 1.31121108e+04]
 [1.34474200e+04 1.30015500e+04 1.34510200e+04 ... 8.15532789e+01
  1.26354119e+04 1.30714206e+04]
 ...
 [2.54788800e+04 2.58188100e+04 2.59206300e+04 ... 1.90077156e+01
  2.61613235e+04 2.58929304e+04]
 [2.55042000e+04 2.55023400e+04 2.56002500e+04 ... 1.75605486e+01
  2.61483950e+04 2.58872133e+04]
 [2.58457000e+04 2.57325200e+04 2.59162300e+04 ... 2.11423913e+01
  2.61435577e+04 2.58949179e+04]]


In [48]:
# normalization
scaler = MinMaxScaler(feature_range=(0, 1))
nptf = scaler.fit_transform(nparr)

In [49]:
# 값과 이평선 비교하기 ----------------

MA_26=df["Close"].rolling(26).mean()
y_before=MA_26.dropna()
df_before=df["Close"][25:]
y=np.where(df_before.shift(-1)>y_before.shift(-1),1,0)
X=nptf[25:]

In [50]:
# split train, test
train_size = int(len(nptf) * 0.7)
y_train2=y[:train_size]
y_test2=y[train_size:]
X_train2 = X[:train_size]
X_test2 = X[train_size:]



In [51]:
# reshape input to be [samples, time steps, features]
X_train = np.reshape(X_train2, (X_train2.shape[0], 1, X_train2.shape[1]))
X_test = np.reshape(X_test2, (X_test2.shape[0], 1, X_test2.shape[1]))
y_train=y_train2
y_test=y_test2


In [52]:
# simple lstm network learning
model = Sequential()
model.add(LSTM(36, input_shape=(1, 12)))
for i in range(5):
    model.add(Dense(36,activation='sigmoid'))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
model.fit(X_train, y_train, epochs=10, batch_size=16,validation_split=0.1)
model.evaluate(X_test,y_test)

Train on 1526 samples, validate on 170 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.3620023230711619, 0.8461538553237915]

In [53]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,np.where(model.predict(X_test)>0.5,1,0))

0.8461538461538461

In [54]:
model.predict(X_test)

array([[0.94225633],
       [0.9044627 ],
       [0.9497017 ],
       [0.95666456],
       [0.95524436],
       [0.9595672 ],
       [0.95625424],
       [0.95052505],
       [0.94915533],
       [0.9469029 ],
       [0.9557338 ],
       [0.95875573],
       [0.95522386],
       [0.9546546 ],
       [0.95857704],
       [0.9606434 ],
       [0.96002525],
       [0.9585706 ],
       [0.95614624],
       [0.9197894 ],
       [0.91212547],
       [0.95739615],
       [0.95504606],
       [0.92672455],
       [0.60678214],
       [0.19961902],
       [0.17246139],
       [0.29376018],
       [0.4362071 ],
       [0.63524884],
       [0.9444803 ],
       [0.9433533 ],
       [0.94794536],
       [0.9399066 ],
       [0.9569495 ],
       [0.9491548 ],
       [0.9602443 ],
       [0.95695704],
       [0.95188653],
       [0.95542455],
       [0.95350814],
       [0.953367  ],
       [0.9162148 ],
       [0.37403074],
       [0.13834783],
       [0.10417059],
       [0.08462957],
       [0.083

In [33]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test,np.where(model.predict(X_test)>0.5,1,0)))

[[251  46]
 [ 61 344]]


In [12]:
MA_26=df["Close"].rolling(26).mean()
MA_26=MA_26.dropna()
MA_26.shape


(2398,)

In [13]:
pandf.shape

(2474, 6)

In [14]:
MA_26.shape

(2398,)

In [15]:
X_train2.shape

(1696, 12)