In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
kospi = pd.read_csv('KOSPI_all2.csv')
kospi

Unnamed: 0,SIDX,SCODE,SCOMP,SDAY,OPEN_P,HIGH_P,LOW_P,CLOSE_P,VOL
0,20201008095570,95570,AJ네트웍스,20201008,3285,3330,3225,3300,62021
1,20201007095570,95570,AJ네트웍스,20201007,3345,3345,3200,3280,81367
2,20201006095570,95570,AJ네트웍스,20201006,3380,3460,3315,3340,199832
3,20201005095570,95570,AJ네트웍스,20201005,3250,3400,3230,3380,211552
4,20200929095570,95570,AJ네트웍스,20200929,3225,3290,3195,3245,49795
...,...,...,...,...,...,...,...,...,...
212431,20190910003280,3280,흥아해운,20190910,456,465,452,462,115411
212432,20190909003280,3280,흥아해운,20190909,465,471,451,456,137956
212433,20190906003280,3280,흥아해운,20190906,466,469,458,465,163669
212434,20190905003280,3280,흥아해운,20190905,462,467,456,465,325544


In [3]:
# 신풍제약 주가 데이터만 추출

containing = kospi['SCOMP'].str.contains('신풍제약')
shinpoong = kospi[containing]

In [4]:
shinpoong_ready = shinpoong.reset_index()
shinpoong_ready.drop(['index'],axis='columns',inplace=True)

In [5]:
shinpoong_ready

Unnamed: 0,SIDX,SCODE,SCOMP,SDAY,OPEN_P,HIGH_P,LOW_P,CLOSE_P,VOL
0,20201008019170,19170,신풍제약,20201008,138000,140500,135500,140000,1301367
1,20201007019170,19170,신풍제약,20201007,134000,144000,132500,135500,3006442
2,20201006019170,19170,신풍제약,20201006,127500,137000,125000,134000,2832451
3,20201005019170,19170,신풍제약,20201005,130500,134500,122500,127000,3157659
4,20200929019170,19170,신풍제약,20200929,137000,138000,123500,126500,3151057
...,...,...,...,...,...,...,...,...,...
265,20190910019170,19170,신풍제약,20190910,6250,6250,5990,6020,95005
266,20190909019170,19170,신풍제약,20190909,6070,6130,5960,5980,88095
267,20190906019170,19170,신풍제약,20190906,6070,6110,6000,6070,85538
268,20190905019170,19170,신풍제약,20190905,6150,6160,6030,6050,134077


In [6]:
# 시간데이터 포맷 변환

shinpoong_ready['일자'] = pd.to_datetime(shinpoong_ready['SDAY'], format='%Y%m%d')

In [7]:
cols = shinpoong_ready.columns.tolist()
cols

['SIDX',
 'SCODE',
 'SCOMP',
 'SDAY',
 'OPEN_P',
 'HIGH_P',
 'LOW_P',
 'CLOSE_P',
 'VOL',
 '일자']

In [8]:
cols = cols[4:]

In [9]:
shinpoong_ready = shinpoong_ready[cols]

In [10]:
shinpoong_ready = shinpoong_ready[['일자','OPEN_P','HIGH_P','LOW_P','CLOSE_P','VOL']]

In [11]:
shinpoong_ready

Unnamed: 0,일자,OPEN_P,HIGH_P,LOW_P,CLOSE_P,VOL
0,2020-10-08,138000,140500,135500,140000,1301367
1,2020-10-07,134000,144000,132500,135500,3006442
2,2020-10-06,127500,137000,125000,134000,2832451
3,2020-10-05,130500,134500,122500,127000,3157659
4,2020-09-29,137000,138000,123500,126500,3151057
...,...,...,...,...,...,...
265,2019-09-10,6250,6250,5990,6020,95005
266,2019-09-09,6070,6130,5960,5980,88095
267,2019-09-06,6070,6110,6000,6070,85538
268,2019-09-05,6150,6160,6030,6050,134077


In [12]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scale_cols = ['OPEN_P', 'HIGH_P', 'LOW_P', 'CLOSE_P', 'VOL']
shinpoong_scaled = scaler.fit_transform(shinpoong_ready[scale_cols])

shinpoong_scaled = pd.DataFrame(shinpoong_scaled)
shinpoong_scaled.columns = scale_cols

print(shinpoong_scaled)

       OPEN_P    HIGH_P     LOW_P   CLOSE_P       VOL
0    0.660287  0.656542  0.748619  0.698011  0.021489
1    0.641148  0.672897  0.732044  0.674581  0.049644
2    0.610048  0.640187  0.690608  0.666771  0.046771
3    0.624402  0.628505  0.676796  0.630324  0.052141
4    0.655502  0.644860  0.682320  0.627721  0.052032
..        ...       ...       ...       ...       ...
265  0.029904  0.029206  0.033094  0.000417  0.001569
266  0.029043  0.028645  0.032928  0.000208  0.001455
267  0.029043  0.028551  0.033149  0.000677  0.001412
268  0.029426  0.028785  0.033315  0.000573  0.002214
269  0.028660  0.028738  0.032818  0.000677  0.002937

[270 rows x 5 columns]


In [13]:
TEST_SIZE = 100

train = shinpoong_scaled[:-TEST_SIZE]
test = shinpoong_scaled[-TEST_SIZE:]

In [14]:
def make_dataset(data, label, window_size=15):
    feature_list = []
    label_list = []
    for i in range(len(data) - window_size):
        feature_list.append(np.array(data.iloc[i:i+window_size]))
        label_list.append(np.array(label.iloc[i+window_size]))
    return np.array(feature_list), np.array(label_list)

In [15]:
feature_cols = ['OPEN_P', 'HIGH_P', 'LOW_P', 'VOL']
label_cols = ['CLOSE_P']

train_feature = train[feature_cols]
train_label = train[label_cols]

# train dataset
train_feature, train_label = make_dataset(train_feature, train_label, 15)

# train, validation set 생성
from sklearn.model_selection import train_test_split
x_train, x_valid, y_train, y_valid = train_test_split(train_feature, train_label, test_size=0.1)

In [16]:
x_train.shape, x_valid.shape

((139, 15, 4), (16, 15, 4))

In [17]:
test_feature = test[feature_cols]
test_label = test[label_cols]

In [18]:
# test dataset (실제 예측 해볼 데이터)
test_feature, test_label = make_dataset(test_feature, test_label, 15)

In [19]:
test_feature.shape, test_label.shape

((85, 15, 4), (85, 1))

In [23]:
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import LSTM

model = Sequential()
model.add(LSTM(16, 
               input_shape=(train_feature.shape[1], train_feature.shape[2]), 
               activation='relu', 
               return_sequences=False)
          )
model.add(Dense(1))

Using TensorFlow backend.


ImportError: Could not find 'cudnn64_7.dll'. TensorFlow requires that this DLL be installed in a directory that is named in your %PATH% environment variable. Note that installing cuDNN is a separate step from installing CUDA, and this DLL is often found in a different directory from the CUDA DLLs. You may install the necessary DLL by downloading cuDNN 7 from this URL: https://developer.nvidia.com/cudnn

In [None]:
model.compile(loss='mean_squared_error', optimizer='adam')
early_stop = EarlyStopping(monitor='val_loss', patience=5)
filename = os.path.join(model_path, 'tmp_checkpoint.h5')
checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')

history = model.fit(x_train, y_train, 
                    epochs=200, 
                    batch_size=16,
                    validation_data=(x_valid, y_valid), 
                    callbacks=[early_stop, checkpoint])

In [None]:
# weight 로딩
model.load_weights(filename)

# 예측
pred = model.predict(test_feature)