In [1]:
from pykrx import stock
from pykrx import bond
import pandas as pd
import math
import datetime as dt
import numpy as np

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.metrics import make_scorer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasRegressor

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM, GRU

from itertools import cycle

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from matplotlib import pyplot as plt
import seaborn as sns

# SVR, RF : 종가 15일치 + MA5 1일치 사용 / LSTM, GRU, LSTM+GRU : 종가 15일치 + WMA foreign_보유수량 RSI MOM MA20 15일치 사용

# 업종별 (IT, 경기소비재, 금융, 소재) 알고리즘별 성능 비교(GridSearch 사용해서 하이퍼파라미터 적용, K-fold 교차검증을 통한 과적합 방지)

# 1. IT(삼성전자)

In [13]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), 0]
        b = [dataset[i+time_step-1, 2]] # 단일값 이기에 배열로 변환후 concatenate 수행
        data = np.concatenate((a, b)) 
        dataX.append(data)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

from sklearn.svm import SVR


params = [{ 
    'kernel': ['linear','rbf','poly'],
    'C': [1,5,10,50,100,500,1000],
    'gamma' : [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1,3]
    
}]

svr_cv = GridSearchCV(estimator=SVR(), param_grid=params, cv= 5) 
svr_cv.fit(X_train, y_train)
svr_cv.best_params_

In [14]:
train_predict=svr_cv.predict(X_train)
test_predict=svr_cv.predict(X_test)

train_predict = train_predict.reshape(-1,1)
train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict.reshape(-1,1)
test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val


svr_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
svr_r2 = r2_score(y_test,test_predict)
svr_cv.best_params_, svr_rmse, svr_r2


({'C': 1, 'gamma': 0.01, 'kernel': 'linear'},
 1229.2242261248673,
 0.9683650118747179)

In [16]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), 0]
        b = [dataset[i+time_step-1, 2]]
        data = np.concatenate((a, b)) 
        dataX.append(data)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

from sklearn.ensemble import RandomForestRegressor

params = [{ 
    'n_estimators': [10,50,100, 200, 500, 1000],
    'max_depth': [5,10,15,20,30],
    'min_samples_leaf' : [5,10,15,20,30],
    'min_samples_split' : [5,10,15,25,30]
    
}]

rf_cv = GridSearchCV(estimator=RandomForestRegressor(), param_grid=params, cv= 5) 
rf_cv.fit(X_train, y_train)


train_predict=rf_cv.predict(X_train)
test_predict=rf_cv.predict(X_test)

train_predict = train_predict.reshape(-1,1)
train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict.reshape(-1,1)
test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val

rf_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
rf_r2 = r2_score(y_test,test_predict)
rf_cv.best_params_, rf_rmse, rf_r2


({'max_depth': 30,
  'min_samples_leaf': 10,
  'min_samples_split': 30,
  'n_estimators': 10},
 1542.6885478335378,
 0.9501733183596228)

In [4]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)


# Keras 모델을 생성하는 함수를 정의합니다.
def create_model(dropout_rate):
    model = Sequential()
    model.add(LSTM(16, return_sequences=True, input_shape=(15, 6), activation='tanh'))
    model.add(LSTM(16, return_sequences=False))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

# KerasRegressor를 사용하여 Keras 모델을 래핑합니다.
keras_model = KerasRegressor(build_fn=create_model)

# GridSearchCV의 param_grid를 수정합니다.
params = {'batch_size': [5, 10, 20, 30, 50], 'epochs': [10, 50, 100, 150, 200], 
          'dropout_rate': [0, 0.1, 0.2, 0.3, 0.4]}

# KerasRegressor로 GridSearchCV를 수행합니다.
lstm_cv = GridSearchCV(keras_model, param_grid=params, cv=5)
lstm_cv.fit(X_train, y_train, validation_data=(X_test, y_test), verbose=1)


train_predict=lstm_cv.predict(X_train)
test_predict=lstm_cv.predict(X_test)
train_predict.shape, test_predict.shape

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



lstm_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
lstm_r2 = r2_score(y_test,test_predict)

lstm_cv.best_params_, lstm_rmse, lstm_r2

  keras_model = KerasRegressor(build_fn=create_model)


Epoch 1/50

KeyboardInterrupt: 

batch_size : 5, dropout_rate : 0.1, epochs: 150

RMSE : 935.6292851424948

R2 : 0.9816720847754098

In [4]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)


model = Sequential()
model.add(LSTM(16, return_sequences=True, input_shape=(15, 6), activation='tanh'))
model.add(LSTM(16, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')


model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=5)


train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



lstm_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
lstm_r2 = r2_score(y_test,test_predict)

lstm_rmse, lstm_r2

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


(1831.01724855888, 0.9298075623639647)

In [5]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)


model = Sequential()
model.add(LSTM(8, return_sequences=True, input_shape=(15, 6), activation='tanh'))
model.add(LSTM(8, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')


model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=5)


train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



lstm_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
lstm_r2 = r2_score(y_test,test_predict)

lstm_rmse, lstm_r2

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


(2579.978962538202, 0.8606401137249148)

In [6]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)


model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(15, 6), activation='tanh'))
model.add(LSTM(32, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')


model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=5)


train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



lstm_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
lstm_r2 = r2_score(y_test,test_predict)

lstm_rmse, lstm_r2

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


(1351.7123854966883, 0.9617462478035634)

In [8]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 10
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)


model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(10, 6), activation='tanh'))
model.add(LSTM(32, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')


model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=5)


train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



lstm_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
lstm_r2 = r2_score(y_test,test_predict)

lstm_rmse, lstm_r2

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


(1365.3727259930304, 0.9607891765907459)

In [9]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 20
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)


model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(20, 6), activation='tanh'))
model.add(LSTM(32, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')


model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=5)


train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



lstm_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
lstm_r2 = r2_score(y_test,test_predict)

lstm_rmse, lstm_r2

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


(1506.699265984948, 0.952692676786048)

In [10]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 5
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)


model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(5, 6), activation='tanh'))
model.add(LSTM(32, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')


model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=5)


train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



lstm_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
lstm_r2 = r2_score(y_test,test_predict)

lstm_rmse, lstm_r2

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


(2302.4272227976567, 0.8879383840143892)

In [11]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)


model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(15, 6), activation='tanh'))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')


model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=5)


train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



lstm_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
lstm_r2 = r2_score(y_test,test_predict)

lstm_rmse, lstm_r2

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


(1397.8084988062963, 0.9590927007986093)

In [19]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

tf.keras.backend.clear_session()
def create_model(dropout_rate):
    model=Sequential()
    model.add(GRU(32,return_sequences=True,input_shape=(15,6)))
    model.add(GRU(32,return_sequences=True))
    model.add(GRU(32))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error',optimizer='adam')
    return model

keras_model = KerasRegressor(build_fn=create_model)

params = {'batch_size': [5, 10, 20, 30, 50], 'epochs': [50, 100, 150, 200], 'dropout_rate': [0.1, 0.2, 0.3, 0.4]}

gru_cv = GridSearchCV(keras_model, param_grid=params, cv=5)
gru_cv.fit(X_train, y_train, validation_data=(X_test, y_test), verbose=1)


train_predict=gru_cv.predict(X_train)
test_predict=gru_cv.predict(X_test)
train_predict.shape, test_predict.shape

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



gru_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
gru_r2 = r2_score(y_test,test_predict)

gru_cv.best_params_, gru_rmse, gru_r2

NameError: name 'data' is not defined

batch_size : 10, dropout_rate : 0.3, epochs : 150
            
RMSE : 1007.4431163552895

R2 : 0.978750607356813

In [15]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

model = Sequential()
model.add(GRU(32,return_sequences=True,input_shape=(15,6)))
model.add(GRU(32,return_sequences=True))
model.add(GRU(32))
model.add(Dropout(0.3))
model.add(Dense(1))
model.compile(loss='mean_squared_error',optimizer='adam')

model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=10)


train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



gru_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
gru_r2 = r2_score(y_test,test_predict)

gru_rmse, gru_r2

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


(1082.44296369256, 0.9754689861726648)

In [14]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

model = Sequential()
model.add(GRU(16,return_sequences=True,input_shape=(15,6)))
model.add(GRU(16,return_sequences=True))
model.add(GRU(16))
model.add(Dropout(0.3))
model.add(Dense(1))
model.compile(loss='mean_squared_error',optimizer='adam')

model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=10)


train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



gru_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
gru_r2 = r2_score(y_test,test_predict)

gru_rmse, gru_r2

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


(1321.5618442294278, 0.9634337489586432)

In [None]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)


tf.keras.backend.clear_session()
def create_model(dropout_rate):
    model=Sequential()
    model.add(LSTM(32,return_sequences=True,input_shape=(15,6), activation='tanh'))
    model.add(LSTM(32,return_sequences=True))
    model.add(GRU(32,return_sequences=True))
    model.add(GRU(32))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error',optimizer='adam')
    return model

keras_model = KerasRegressor(build_fn=create_model)

params = {'batch_size': [5, 10, 20, 30, 50], 'epochs': [50, 100, 150, 200], 'dropout_rate': [0.1, 0.2, 0.3, 0.4]}

lstmgru_cv = GridSearchCV(keras_model, param_grid=params, cv=5)
lstmgru_cv.fit(X_rain, y_train, validation_data=(X_test, y_test), verbose=1)


train_predict=lstmgru_cv.predict(X_train)
test_predict=lstmgru_cv.predict(X_test)
train_predict.shape, test_predict.shape

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



lstmgru_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
lstmgru_r2 = r2_score(y_test,test_predict)

lstmgru_cv.best_params_, lstmgru_rmse, lstmgru_r2

batch_size : 20, dropout_rate : 0.1, epochs : 200
            
RMSE : 1022.8149156762958

R2 : 0.9780972039458445

In [None]:
df = pd.read_csv('./stock_v11.csv', encoding='cp949')

scaler=MinMaxScaler(feature_range=(0,1))
sdf=scaler.fit_transform(np.array(df))

max_val = max(df['종가'])
min_val = min(df['종가'])

training_size=int(len(sdf)*0.70)
test_size=len(sdf)-training_size
train_data,test_data=sdf[0:training_size,:],sdf[training_size:len(sdf),:]

def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), [0,1,4,5,6,7]]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
        
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)


# Keras 모델을 생성하는 함수를 정의합니다.
def create_model(dropout_rate):
    model = Sequential()
    model.add(LSTM(16, return_sequences=True, input_shape=(15, 6), activation='tanh'))
    model.add(LSTM(16, return_sequences=False))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

# KerasRegressor를 사용하여 Keras 모델을 래핑합니다.
keras_model = KerasRegressor(build_fn=create_model)

# GridSearchCV의 param_grid를 수정합니다.
params = {'batch_size': [5, 10, 20, 30, 50], 'epochs': [10, 50, 100, 150, 200], 
          'dropout_rate': [0, 0.1, 0.2, 0.3, 0.4]}

# KerasRegressor로 GridSearchCV를 수행합니다.
lstm_cv = GridSearchCV(keras_model, param_grid=params, cv=5)
lstm_cv.fit(X_train, y_train, validation_data=(X_test, y_test), verbose=1)


train_predict=lstm_cv.predict(X_train)
test_predict=lstm_cv.predict(X_test)
train_predict.shape, test_predict.shape

train_predict = train_predict*(max_val-min_val) + min_val

y_train = y_train.reshape(-1,1)
y_train = y_train*(max_val-min_val) + min_val

test_predict = test_predict*(max_val-min_val) + min_val

y_test = y_test.reshape(-1,1)
y_test = y_test*(max_val-min_val) + min_val



lstm_rmse = math.sqrt(mean_squared_error(y_test,test_predict))
lstm_r2 = r2_score(y_test,test_predict)

lstm_cv.best_params_, lstm_rmse, lstm_r2