In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
import glob

%matplotlib inline
warnings.filterwarnings('ignore')

plt.rcParams['font.family'] = 'NanumGothic'

# CSV 파일들을 저장할 빈 DataFrame을 생성합니다.
stock = pd.DataFrame()

# data 폴더에 있는 모든 CSV 파일을 가져옵니다.
csv_files = glob.glob('data/*.csv')

# 모든 CSV 파일을 순회하며 데이터를 DataFrame에 추가합니다.
for csv_file in csv_files:
    tmp = pd.read_csv(csv_file)
    stock = pd.concat([stock, tmp], ignore_index=True)

# all_data DataFrame에 모든 CSV 파일의 내용이 포함됩니다.
# Date,Open,High,Low,Close,Volume,Change,ma5,ma10,ma20,Kospi,S&P500,rsi,%K,%D,bb_upper,bb_sma,bb_lower,volume_ma5,momentum,high_low_diff

In [9]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, GRU, Dense, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.losses import Huber
from itertools import combinations
from tensorflow.keras.models import load_model


scaler = MinMaxScaler()
use_cols = ['Open', 'High', 'Low','Close','Volume']

colList = ['ma10', 'ma20', 'S&P500', 'Kospi', 'rsi', '%K', '%D', 'bb_upper', 'bb_lower', 'volume_ma5', 'momentum', 'high_low_diff']
result_pridict = []
com_colList = []

for r in range(2,3):
    for combination in combinations(colList, r):
        com_colList.append(list(combination))

for plus_cols in com_colList:
    use_cols.extend(plus_cols)

    scaled = scaler.fit_transform(stock[use_cols])
    df = pd.DataFrame(scaled, columns=use_cols)

    ln_use_cols = len(use_cols)
    WINDOW_SIZE = 20
    BATCH_SIZE = 100

    X, y = [], []

    for i in range(WINDOW_SIZE, len(scaled)):
        X.append(scaled[i - WINDOW_SIZE:i])
        y.append([scaled[i]])

    X, y = np.array(X), np.array(y)

    split_ratio = 0.8
    split_index = int(split_ratio * len(X))

    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]

    # LSTM 모델 정의
    lstm_input = Input(shape=(WINDOW_SIZE, ln_use_cols), name='lstm_input')
    lstm_layer = LSTM(64, activation='tanh')(lstm_input)

    # GRU 모델 정의
    gru_input = Input(shape=(WINDOW_SIZE, ln_use_cols), name='gru_input')
    gru_layer = GRU(64, activation='tanh')(gru_input)

    # 두 모델을 결합
    concatenated = concatenate([lstm_layer, gru_layer])

    # 공통 레이어 추가
    dense_layer = Dense(32, activation='relu')(concatenated)

    # 출력 레이어
    output = Dense(ln_use_cols, name='output')(dense_layer)

    # 모델 생성
    model = Model(inputs=[lstm_input, gru_input], outputs=output)

    loss = Huber()
    optimizer = Adam(learning_rate=0.001)
    model.compile(loss=loss, optimizer=optimizer)

    early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    tmp = '_'.join(use_cols[5:])
    checkpoint = ModelCheckpoint(
        filepath=f'./checkpoint/LSTM_GRU//LSTM_GRU_{tmp}.h5',
        monitor='val_loss',
        save_best_only=True,
        verbose=1
    )
    # 모델 학습
    history = model.fit(
        {'lstm_input': X_train, 'gru_input': X_train},  # 입력을 딕셔너리로 지정
        y_train,
        validation_split=0.2,
        epochs=50,
        callbacks=[early_stopping, checkpoint],
        batch_size=32
    )


    input_scaled = stock[use_cols]

    # 예측을 수행할 데이터
    total_data = [[input_scaled[len(input_scaled)-WINDOW_SIZE-1:len(input_scaled)-1]]]
    conform_total_data = [[input_scaled[len(input_scaled)-1:len(input_scaled)]]][:4]
    # print(total_data, 'total_data')
    data = np.array(total_data)
    data = data.tolist()
    data = data[0]
    # print(data, 'data')
    conform_data = conform_total_data[0][0]
    conform_data = conform_data.values[0]

    data_2d = np.array(data).reshape(-1, len(data[0][0]))

    # print(data_2d, "data_2d")
    scaler = MinMaxScaler()
    scaler.fit(data_2d)

    scaled_data_2d = scaler.transform(data_2d)
    model = load_model(f'./checkpoint/LSTM_GRU/LSTM_GRU_{tmp}.h5')
    scaled_data_3d = scaled_data_2d.reshape(len(data), len(data[0]), len(data[0][0]))
    print(conform_total_data , "conform_total_data")
    predicted_value = model.predict({'lstm_input': scaled_data_3d, 'gru_input': scaled_data_3d})
    print(predicted_value,'predicted_value')
    predicted_value_original = scaler.inverse_transform(predicted_value)
    predicted_value_original = np.round(predicted_value_original, ln_use_cols)[0][:4]
    print(predicted_value_original,"predicted_value_original")
    conform_total_data = conform_total_data[0][0].values[0]
    predicted_value_original = list(predicted_value_original)
    predicted_value_original.extend(conform_total_data[:4])
    use_cols_check = '/'.join(use_cols)
    predicted_value_original.append(use_cols_check)
    result_use_cols = use_cols[:4]
    result_use_cols.extend(['_Open', '_High', '_Low', '_Close', 'Use_Cols_Check'])

    result_dict = dict(zip(result_use_cols, predicted_value_original))
    result_pridict.append(result_dict)
    print(result_pridict)
    use_cols = use_cols[:5]  # 초기 use_cols로 재설정


Epoch 1/50
Epoch 1: val_loss improved from inf to 0.00153, saving model to ./checkpoint/LSTM_GRU\LSTM_GRU_ma10_ma20.h5
Epoch 2/50
Epoch 2: val_loss improved from 0.00153 to 0.00147, saving model to ./checkpoint/LSTM_GRU\LSTM_GRU_ma10_ma20.h5
Epoch 3/50
Epoch 3: val_loss improved from 0.00147 to 0.00145, saving model to ./checkpoint/LSTM_GRU\LSTM_GRU_ma10_ma20.h5
Epoch 4/50
Epoch 4: val_loss improved from 0.00145 to 0.00144, saving model to ./checkpoint/LSTM_GRU\LSTM_GRU_ma10_ma20.h5
Epoch 5/50
Epoch 5: val_loss did not improve from 0.00144
Epoch 6/50
Epoch 6: val_loss did not improve from 0.00144
Epoch 7/50
Epoch 7: val_loss did not improve from 0.00144
Epoch 8/50
Epoch 8: val_loss did not improve from 0.00144
Epoch 9/50
Epoch 9: val_loss did not improve from 0.00144
Epoch 10/50
Epoch 10: val_loss improved from 0.00144 to 0.00143, saving model to ./checkpoint/LSTM_GRU\LSTM_GRU_ma10_ma20.h5
Epoch 11/50
Epoch 11: val_loss did not improve from 0.00143
Epoch 12/50
Epoch 12: val_loss did no

In [1]:

print(result_pridict)
import pandas as pd


# 데이터를 DataFrame으로 변환
df = pd.DataFrame(result_pridict)

# DataFrame을 엑셀 파일로 저장
df.to_excel('Ensemble_predict2.xlsx', index=False)

NameError: name 'result_pridict' is not defined