In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
import glob

%matplotlib inline
warnings.filterwarnings('ignore')

plt.rcParams['font.family'] = 'NanumGothic'

# CSV 파일들을 저장할 빈 DataFrame을 생성합니다.


# data 폴더에 있는 모든 CSV 파일을 가져옵니다.
csv_files = glob.glob('./high50data/*.csv')
print(csv_files)

['./high50data\\000270.csv', './high50data\\000660.csv', './high50data\\000810.csv', './high50data\\003550.csv', './high50data\\003670.csv', './high50data\\005380.csv', './high50data\\005490.csv', './high50data\\005930.csv', './high50data\\005935.csv', './high50data\\006400.csv', './high50data\\009150.csv', './high50data\\010130.csv', './high50data\\012330.csv', './high50data\\015760.csv', './high50data\\017670.csv', './high50data\\018260.csv', './high50data\\024110.csv', './high50data\\028260.csv', './high50data\\032830.csv', './high50data\\033780.csv', './high50data\\034020.csv', './high50data\\034730.csv', './high50data\\035420.csv', './high50data\\035720.csv', './high50data\\047050.csv', './high50data\\051910.csv', './high50data\\055550.csv', './high50data\\066570.csv', './high50data\\068270.csv', './high50data\\086790.csv', './high50data\\096770.csv', './high50data\\105560.csv', './high50data\\138040.csv', './high50data\\207940.csv', './high50data\\316140.csv', './high50data\\3234

In [8]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import numpy as np
import pandas as pd

from tf.keras.models import Sequential
from tf.keras.layers import GRU, Dense, Dropout
from tf.keras.optimizers import Adam
from tf.keras.callbacks import EarlyStopping, ModelCheckpoint
from tf.keras.losses import Huber, MeanAbsoluteError
from tf.keras.models import load_model
from tf.keras.models import Model
from tf.keras.optimizers import AdamW

# Run configuration. `csv_files` (the list of CSV paths built by the
# file-discovery cell) must already exist before the functions below run.
# NOTE(review): the `from tf.keras... import ...` lines above cannot work on a
# fresh kernel unless a package literally named `tf` is installed, because
# `tf` is only an alias created by `import tensorflow as tf`; they should read
# `from tensorflow.keras... import ...`.

# Stock codes to process; each one needs a matching `<code>.csv` in csv_files.
stock_num_li = [
    '005930', '000660', '207940', '005935', '005380', '005490',
    '000270', '051910', '035420', '006400', '068270', '105560',
    '012330', '003670', '028260', '055550', '066570', '035720',
    '032830', '096770', '003550', '086790', '000810', '033780',
    '017670', '015760', '138040', '034730', '009150', '018260',
    '329180', '010130', '047050', '323410', '316140', '024110',
    '034020', '352820',
]

# Base columns used by every experiment. The trailing space is deliberate:
# a column-set string is appended directly to this one before splitting.
normal = 'Open High Low Close Volume '

# Extra column sets (space-separated CSV column names) tried on top of `normal`.
M1 = 'ma10'
DU = 'high_low_diff bb_upper'
M1SP = 'ma10 S&P500'
M2VM = 'ma20 volumn_ma5'  # defined but not part of colset_list below
SPU = 'S&P500 bb_upper'
M1K = 'ma10 %K'
M2SP = 'ma20 S&P500'
KOU = 'Kospi bb_upper'
KSPU = 'Kospi S&P500 bb_upper'
colset_list = [KSPU, M1, DU, M1SP, SPU, M1K, M2SP, KOU]

# Training hyper-parameters.
WINDOW_SIZE = 50
BATCH_SIZE = 2 * WINDOW_SIZE
epoch_size = 10
patience = 5
learning_rate = 0.01

# Accumulates one result dict per (stock, column set) run.
result_pridict = []
# Shared scaler; it is re-fitted on every call to load_and_preprocess_data.
scaler = MinMaxScaler()
# Shared data loading and preprocessing
def load_and_preprocess_data(stock_num, colset, WINDOW_SIZE):
    """Load one stock's CSV, scale the selected columns and build sliding windows.

    Parameters
    ----------
    stock_num : str
        Stock code. The file named ``<stock_num>.csv`` is looked up in the
        module-level ``csv_files`` list.
    colset : str
        Space-separated extra column names, appended to the base columns held
        in the module-level ``normal`` string.
    WINDOW_SIZE : int
        Number of consecutive rows that make up one input window.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, use_cols, ln_use_cols)`` where the
        ``X`` arrays have shape ``(n_windows, WINDOW_SIZE, len(use_cols))``,
        the ``y`` arrays have shape ``(n_windows, 1)`` and hold the scaled
        ``Close`` value of the row right after each window, ``use_cols`` is
        the list of column names and ``ln_use_cols`` its length. The first 80%
        of the windows (in row order) form the training split.

    Raises
    ------
    FileNotFoundError
        If no entry of ``csv_files`` is named ``<stock_num>.csv``.
    ValueError
        If the file has too few rows to build a single window.
    """
    import os

    # Match on the file name only, so the lookup does not depend on the
    # path separator the operating system put into csv_files.
    target_name = f'{stock_num}.csv'
    matches = [path for path in csv_files if os.path.basename(path) == target_name]
    if not matches:
        raise FileNotFoundError(f'No CSV named {target_name} found in csv_files')
    stock = pd.read_csv(matches[0])

    use_cols = (normal + colset).split()
    ln_use_cols = len(use_cols)

    # NOTE(review): the module-level scaler is fitted on every row, including
    # the rows that end up in the test split.
    scaled = scaler.fit_transform(stock[use_cols])
    if len(scaled) <= WINDOW_SIZE:
        raise ValueError(
            f'{target_name} has {len(scaled)} rows; more than {WINDOW_SIZE} are required'
        )

    # Each window holds WINDOW_SIZE rows; its target is the scaled Close of
    # the row that follows it.
    close_idx = use_cols.index('Close')
    X = np.array([scaled[i - WINDOW_SIZE:i] for i in range(WINDOW_SIZE, len(scaled))])
    y = scaled[WINDOW_SIZE:, [close_idx]]

    split_ratio = 0.8
    split_index = int(split_ratio * len(X))

    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]

    return X_train, X_test, y_train, y_test, use_cols, ln_use_cols

# Training function
def train_model(X_train, y_train, use_cols, checkpoint_path, WINDOW_SIZE, learning_rate=0.01, epoch_size=30, patience=5, batch_size=None):
    """Build and fit a GRU regressor, checkpointing the best weights to disk.

    The network is GRU(32) -> Dropout(0.2) -> Dense(16, relu) -> Dropout(0.2)
    -> Dense(len(use_cols)), trained with Huber loss and the AdamW optimizer.
    20% of the training data is held out through ``validation_split``.
    Training stops early once ``val_loss`` has not improved for ``patience``
    epochs, and the model with the best ``val_loss`` is saved to
    ``checkpoint_path``.

    Parameters
    ----------
    X_train, y_train : array-like
        Training windows and targets passed straight to ``model.fit``.
    use_cols : list
        Feature column names; only its length is used, for the input width
        and the number of output units.
    checkpoint_path : str
        File path handed to ``ModelCheckpoint``.
    WINDOW_SIZE : int
        Number of time steps in each input window.
    learning_rate : float
        Learning rate for AdamW.
    epoch_size : int
        Maximum number of epochs.
    patience : int
        Early-stopping patience, in epochs.
    batch_size : int or None
        Mini-batch size. ``None`` uses the module-level ``BATCH_SIZE``.

    Returns
    -------
    The ``History`` object returned by ``model.fit``.
    """
    if batch_size is None:
        # Fall back to the notebook-wide setting.
        batch_size = BATCH_SIZE

    # NOTE(review): load_and_preprocess_data produces one target column
    # (scaled Close) while the output layer has len(use_cols) units - confirm
    # that this mismatch is intended.
    model = Sequential([
        GRU(32, activation='tanh', input_shape=(WINDOW_SIZE, len(use_cols))),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dropout(0.2),
        Dense(len(use_cols))
    ])

    loss = Huber()
    optimizer = AdamW(
        learning_rate=learning_rate,
        weight_decay=0.004,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07,
        amsgrad=False,
        clipnorm=None,
        clipvalue=None,
        global_clipnorm=None,
        use_ema=False,
        ema_momentum=0.99,
        name="AdamW",
    )

    model.compile(loss=loss, optimizer=optimizer, metrics=[MeanAbsoluteError()])

    early_stopping = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)

    checkpoint = ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_loss',
        save_best_only=True,
        verbose=1,
    )

    history = model.fit(
        X_train,
        y_train,
        validation_split=0.2,
        epochs=epoch_size,
        callbacks=[early_stopping, checkpoint],
        batch_size=batch_size
    )

    # Hand the training curves back instead of discarding them.
    return history

# Prediction and checkpoint-saving function
def predict_and_save_model(stock_num, use_cols, WINDOW_SIZE):
    """Train a model for one stock and column set, then predict its last row.

    The model is trained through ``train_model`` and reloaded from its best
    checkpoint. The ``WINDOW_SIZE`` rows that precede the last row of the CSV
    are scaled and fed to the model; the de-scaled prediction is stored next
    to the actual last row in the module-level ``result_pridict`` list.

    Parameters
    ----------
    stock_num : str
        Stock code; ``<stock_num>.csv`` is looked up in ``csv_files``.
    use_cols : str
        Space-separated extra column names (a column-set string).
    WINDOW_SIZE : int
        Number of rows per input window.

    Returns
    -------
    dict
        The row appended to ``result_pridict``: the first four column names
        map to predicted values, ``_Open``/``_High``/``_Low``/``_Close`` map to
        the actual last row, and ``Use_Cols_Check`` holds the column names
        joined by ``/``.

    Raises
    ------
    FileNotFoundError
        If no entry of ``csv_files`` is named ``<stock_num>.csv``.
    """
    import os

    # The caller passes the column-set string. Keep it under its own name,
    # because `use_cols` is rebound to the full column list just below.
    colset = use_cols
    X_train, _, y_train, _, use_cols, ln_use_cols = load_and_preprocess_data(stock_num, colset, WINDOW_SIZE)

    # Build the file-name tag from the column-set string. Formatting the
    # column *list* would put its Python repr (brackets and quotes) in the name.
    colset_tag = '_'.join(colset.split())
    checkpoint_path = f'./checkpoint/GRU/AdamW/{WINDOW_SIZE}/{stock_num}_{colset_tag}_{WINDOW_SIZE}.h5'

    train_model(X_train, y_train, use_cols, checkpoint_path, WINDOW_SIZE, learning_rate, epoch_size, patience)

    # Load the best checkpoint once.
    model = load_model(checkpoint_path, custom_objects={'MeanAbsoluteError': MeanAbsoluteError()})

    # Re-read the raw (unscaled) rows. Match on the file name only, so the
    # lookup does not depend on the OS path separator.
    target_name = f'{stock_num}.csv'
    matches = [path for path in csv_files if os.path.basename(path) == target_name]
    if not matches:
        raise FileNotFoundError(f'No CSV named {target_name} found in csv_files')
    raw_values = pd.read_csv(matches[0])[use_cols].to_numpy()

    window_raw = raw_values[-WINDOW_SIZE - 1:-1]  # rows fed to the model
    actual_last = raw_values[-1]                  # row being predicted

    # Fit the scaler on the full history, exactly as the training data was
    # scaled. A scaler fitted on the window alone would feed the model
    # differently scaled inputs and de-scale its output with the wrong range.
    input_scaler = MinMaxScaler()
    input_scaler.fit(raw_values)
    model_input = input_scaler.transform(window_raw)[np.newaxis, ...]

    predicted_value = model.predict(model_input)
    print(predicted_value)

    predicted_original = input_scaler.inverse_transform(predicted_value)
    # NOTE(review): the number of decimals equals the number of columns,
    # which looks accidental; kept so the stored values are unchanged.
    predicted_ohlc = list(np.round(predicted_original, ln_use_cols)[0][:4])

    # NOTE(review): the stored row does not record stock_num.
    row_values = predicted_ohlc + list(actual_last[:4]) + ['/'.join(use_cols)]
    row_keys = use_cols[:4] + ['_Open', '_High', '_Low', '_Close', 'Use_Cols_Check']

    result_dict = dict(zip(row_keys, row_values))
    result_pridict.append(result_dict)
    return result_dict

# Train and predict for every (stock, column set) combination; each call
# appends one result row to result_pridict.
for stock_num in stock_num_li:
    for colset in colset_list:
        predict_and_save_model(stock_num, colset, WINDOW_SIZE)



Epoch 1/30
Epoch 1: val_loss improved from inf to 0.00067, saving model to ./checkpoint/GRU/AdamW/50\005930_Kospi_S&P500_bb_upper_50.h5
Epoch 2/30
Epoch 2: val_loss improved from 0.00067 to 0.00021, saving model to ./checkpoint/GRU/AdamW/50\005930_Kospi_S&P500_bb_upper_50.h5
Epoch 3/30
Epoch 3: val_loss did not improve from 0.00021
Epoch 4/30
Epoch 4: val_loss improved from 0.00021 to 0.00007, saving model to ./checkpoint/GRU/AdamW/50\005930_Kospi_S&P500_bb_upper_50.h5
Epoch 5/30
Epoch 5: val_loss did not improve from 0.00007
Epoch 6/30
Epoch 6: val_loss did not improve from 0.00007
Epoch 7/30
Epoch 7: val_loss did not improve from 0.00007
Epoch 8/30
Epoch 8: val_loss improved from 0.00007 to 0.00004, saving model to ./checkpoint/GRU/AdamW/50\005930_Kospi_S&P500_bb_upper_50.h5
Epoch 9/30
Epoch 9: val_loss did not improve from 0.00004
Epoch 10/30
Epoch 10: val_loss did not improve from 0.00004
Epoch 11/30
Epoch 11: val_loss did not improve from 0.00004
Epoch 12/30
Epoch 12: val_loss did

In [1]:
# print(stock_num)
import pandas as pd


# Convert the collected result rows into a DataFrame.
# NOTE: result_pridict is created by the training cell; running this cell
# first on a fresh kernel raises NameError.
df = pd.DataFrame(result_pridict)

# Save the DataFrame to an Excel file.
df.to_excel('GRU_adamW_50(9).xlsx', index=False)

NameError: name 'result_pridict' is not defined