In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import ta

In [2]:
# 완료된, 실패한 종목 관리 파일
completed_file = 'completed_symbols_h5.txt'
failed_file = 'failed_symbols.txt'

# 기존 파일 읽기
completed_symbols = set()
if os.path.exists(completed_file):
    with open(completed_file, 'r') as f:
        completed_symbols = set(f.read().splitlines())

failed_symbols = set()
if os.path.exists(failed_file):
    with open(failed_file, 'r') as f:
        failed_symbols = set(f.read().splitlines())

def load_progress():
    if os.path.exists(completed_file):
        with open(completed_file, 'r') as f:
            done_symbols = set([line.strip() for line in f if line.strip()])
    else:
        done_symbols = set()
    return done_symbols

In [3]:
import joblib

# sMAPE 계산 함수
def smape(a, f):
    return 100 / len(a) * np.sum(2 * np.abs(f - a) / (np.abs(a) + np.abs(f)))

# 모델 저장 경로
MODEL_FOLDER = 'LSTM_MODEL_H5_WIN'
os.makedirs(MODEL_FOLDER, exist_ok=True)

# 종목별 평가 지표 저장용
results = []

# 전체 CSV 로딩
df_all = pd.read_csv('sp500_latest.csv')
symbols = df_all['Symbol'].unique()
done_symbols = load_progress()
print(f"[INFO] Symbols already processed (will skip): {len(done_symbols)}")

for symbol in symbols:
    
    if symbol in done_symbols:
        print(f"[SKIP] Already processed {symbol}")
        continue

    try:
        df = df_all[df_all['Symbol'] == symbol].copy()
        df = df.dropna()

        # 기술적 지표 추가
        df['MA20'] = ta.trend.sma_indicator(df['Close'], window=20)
        bb = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
        df['Upper'] = bb.bollinger_hband()
        df['Lower'] = bb.bollinger_lband()
        df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()
        df.dropna(inplace=True)

        features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA20', 'Upper', 'Lower', 'RSI']
        scaler = MinMaxScaler()
        scaled_data = scaler.fit_transform(df[features])

        sequence_length = 50
        X, y = [], []
        for i in range(len(scaled_data) - sequence_length):
            X.append(scaled_data[i:i+sequence_length])
            y.append(scaled_data[i+sequence_length][features.index('Close')])
        X = np.array(X)
        y = np.array(y)

        # 7:2:1 split
        train_size = int(len(X) * 0.7)
        val_size = int(len(X) * 0.2)
        X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
        y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]

        # 모델 구성
        model = Sequential([
            LSTM(64, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
            Dropout(0.3),
            LSTM(32),
            Dropout(0.3),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

        # 학습
        model.fit(X_train, y_train, epochs=50, batch_size=32,
                  validation_data=(X_val, y_val), callbacks=[early_stop], verbose=0)
        
        # 모델 저장
        # model.save(os.path.join(MODEL_FOLDER, symbol), save_format='tf') # TF 방식 
        model.save(os.path.join(MODEL_FOLDER, f'{symbol}.h5')) # H5 방식
        
        # 스케일러 저장
        # joblib.dump(scaler, '/home/danssa/proj_ua/shared/scaler60.pkl') # pkl 방식 
        joblib.dump(scaler, os.path.join(MODEL_FOLDER, f'{symbol}_scaler.joblib')) # joblib 방식
        
        
        # 성공시 기록
        with open(completed_file, 'a') as f:
            f.write(f"{symbol}\n")


        # 예측 및 역변환
        pred = model.predict(X_test)
        close_index = features.index('Close')
        y_full = np.zeros((len(y_test), len(features)))
        pred_full = np.zeros((len(pred), len(features)))
        y_full[:, close_index] = y_test
        pred_full[:, close_index] = pred.flatten()
        true_rescaled = scaler.inverse_transform(y_full)[:, close_index]
        pred_rescaled = scaler.inverse_transform(pred_full)[:, close_index]

        # 평가
        smape_val = smape(true_rescaled, pred_rescaled)
        mae_val = mean_absolute_error(true_rescaled, pred_rescaled)
        mse_val = mean_squared_error(true_rescaled, pred_rescaled)

        results.append({
            'Symbol': symbol,
            'sMAPE': smape_val,
            'MAE': mae_val,
            'MSE': mse_val
        })

        print(f"{symbol} 완료 - sMAPE: {smape_val:.2f}%, MAE: {mae_val:.2f}, MSE: {mse_val:.2f}")

    except Exception as e:
        print(f"{symbol} 실패: {e}")
        # 실패 기록
        with open(failed_file, 'a') as f:
            f.write(f"{symbol}\n")




# 결과 정리
results_df = pd.DataFrame(results)
results_df = results_df.sort_values('sMAPE')

# sMAPE 구간화
bins = np.arange(0, 105, 5)
labels = [f'{i}~{i+5}%' for i in bins[:-1]]
results_df['sMAPE_Group'] = pd.cut(results_df['sMAPE'], bins=bins, labels=labels, right=False)

print("\n--- Top 10 종목 (sMAPE 낮은 순) ---")
print(results_df.head(10))

print("\n--- sMAPE 오차 범위별 종목 개수 (5% 단위) ---")
print(results_df['sMAPE_Group'].value_counts().sort_index())

print("\n--- 전체 지표 ---")
print(results_df)

print("\nGPU 상태:")
print("TensorFlow version:", tf.__version__)
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))
print("Available devices:", tf.config.list_physical_devices())

[INFO] Symbols already processed (will skip): 361
[SKIP] Already processed MMM
[SKIP] Already processed AOS
[SKIP] Already processed ABT
[SKIP] Already processed ABBV
[SKIP] Already processed ACN
[SKIP] Already processed ADBE
[SKIP] Already processed AMD
[SKIP] Already processed AES
[SKIP] Already processed AFL
[SKIP] Already processed A
[SKIP] Already processed APD
[SKIP] Already processed ABNB
[SKIP] Already processed AKAM
[SKIP] Already processed ALB
[SKIP] Already processed ARE
[SKIP] Already processed ALGN
[SKIP] Already processed ALLE
[SKIP] Already processed LNT
[SKIP] Already processed ALL
[SKIP] Already processed GOOGL
[SKIP] Already processed GOOG
[SKIP] Already processed MO
[SKIP] Already processed AMZN
[SKIP] Already processed AMCR
[SKIP] Already processed AEE
[SKIP] Already processed AEP
[SKIP] Already processed AXP
[SKIP] Already processed AIG
[SKIP] Already processed AMT
[SKIP] Already processed AWK
[SKIP] Already processed AME
[SKIP] Already processed AMGN
[SKIP] Alread

  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
PNR 완료 - sMAPE: 3.11%, MAE: 2.74, MSE: 11.78


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
PEP 완료 - sMAPE: 1.50%, MAE: 2.47, MSE: 9.25


  super().__init__(**kwargs)






[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
PFE 완료 - sMAPE: 1.60%, MAE: 0.42, MSE: 0.27


  super().__init__(**kwargs)






[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
PCG 완료 - sMAPE: 2.53%, MAE: 0.47, MSE: 0.31


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
PM 완료 - sMAPE: 3.23%, MAE: 3.63, MSE: 23.49


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step
PSX 완료 - sMAPE: 2.47%, MAE: 3.33, MSE: 18.52


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
PNW 완료 - sMAPE: 1.88%, MAE: 1.48, MSE: 3.45


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
PNC 완료 - sMAPE: 2.13%, MAE: 3.58, MSE: 24.69


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
POOL 완료 - sMAPE: 2.26%, MAE: 8.07, MSE: 106.68


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
PPG 완료 - sMAPE: 1.19%, MAE: 1.54, MSE: 3.67


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
PPL 완료 - sMAPE: 2.04%, MAE: 0.61, MSE: 0.59


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
PFG 완료 - sMAPE: 2.06%, MAE: 1.64, MSE: 4.49


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
PG 완료 - sMAPE: 3.63%, MAE: 5.82, MSE: 41.43


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
PGR 완료 - sMAPE: 12.82%, MAE: 26.81, MSE: 823.06


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
PLD 완료 - sMAPE: 1.97%, MAE: 2.25, MSE: 9.17


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
PRU 완료 - sMAPE: 3.90%, MAE: 4.38, MSE: 25.18


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
PEG 완료 - sMAPE: 5.94%, MAE: 4.55, MSE: 30.30


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
PTC 완료 - sMAPE: 2.93%, MAE: 5.28, MSE: 36.98


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
PSA 완료 - sMAPE: 2.01%, MAE: 5.96, MSE: 56.43


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
PHM 완료 - sMAPE: 10.00%, MAE: 11.62, MSE: 165.65


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
PWR 완료 - sMAPE: 8.14%, MAE: 22.04, MSE: 669.46


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
QCOM 완료 - sMAPE: 3.38%, MAE: 5.77, MSE: 50.76


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
DGX 완료 - sMAPE: 1.71%, MAE: 2.41, MSE: 9.50


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
RL 완료 - sMAPE: 6.59%, MAE: 11.90, MSE: 193.08


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
RJF 완료 - sMAPE: 4.97%, MAE: 6.55, MSE: 83.11


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
RTX 완료 - sMAPE: 2.40%, MAE: 2.60, MSE: 10.93


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
O 완료 - sMAPE: 1.72%, MAE: 0.93, MSE: 1.31


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
REG 완료 - sMAPE: 1.24%, MAE: 0.80, MSE: 1.01


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
REGN 완료 - sMAPE: 6.17%, MAE: 60.49, MSE: 4832.29


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
RF 완료 - sMAPE: 2.62%, MAE: 0.55, MSE: 0.51


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
RSG 완료 - sMAPE: 7.59%, MAE: 14.29, MSE: 230.17


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
RMD 완료 - sMAPE: 2.60%, MAE: 5.43, MSE: 57.12


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
RVTY 완료 - sMAPE: 2.02%, MAE: 2.26, MSE: 9.38


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
ROK 완료 - sMAPE: 1.98%, MAE: 5.37, MSE: 58.98


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
ROL 완료 - sMAPE: 3.58%, MAE: 1.67, MSE: 3.68


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
ROP 완료 - sMAPE: 2.71%, MAE: 14.62, MSE: 280.56


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
ROST 완료 - sMAPE: 3.45%, MAE: 4.88, MSE: 32.15


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
RCL 완료 - sMAPE: 4.57%, MAE: 7.68, MSE: 91.51


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step
SPGI 완료 - sMAPE: 2.69%, MAE: 12.62, MSE: 215.61


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
CRM 완료 - sMAPE: 2.67%, MAE: 7.57, MSE: 104.51


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
SBAC 완료 - sMAPE: 1.95%, MAE: 4.15, MSE: 27.94


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
SLB 완료 - sMAPE: 3.01%, MAE: 1.35, MSE: 2.70


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
STX 완료 - sMAPE: 3.01%, MAE: 2.85, MSE: 12.48


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
SRE 완료 - sMAPE: 2.58%, MAE: 2.04, MSE: 7.98


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
NOW 완료 - sMAPE: 6.00%, MAE: 51.49, MSE: 4038.87


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
SHW 완료 - sMAPE: 4.23%, MAE: 14.28, MSE: 299.89


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
SPG 완료 - sMAPE: 5.40%, MAE: 8.21, MSE: 88.04


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
SWKS 완료 - sMAPE: 2.92%, MAE: 2.86, MSE: 14.94


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
SJM 완료 - sMAPE: 1.93%, MAE: 2.20, MSE: 7.97


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
SW 완료 - sMAPE: 1.63%, MAE: 0.87, MSE: 0.84


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
SNA 완료 - sMAPE: 3.83%, MAE: 11.37, MSE: 222.20


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
SOLV 완료 - sMAPE: 3.31%, MAE: 2.26, MSE: 5.95


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
SO 완료 - sMAPE: 3.71%, MAE: 2.98, MSE: 12.99


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
LUV 완료 - sMAPE: 2.79%, MAE: 0.80, MSE: 1.11


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
SWK 완료 - sMAPE: 2.16%, MAE: 1.93, MSE: 7.26


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
SBUX 완료 - sMAPE: 2.30%, MAE: 1.97, MSE: 11.33


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
STT 완료 - sMAPE: 1.82%, MAE: 1.51, MSE: 3.91


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
STLD 완료 - sMAPE: 11.02%, MAE: 13.46, MSE: 213.90


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
STE 완료 - sMAPE: 1.97%, MAE: 4.40, MSE: 32.15


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
SYK 완료 - sMAPE: 6.00%, MAE: 20.35, MSE: 475.23


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
SMCI 완료 - sMAPE: 23.28%, MAE: 14.59, MSE: 347.31


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
SYF 완료 - sMAPE: 2.65%, MAE: 1.33, MSE: 3.67


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
SNPS 완료 - sMAPE: 6.46%, MAE: 34.68, MSE: 1580.37


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
SYY 완료 - sMAPE: 1.49%, MAE: 1.11, MSE: 2.26


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
TMUS 완료 - sMAPE: 3.96%, MAE: 7.84, MSE: 107.24


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
TROW 완료 - sMAPE: 1.77%, MAE: 1.92, MSE: 6.09


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
TTWO 완료 - sMAPE: 2.41%, MAE: 3.83, MSE: 24.51


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
TPR 완료 - sMAPE: 3.52%, MAE: 1.68, MSE: 6.15


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
TRGP 완료 - sMAPE: 4.88%, MAE: 7.11, MSE: 102.15


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
TGT 완료 - sMAPE: 2.58%, MAE: 3.77, MSE: 31.92


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
TEL 완료 - sMAPE: 2.55%, MAE: 3.66, MSE: 19.86


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
TDY 완료 - sMAPE: 1.86%, MAE: 8.03, MSE: 112.35


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
TER 완료 - sMAPE: 4.52%, MAE: 5.51, MSE: 46.38


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
TSLA 완료 - sMAPE: 4.74%, MAE: 11.43, MSE: 268.01


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
TXN 완료 - sMAPE: 2.80%, MAE: 5.19, MSE: 43.78


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
TPL 완료 - sMAPE: 6.27%, MAE: 60.02, MSE: 10320.62


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
TXT 완료 - sMAPE: 3.69%, MAE: 3.20, MSE: 13.90


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
TMO 완료 - sMAPE: 1.75%, MAE: 9.91, MSE: 184.24


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
TJX 완료 - sMAPE: 7.83%, MAE: 8.35, MSE: 88.39


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
TKO 완료 - sMAPE: 4.78%, MAE: 5.41, MSE: 52.93


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
TSCO 완료 - sMAPE: 3.56%, MAE: 1.88, MSE: 5.12


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
TT 완료 - sMAPE: 10.86%, MAE: 35.72, MSE: 1566.06


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
TDG 완료 - sMAPE: 11.43%, MAE: 132.90, MSE: 19157.72


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
TRV 완료 - sMAPE: 3.18%, MAE: 7.15, MSE: 82.44


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
TRMB 완료 - sMAPE: 2.31%, MAE: 1.38, MSE: 4.33


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
TFC 완료 - sMAPE: 1.78%, MAE: 0.68, MSE: 0.80


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
TYL 완료 - sMAPE: 3.72%, MAE: 20.10, MSE: 622.03


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
TSN 완료 - sMAPE: 1.62%, MAE: 0.93, MSE: 1.45


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
USB 완료 - sMAPE: 1.96%, MAE: 0.83, MSE: 1.21


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
UBER 완료 - sMAPE: 3.26%, MAE: 2.28, MSE: 8.79


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
UDR 완료 - sMAPE: 1.97%, MAE: 0.77, MSE: 0.86


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
ULTA 완료 - sMAPE: 4.45%, MAE: 18.72, MSE: 535.71


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
UNP 완료 - sMAPE: 1.68%, MAE: 3.97, MSE: 26.16


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
UAL 완료 - sMAPE: 3.87%, MAE: 2.29, MSE: 10.58


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
UPS 완료 - sMAPE: 2.12%, MAE: 2.86, MSE: 16.85


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
URI 완료 - sMAPE: 8.80%, MAE: 61.54, MSE: 4958.00


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
UNH 완료 - sMAPE: 3.45%, MAE: 18.37, MSE: 534.59


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
UHS 완료 - sMAPE: 2.27%, MAE: 4.50, MSE: 38.72


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
VLO 완료 - sMAPE: 4.54%, MAE: 6.53, MSE: 68.64


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
VTR 완료 - sMAPE: 2.27%, MAE: 1.23, MSE: 2.40


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
VLTO 완료 - sMAPE: 1.68%, MAE: 1.76, MSE: 4.61


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
VRSN 완료 - sMAPE: 1.84%, MAE: 3.41, MSE: 17.75


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
VRSK 완료 - sMAPE: 2.69%, MAE: 7.01, MSE: 69.62


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
VZ 완료 - sMAPE: 1.86%, MAE: 0.72, MSE: 0.86


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
VRTX 완료 - sMAPE: 7.79%, MAE: 34.35, MSE: 1372.09


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
VTRS 완료 - sMAPE: 2.27%, MAE: 0.26, MSE: 0.12


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
VICI 완료 - sMAPE: 1.48%, MAE: 0.44, MSE: 0.36


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
V 완료 - sMAPE: 4.56%, MAE: 12.65, MSE: 196.99


  super().__init__(**kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
VST 완료 - sMAPE: 20.24%, MAE: 19.82, MSE: 602.11


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
VMC 완료 - sMAPE: 3.66%, MAE: 9.34, MSE: 124.56


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
WRB 완료 - sMAPE: 9.29%, MAE: 4.95, MSE: 27.50


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
GWW 완료 - sMAPE: 8.16%, MAE: 79.52, MSE: 7784.74


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
WAB 완료 - sMAPE: 5.29%, MAE: 8.84, MSE: 108.26


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
WBA 완료 - sMAPE: 6.50%, MAE: 0.75, MSE: 0.90


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
WMT 완료 - sMAPE: 7.48%, MAE: 5.44, MSE: 44.55


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
DIS 완료 - sMAPE: 2.17%, MAE: 2.20, MSE: 9.31


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
WBD 완료 - sMAPE: 5.07%, MAE: 0.44, MSE: 0.28


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
WM 완료 - sMAPE: 6.71%, MAE: 13.42, MSE: 200.22


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
WAT 완료 - sMAPE: 2.41%, MAE: 8.16, MSE: 142.20


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
WEC 완료 - sMAPE: 1.94%, MAE: 1.66, MSE: 4.62


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
WFC 완료 - sMAPE: 3.85%, MAE: 2.30, MSE: 8.10


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
WELL 완료 - sMAPE: 2.20%, MAE: 2.45, MSE: 9.90


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
WST 완료 - sMAPE: 3.17%, MAE: 10.73, MSE: 191.92


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
WDC 완료 - sMAPE: 3.84%, MAE: 1.90, MSE: 5.79


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
WY 완료 - sMAPE: 2.02%, MAE: 0.62, MSE: 0.68


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
WSM 완료 - sMAPE: 8.77%, MAE: 12.35, MSE: 212.91


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
WMB 완료 - sMAPE: 4.76%, MAE: 2.14, MSE: 7.47


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
WTW 완료 - sMAPE: 3.63%, MAE: 10.11, MSE: 147.50


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
WDAY 완료 - sMAPE: 2.51%, MAE: 6.32, MSE: 73.24


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
WYNN 완료 - sMAPE: 2.35%, MAE: 2.17, MSE: 9.20


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
XEL 완료 - sMAPE: 2.10%, MAE: 1.21, MSE: 2.75


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
XYL 완료 - sMAPE: 2.21%, MAE: 2.85, MSE: 12.69


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
YUM 완료 - sMAPE: 1.83%, MAE: 2.43, MSE: 9.80


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
ZBRA 완료 - sMAPE: 2.73%, MAE: 8.65, MSE: 115.66


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
ZBH 완료 - sMAPE: 1.11%, MAE: 1.26, MSE: 2.99


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
ZTS 완료 - sMAPE: 2.08%, MAE: 3.62, MSE: 22.17

--- Top 10 종목 (sMAPE 낮은 순) ---
    Symbol     sMAPE       MAE       MSE sMAPE_Group
134    ZBH  1.111041  1.264735  2.988322        0~5%
9      PPG  1.194304  1.536276  3.670559        0~5%
27     REG  1.238123  0.796961  1.008284        0~5%
106   VICI  1.483137  0.436170  0.363664        0~5%
63     SYY  1.488742  1.107171  2.264630        0~5%
1      PEP  1.504068  2.474611  9.248878        0~5%
2      PFE  1.596574  0.416336  0.272240        0~5%
87     TSN  1.624364  0.928745  1.447948        0~5%
49      SW  1.627286  0.866781  0.843087        0~5%
100   VLTO  1.679559  1.762619  4.613170        0~5%

--- sMAPE 오차 범위별 종목 개수 (5% 단위) ---
sMAPE_Group
0~5%       108
5~10%       21
10~15%       5
15~20%       0
20~25%       2
25~30%       0
30~35%       0
35~40%       0
40~45%       0
45~50%       0
50~55%       0
55~60%       0
60~65%       0
65~70%       0
70~75%    

In [None]:
# # sMAPE 계산 함수
# def smape(a, f):
#     return 100 / len(a) * np.sum(2 * np.abs(f - a) / (np.abs(a) + np.abs(f)))

# results = {}

# folder_path = 'sp500_stocks'
# for filename in os.listdir(folder_path):
    
#     if not filename.endswith('.csv'):
#         continue

#     symbol = filename.replace('.csv', '')
#     path = os.path.join(folder_path, filename)

#     try:
#         df = pd.read_csv(path).dropna()

#         # 기술적 지표 추가
#         df['MA20'] = ta.trend.sma_indicator(df['Close'], window=20)
#         bb = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
#         df['Upper'] = bb.bollinger_hband()
#         df['Lower'] = bb.bollinger_lband()
#         df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()
#         df.dropna(inplace=True)

#         features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA20', 'Upper', 'Lower', 'RSI']
#         scaler = MinMaxScaler()
#         scaled_data = scaler.fit_transform(df[features])

#         sequence_length = 50
#         X, y = [], []
#         for i in range(len(scaled_data) - sequence_length):
#             X.append(scaled_data[i:i+sequence_length])
#             y.append(scaled_data[i+sequence_length][features.index('Close')])
#         X = np.array(X)
#         y = np.array(y)

#         # 훈련/테스트 분리
#         split = int(0.8 * len(X))
#         X_train, X_test = X[:split], X[split:]
#         y_train, y_test = y[:split], y[split:]

     
#         model = Sequential([
#             LSTM(64, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
#             Dropout(0.3),
#             LSTM(32),
#             Dropout(0.3),
#             Dense(1)
#         ])
#         model.compile(optimizer='adam', loss='mse')
#         early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

#         model.fit(X_train, y_train, epochs=50, batch_size=32,
#                   validation_split=0.2, callbacks=[early_stop], verbose=0)

#         # 예측 및 역변환
#         pred = model.predict(X_test)
#         close_index = features.index('Close')
#         y_full = np.zeros((len(y_test), len(features)))
#         pred_full = np.zeros((len(pred), len(features)))
#         y_full[:, close_index] = y_test
#         pred_full[:, close_index] = pred.flatten()
#         true_rescaled = scaler.inverse_transform(y_full)[:, close_index]
#         pred_rescaled = scaler.inverse_transform(pred_full)[:, close_index]

#         # sMAPE 저장
#         smape_val = smape(true_rescaled, pred_rescaled)
#         results[symbol] = smape_val
#         print(f"{symbol} 완료 - sMAPE: {smape_val:.2f}%")

#     except Exception as e:
#         print(f"{symbol} 실패: {e}")


In [None]:
# results_df = pd.DataFrame(sorted(results.items(), key=lambda x: x[1]), columns=['Symbol', 'sMAPE'])

# # 5% 단위로 구간 설정
# bins = np.arange(0, 105, 5)  
# labels = [f'{i}~{i+5}%' for i in bins[:-1]]


# results_df['sMAPE_Group'] = pd.cut(results_df['sMAPE'], bins=bins, labels=labels, right=False)

# print("\n--- Top 10 종목 (sMAPE 낮은 순) ---")
# print(results_df.sort_values('sMAPE').head(10))

# print("\n--- sMAPE 오차 범위별 종목 개수 (5% 단위) ---")
# print(results_df['sMAPE_Group'].value_counts().sort_index())

# # 1~5%  : 72.5%
# # 5~10% : 20.8% 


--- Top 10 종목 (sMAPE 낮은 순) ---
  Symbol     sMAPE sMAPE_Group
0   GILD  1.076772        0~5%
1     MO  1.226468        0~5%
2    CPB  1.253405        0~5%
3    KMB  1.372075        0~5%
4   FOXA  1.563924        0~5%
5    FOX  1.567218        0~5%
6    LYB  1.599551        0~5%
7      K  1.638793        0~5%
8     PM  1.648253        0~5%
9      O  1.671763        0~5%

--- sMAPE 오차 범위별 종목 개수 (5% 단위) ---
sMAPE_Group
0~5%       359
5~10%      103
10~15%      24
15~20%       7
20~25%       0
25~30%       0
30~35%       1
35~40%       0
40~45%       0
45~50%       0
50~55%       0
55~60%       0
60~65%       0
65~70%       0
70~75%       0
75~80%       0
80~85%       0
85~90%       0
90~95%       0
95~100%      0
Name: count, dtype: int64
