In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import ta
import yfinance as yf
from datetime import datetime

In [None]:
# 1. Collect the S&P 500 constituent symbols automatically (from Wikipedia)
def get_sp500_symbols():
    """Return the S&P 500 ticker symbols in Yahoo Finance notation.

    Reads the first HTML table of the Wikipedia constituents page and returns
    its ``Symbol`` column as a list of strings.

    Wikipedia writes share classes with a dot (``BRK.B``), while Yahoo Finance
    -- which this notebook queries through ``yfinance`` -- uses a dash
    (``BRK-B``). The dot is replaced so that those tickers can be downloaded
    instead of silently coming back empty.

    Returns:
        list[str]: ticker symbols, in table order.
    """
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    tables = pd.read_html(url)
    symbols = tables[0]['Symbol'].astype(str).str.strip()
    # regex=False: the dot must be treated literally, not as "any character"
    return symbols.str.replace('.', '-', regex=False).tolist()

# 2. Progress tracking: symbols handled by earlier runs (empty set if none)
PROGRESS_FILE = 'sp500_progress.txt'
OUTPUT_FILE = 'sp500_latest.csv'

def load_progress():
    """Return the set of symbols listed in ``PROGRESS_FILE``.

    Each non-blank line of the file is one symbol; surrounding whitespace is
    stripped. When the file does not exist an empty set is returned.
    """
    if not os.path.exists(PROGRESS_FILE):
        return set()
    with open(PROGRESS_FILE, 'r') as progress:
        cleaned = (raw.strip() for raw in progress)
        return {entry for entry in cleaned if entry}

def save_progress(symbol):
    """Append ``symbol`` on its own line to ``PROGRESS_FILE``."""
    with open(PROGRESS_FILE, 'a') as progress:
        record = symbol + '\n'
        progress.write(record)

# 3. Date range for the download
default_start_date = '2015-01-01'
end_date = '2025-01-01'

# 4. Symbol list (test run: skip the leading symbols)
# NOTE(review): the slice starts at index 31, i.e. it skips 31 symbols, while
# the log messages below talk about 30 -- confirm which offset is intended.
sp500_symbols = get_sp500_symbols()[31:]

print(f"[INFO] Total symbols to process (test mode): {len(sp500_symbols)}")

# 5. Load the symbols already handled by earlier runs
done_symbols = load_progress()

print(f"[INFO] Symbols already processed (will skip): {len(done_symbols)}")

# 6. Iterate over the symbols; each one is downloaded, appended to OUTPUT_FILE
#    and then recorded in the progress file so a re-run can resume.
for symbol in sp500_symbols:

    if symbol in done_symbols:
        print(f"[SKIP] Already processed {symbol}")
        continue

    try:
        print(f"[INFO] Processing {symbol} ...")

        # Look up the listing date so the request does not start before it.
        # NOTE(review): confirm that ticker.info really carries an 'ipoDate'
        # key; whenever it is missing the default start date is used.
        ticker = yf.Ticker(symbol)
        info = ticker.info
        ipo_date = info.get('ipoDate', None)

        if ipo_date:
            ipo_date_parsed = pd.to_datetime(ipo_date).strftime('%Y-%m-%d')
            # Both values are 'YYYY-MM-DD' strings, so max() picks the later date
            start_date = max(ipo_date_parsed, default_start_date)
        else:
            start_date = default_start_date

        print(f"[INFO] Fetching data from {start_date} to {end_date}")

        df = yf.download(symbol, start=start_date, end=end_date, progress=False)

        if df.empty:
            print(f"[WARN] No data found for {symbol}. Skipping.")
            # Marked as done so that an empty symbol is not retried on every run
            save_progress(symbol)
            continue

        # yf.download may return two-level columns (field, ticker) even for a
        # single symbol. Keep only the field level; otherwise the first write
        # to the CSV produces a multi-row header that read_csv cannot parse
        # back into plain 'Open'/'Close'/... columns.
        if df.columns.nlevels > 1:
            df.columns = df.columns.get_level_values(0)

        # Keep only the required columns and turn the date index into a column
        df = df[['Open', 'High', 'Low', 'Close', 'Volume']].reset_index()

        # Put Symbol in as the very first column
        df.insert(0, 'Symbol', symbol)

        # Append mode creates the file when needed; the header is written only
        # once, on the first write.
        write_header = not os.path.exists(OUTPUT_FILE)
        df.to_csv(OUTPUT_FILE, mode='a', header=write_header, index=False)

        save_progress(symbol)
        print(f"[INFO] {symbol} data saved and marked as done.")

    except Exception as e:
        # Best effort: report the failure and move on. The symbol is not
        # recorded as done, so the next run retries it.
        print(f"[ERROR] Failed to process {symbol}: {e}")

print("[INFO] Finished all symbols (test mode 30 symbols).")


In [None]:
# TF 버전
# wsl2(tensorflow 2.13 학습) -> window(tensorflow 2.18 호출)

import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
import keras
import joblib
import ta

# Number of most recent trading days fed to the model as one input sequence
sequence_length = 50

# 1. Load the model and the scaler
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load scaler files that come from a trusted source.
model = keras.layers.TFSMLayer('LSTM_MODEL_TF_GPU/AOS', call_endpoint='serving_default')
scaler = joblib.load('LSTM_MODEL_TF_GPU/AOS_scaler.joblib')

# 2. Download daily bars for the symbol. 150 days are requested so that enough
#    rows remain once the indicator warm-up rows have been dropped.
symbol = 'AOS'
df = yf.download(symbol, period='150d', interval='1d')
# yfinance may return two-level columns (field, ticker); keep the field level.
# Guarded because droplevel raises when the columns are already flat.
if df.columns.nlevels > 1:
    df.columns = df.columns.droplevel(1)

# 3. Add the technical indicators (same as at training time)
df['MA20'] = ta.trend.sma_indicator(df['Close'], window=20)
bb = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
df['Upper'] = bb.bollinger_hband()
df['Lower'] = bb.bollinger_lband()
df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()
df = df.dropna()  # drop the rows where the indicators are not yet defined

# 4. Slice the most recent `sequence_length` days
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA20', 'Upper', 'Lower', 'RSI']
df_recent = df.iloc[-sequence_length:].copy()
if len(df_recent) < sequence_length:
    # Fail early with a clear message instead of a shape error inside the model
    raise ValueError(
        f"Need {sequence_length} rows for {symbol} after indicator warm-up, "
        f"got {len(df_recent)}"
    )
# print(df_recent.tail())

# 5. Apply the already-fitted scaler (transform only, never re-fit here)
scaled = scaler.transform(df_recent[features])

# 6. LSTM input shape (batch, sequence length, features) => (1, 50, features)
X_input = np.expand_dims(scaled, axis=0)

# 7. Run the model; the serving endpoint returns a dict of output tensors
pred_dict = model(X_input)  # dict
pred_scaled = list(pred_dict.values())[0].numpy()


# 8. Inverse-transform Close only. The scaler expects a full feature row, so
#    the other columns are filled with zeros and ignored afterwards.
#    (assumes the stored scaler rescales each column independently, as
#    MinMaxScaler does -- TODO confirm against the training code)
dummy_full = np.zeros((1, len(features)))
close_index = features.index('Close')
dummy_full[:, close_index] = pred_scaled.flatten()

pred_rescaled = scaler.inverse_transform(dummy_full)[:, close_index]

print(f"예상 다음날 종가 ({symbol}): {pred_rescaled[0]:.2f}")


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
[*********************100%***********************]  1 of 1 completed

예상 다음날 종가 (AOS): 68.10





In [None]:
# h5 버전 1
# window(tensorflow 2.18 학습) -> window(tensorflow 2.18 호출)

import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
import keras
import joblib
import ta
import os
import tensorflow as tf

# Number of most recent trading days fed to the model as one input sequence
sequence_length = 50

# 1. Load the model and the scaler
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load scaler files that come from a trusted source.
model = tf.keras.models.load_model(os.path.join('LSTM_MODEL_H5_WIN', 'AOS.h5'), compile=False)
scaler = joblib.load('LSTM_MODEL_H5_WIN/AOS_scaler.joblib')

# 2. Prepare the data
symbol = 'AOS'
df = yf.download(symbol, period='150d', interval='1d')
# yfinance may return two-level columns (field, ticker); keep the field level.
# Guarded because droplevel raises when the columns are already flat.
if df.columns.nlevels > 1:
    df.columns = df.columns.droplevel(1)

# 3. Add the technical indicators (same as at training time)
df['MA20'] = ta.trend.sma_indicator(df['Close'], window=20)
bb = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
df['Upper'] = bb.bollinger_hband()
df['Lower'] = bb.bollinger_lband()
df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()
df = df.dropna()  # drop the NaN rows produced by the indicator warm-up

# 4. Select the features and scale them
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA20', 'Upper', 'Lower', 'RSI']
X_recent = df[features].tail(sequence_length)  # most recent window
if len(X_recent) < sequence_length:
    # Fail early with a clear message instead of a shape error inside the model
    raise ValueError(
        f"Need {sequence_length} rows for {symbol} after indicator warm-up, "
        f"got {len(X_recent)}"
    )
X_scaled = scaler.transform(X_recent)  # caution: never fit_transform here

# 5. Predict; input shape is (1, sequence_length, number of features)
X_input = np.expand_dims(X_scaled, axis=0)
pred = model.predict(X_input)  # predict() works even with compile=False

# 6. Inverse-transform Close only. The scaler expects a full feature row, so
#    the other columns are filled with zeros and ignored afterwards.
#    (assumes the stored scaler rescales each column independently, as
#    MinMaxScaler does -- TODO confirm against the training code)
dummy = np.zeros((1, len(features)))
close_idx = features.index('Close')
dummy[:, close_idx] = pred.flatten()
pred_close = scaler.inverse_transform(dummy)[0, close_idx]

print(f"예상 다음날 종가 ({symbol}): ${pred_close:.2f}")


[*********************100%***********************]  1 of 1 completed

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
예상 다음날 종가 (AOS): $67.66





In [None]:
# h5 버전 2
# wsl2(tensorflow 2.13 학습) -> window(tensorflow 2.18 호출)

import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
import keras
import joblib
import ta

# This cell uses `tf` and `LSTM`, which its own import list does not provide;
# import them here so the cell also runs on its own, like the other cells.
import tensorflow as tf
from tensorflow.keras.layers import LSTM

# Number of most recent trading days fed to the model as one input sequence
sequence_length = 50

# 1. Load the model and the scaler
# The replacement 'LSTM' factory drops the 'time_major' keyword from the saved
# layer config before building the layer.
custom_objects = {
    'LSTM': lambda **kwargs: LSTM(**{k:v for k,v in kwargs.items() if k != 'time_major'})
}
# NOTE(review): the cell header describes a WSL2-trained model, but this path
# is the same 'LSTM_MODEL_H5_WIN' file the previous cell loads -- confirm the
# intended model directory.
model = tf.keras.models.load_model(
    'LSTM_MODEL_H5_WIN/AOS.h5',
    custom_objects=custom_objects,
    compile=False
)
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load scaler files that come from a trusted source.
scaler = joblib.load('LSTM_MODEL_H5_WIN/AOS_scaler.joblib')

# 2. Prepare the data
symbol = 'AOS'
df = yf.download(symbol, period='150d', interval='1d')
# yfinance may return two-level columns (field, ticker); keep the field level.
# Guarded because droplevel raises when the columns are already flat.
if df.columns.nlevels > 1:
    df.columns = df.columns.droplevel(1)

# 3. Add the technical indicators (same as at training time)
df['MA20'] = ta.trend.sma_indicator(df['Close'], window=20)
bb = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
df['Upper'] = bb.bollinger_hband()
df['Lower'] = bb.bollinger_lband()
df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()
df = df.dropna()  # drop the NaN rows produced by the indicator warm-up

# 4. Select the features and scale them
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA20', 'Upper', 'Lower', 'RSI']
X_recent = df[features].tail(sequence_length)  # most recent window
if len(X_recent) < sequence_length:
    # Fail early with a clear message instead of a shape error inside the model
    raise ValueError(
        f"Need {sequence_length} rows for {symbol} after indicator warm-up, "
        f"got {len(X_recent)}"
    )
X_scaled = scaler.transform(X_recent)  # caution: never fit_transform here

# 5. Predict; input shape is (1, sequence_length, number of features)
X_input = np.expand_dims(X_scaled, axis=0)
pred = model.predict(X_input)  # predict() works even with compile=False

# 6. Inverse-transform Close only. The scaler expects a full feature row, so
#    the other columns are filled with zeros and ignored afterwards.
#    (assumes the stored scaler rescales each column independently, as
#    MinMaxScaler does -- TODO confirm against the training code)
dummy = np.zeros((1, len(features)))
close_idx = features.index('Close')
dummy[:, close_idx] = pred.flatten()
pred_close = scaler.inverse_transform(dummy)[0, close_idx]

print(f"예상 다음날 종가 ({symbol}): ${pred_close:.2f}")


[*********************100%***********************]  1 of 1 completed

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
예상 다음날 종가 (AOS): $67.66





In [None]:
# Disabled multi-symbol training script. The whole cell body is wrapped in a
# triple-quoted string, so none of the code inside it runs.
#
# Outline of the script inside the string: read sp500_latest.csv, add
# MA20 / Bollinger / RSI columns, one-hot encode Symbol, min-max scale the nine
# price/indicator features, build 50-step sliding windows that predict the next
# scaled Close, split 70/20/10 in row order, train a two-layer LSTM with early
# stopping, report sMAPE on the test split and save the model as
# sp500_multi_temp.h5.
#
# NOTE(review), to check before re-enabling:
#   - The indicators and the sliding windows run over the CSV rows in file
#     order with no grouping by Symbol, while the download cell appends many
#     symbols to that file; windows and rolling indicators near a symbol
#     boundary therefore mix two tickers -- presumably unintended.
#   - The scaler and the encoder are fitted on the whole frame before the
#     train/validation/test split, so test-period statistics reach training.
#   - smape() divides before zeroing the entries whose denominator is 0.
'''

import pandas as pd
import numpy as np
import ta  # 기술적 지표 라이브러리
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf

# sMAPE 함수
def smape(y_true, y_pred):
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    diff = np.abs(y_true - y_pred) / denominator
    diff[denominator == 0] = 0.0
    return np.mean(diff) * 100

# CSV 로드
df = pd.read_csv('sp500_latest.csv')

# 기술적 지표 추가
df['MA20'] = ta.trend.sma_indicator(df['Close'], window=20)
bb = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
df['Upper'] = bb.bollinger_hband()
df['Lower'] = bb.bollinger_lband()
df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()
df.dropna(inplace=True)

# Symbol 원핫 인코딩
symbol_col = 'Symbol'
encoder = OneHotEncoder(sparse_output=False)
symbol_encoded = encoder.fit_transform(df[[symbol_col]])

# 가격 및 지표 스케일링
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA20', 'Upper', 'Lower', 'RSI']
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df[features])

# Symbol + scaled features 병합
X_combined = np.concatenate([symbol_encoded, scaled_features], axis=1)

# 슬라이딩 윈도우
sequence_length = 50
X_seq, y_seq = [], []
for i in range(len(X_combined) - sequence_length):
    X_seq.append(X_combined[i:i+sequence_length])
    y_seq.append(scaled_features[i+sequence_length][features.index('Close')])  # Close column

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# 데이터 나누기 (70/20/10)
total_len = len(X_seq)
train_end = int(0.7 * total_len)
val_end = int(0.9 * total_len)

X_train, y_train = X_seq[:train_end], y_seq[:train_end]
X_val, y_val = X_seq[train_end:val_end], y_seq[train_end:val_end]
X_test, y_test = X_seq[val_end:], y_seq[val_end:]

# 모델 구성
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X_seq.shape[1], X_seq.shape[2])),
    Dropout(0.3),
    LSTM(32),
    Dropout(0.3),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

# 조기 종료
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# 학습
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val), callbacks=[early_stop], verbose=1)

# 예측
pred = model.predict(X_test)

# 역변환을 위한 dummy
close_index = features.index('Close')
y_full = np.zeros((len(y_test), len(features)))
pred_full = np.zeros((len(pred), len(features)))
y_full[:, close_index] = y_test
pred_full[:, close_index] = pred.flatten()

true_rescaled = scaler.inverse_transform(y_full)[:, close_index]
pred_rescaled = scaler.inverse_transform(pred_full)[:, close_index]

# sMAPE 계산
smape_val = smape(true_rescaled, pred_rescaled)
print(f"sMAPE: {smape_val:.2f}%")

# 모델 저장
model.save('sp500_multi_temp.h5')
print("모델 저장 완료: sp500_multi_temp.h5")

'''