# 주식 예측 모델 학습
Google Colab에서 실행하여 ONNX 모델 생성

In [None]:
# Install the packages this notebook imports (notebook shell magic, e.g. on Colab)
!pip install yfinance pandas numpy scikit-learn xgboost tensorflow onnx tf2onnx onnxruntime

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import xgboost as xgb
import onnx
import tf2onnx
from datetime import datetime, timedelta

In [None]:
# Data collection
def collect_stock_data(tickers, period='2y'):
    """Download price history for each ticker and derive the model features.

    For every ticker the yfinance history is extended with these columns:
    ``Returns``, ``MA20``, ``MA60``, ``RSI``, ``Volatility`` (technical
    indicators computed from ``Close``), ``PE_Ratio`` and ``ROE`` (taken from
    ``stock.info``), ``Target`` (1 if the next row's close is higher than the
    current close, else 0) and ``Ticker``.

    Args:
        tickers: Iterable of ticker symbols passed to ``yf.Ticker``.
        period: History period string forwarded to ``Ticker.history``.

    Returns:
        A single DataFrame with the per-ticker frames stacked row-wise.

    Raises:
        ValueError: If no ticker produced any data.
    """
    all_data = []

    for ticker in tickers:
        # Best-effort per ticker: a failure for one symbol (network error,
        # missing columns, ...) is reported and must not abort the others.
        try:
            stock = yf.Ticker(ticker)
            hist = stock.history(period=period)
            if hist.empty:
                print(f"Error processing {ticker}: no price history returned")
                continue
            info = stock.info

            # Technical indicators
            hist['Returns'] = hist['Close'].pct_change()
            hist['MA20'] = hist['Close'].rolling(window=20).mean()
            hist['MA60'] = hist['Close'].rolling(window=60).mean()
            hist['RSI'] = calculate_rsi(hist['Close'])
            hist['Volatility'] = hist['Returns'].rolling(window=20).std()

            # Fundamentals. dict.get() only applies its default when the key
            # is absent; a key that is present with a None value would break
            # the "* 100" below (or fill the column with NaN), so fall back
            # to the defaults explicitly.
            # NOTE(review): these are single snapshot values broadcast over
            # the whole history, not point-in-time fundamentals.
            pe_ratio = info.get('trailingPE')
            roe = info.get('returnOnEquity')
            hist['PE_Ratio'] = 15 if pe_ratio is None else pe_ratio
            hist['ROE'] = (0.1 if roe is None else roe) * 100

            # Target: does the next row close higher than this one?
            hist['Target'] = (hist['Close'].shift(-1) > hist['Close']).astype(int)

            hist['Ticker'] = ticker

            # The final row has no following close: shift(-1) is NaN there and
            # the comparison silently evaluates to False, which would create a
            # bogus "down" label. Drop that row instead of training on it.
            all_data.append(hist.iloc[:-1])

        except Exception as e:
            print(f"Error processing {ticker}: {e}")

    if not all_data:
        # pd.concat([]) raises an opaque ValueError; keep the exception type
        # but say what actually went wrong.
        raise ValueError("No data could be collected for any of the requested tickers")

    return pd.concat(all_data, axis=0)

def calculate_rsi(prices, period=14):
    """Return the Relative Strength Index of a price series.

    Uses simple rolling means of the upward and downward moves over
    ``period`` rows. The first ``period - 1`` results are NaN because the
    rolling window is not yet full.

    Args:
        prices: pandas Series of prices.
        period: Rolling window length.

    Returns:
        A Series aligned with ``prices`` holding the RSI values.
    """
    change = prices.diff()

    # Split the moves by sign; everything else (including the leading NaN
    # from diff()) is replaced with 0 so it does not poison the window.
    up_moves = change.where(change > 0, 0)
    down_moves = -change.where(change < 0, 0)

    avg_up = up_moves.rolling(window=period).mean()
    avg_down = down_moves.rolling(window=period).mean()

    relative_strength = avg_up / avg_down
    return 100 - (100 / (1 + relative_strength))

In [None]:
# Ticker symbols to train on, kept in two lists (kr_* / us_*) and merged below
kr_tickers = ['005930.KS', '000660.KS', '035420.KS', '051910.KS', '006400.KS']
us_tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META']

# Collect data for all tickers into one DataFrame and report the row count
print("데이터 수집 중...")
df = collect_stock_data(kr_tickers + us_tickers)
print(f"수집 완료: {len(df)} 행")

In [None]:
# Feature preparation
feature_cols = ['Returns', 'MA20', 'MA60', 'RSI', 'Volatility', 'PE_Ratio', 'ROE']

# Only require the columns the models actually use to be non-null. A plain
# dropna() looks at every column, so a NaN in any unrelated column (for
# example one that exists for only some tickers after the concat) would
# throw away otherwise usable rows.
df_clean = df.dropna(subset=feature_cols + ['Target'])

X = df_clean[feature_cols]
y = df_clean['Target']

# Train/test split
# NOTE(review): this is a random, shuffled split over time-ordered rows, so
# test rows can precede training rows in time. Consider a chronological split
# if the reported accuracy is meant to reflect forecasting performance.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling: fit on the training split only, then apply the same transform to test
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# LSTM model
def create_lstm_model(input_shape):
    """Build and compile a two-layer LSTM binary classifier.

    Args:
        input_shape: Shape of one sample, passed to the first LSTM layer.

    Returns:
        A compiled ``keras.Sequential`` model with a single sigmoid output,
        trained with binary cross-entropy and the Adam optimizer.
    """
    layers = keras.layers

    # Layers are created in network order so the stack reads top to bottom.
    stack = [
        layers.LSTM(64, return_sequences=True, input_shape=input_shape),
        layers.Dropout(0.2),
        layers.LSTM(32),
        layers.Dropout(0.2),
        layers.Dense(16, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ]
    network = keras.Sequential(stack)

    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

# Prepare sequence input (simple version): insert a time axis of length 1,
# turning (samples, features) into (samples, 1, features)
X_train_lstm = np.expand_dims(X_train_scaled, axis=1)
X_test_lstm = np.expand_dims(X_test_scaled, axis=1)

# Per-sample shape is everything after the batch axis: (1, n_features)
lstm_model = create_lstm_model(X_train_lstm.shape[1:])
lstm_model.fit(
    X_train_lstm,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=10,
    verbose=1,
)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# XGBoost model: construct and train in one expression (fit() returns the estimator)
xgb_model = xgb.XGBClassifier(
    random_state=42,
    learning_rate=0.1,
    max_depth=5,
    n_estimators=100,
).fit(X_train_scaled, y_train)

# Evaluation: threshold the LSTM's sigmoid output at 0.5 to get class labels;
# the XGBoost classifier already predicts labels
lstm_pred = (lstm_model.predict(X_test_lstm) > 0.5).astype(int)
xgb_pred = xgb_model.predict(X_test_scaled)

print("LSTM Accuracy:", accuracy_score(y_test, lstm_pred))
print("XGBoost Accuracy:", accuracy_score(y_test, xgb_pred))

In [None]:
# ONNX conversion
# Convert the LSTM model. The input signature leaves the batch dimension open
# (None) and fixes the remaining dimensions to (1, number of feature columns).
input_signature = [tf.TensorSpec((None, 1, X_train_scaled.shape[1]), tf.float32, name='input')]
onnx_model_lstm, _ = tf2onnx.convert.from_keras(lstm_model, input_signature=input_signature)
onnx.save(onnx_model_lstm, 'lstm_model.onnx')

# XGBoost needs a separate conversion step; it is not exported by this cell
# xgb_model.save_model('xgboost_model.json')

print("모델 저장 완료!")

In [None]:
# Save to Google Drive (only works when running in Colab)
from google.colab import drive
drive.mount('/content/drive')

# Copy the exported LSTM model into the mounted Drive's MyDrive folder
!cp lstm_model.onnx /content/drive/MyDrive/
print("Google Drive에 모델 저장 완료!")