# 拡張時系列モデル評価
複数のモデルを試し、途中経過を表示しながら精度を比較します。

In [None]:
import warnings
warnings.filterwarnings('ignore')

# データ取得と前処理
import yfinance as yf
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import TimeSeriesSplit, RandomizedSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
import lightgbm as lgb
from catboost import CatBoostClassifier

# pip install yfinance numpy pandas scikit-learn xgboost lightgbm catboost


In [2]:
# ------------------------------------------------------------
# 0. データ取得・前処理
# ------------------------------------------------------------
# 2018–2023年の米国3大IT銘柄を例にダウンロード
tickers = ["AAPL", "MSFT", "GOOG"]
df = yf.download(
    tickers,
    start="2018-01-01",
    end="2023-12-31",
    interval="1d",
    auto_adjust=True
)

# 欲しい列をリネームして結合
price = df["Close"].rename(columns=lambda c: f"{c}_Close")
vol   = df["Volume"].rename(columns=lambda c: f"{c}_Volume")
data  = pd.concat([price, vol], axis=1).dropna()

# テクニカル指標：5日移動平均とRSIを追加
for t in tickers:
    data[f"{t}_MA5"] = data[f"{t}_Close"].rolling(window=5).mean()
    delta = data[f"{t}_Close"].diff()
    up, down = delta.clip(lower=0), -delta.clip(upper=0)
    rs = up.rolling(14).mean() / down.rolling(14).mean()
    data[f"{t}_RSI"] = 100 - 100 / (1 + rs)

# 目的変数：AAPL の翌日リターンを計算し、正負をラベル化
data["Return"] = data["AAPL_Close"].pct_change().shift(-1)
data = data.dropna()
data["Label"] = (data["Return"] > 0).astype(int)

# 説明変数・ラベルを取得
feature_cols = [c for c in data.columns if c not in ["Return", "Label"]]
X = data[feature_cols].values
y = data["Label"].values

[*********************100%***********************]  3 of 3 completed


In [3]:
# ------------------------------------------------------------
# 1. 学習/テスト分割とスケーリング
# ------------------------------------------------------------
# 時系列保持で 80% 学習、20% テスト
split_idx = int(len(X) * 0.8)
X_tr, X_te = X[:split_idx], X[split_idx:]
y_tr, y_te = y[:split_idx], y[split_idx:]

# 標準化
scaler      = StandardScaler()
X_tr_scaled = scaler.fit_transform(X_tr)
X_te_scaled = scaler.transform(X_te)

In [4]:
# ------------------------------------------------------------
# 2. 不均衡対応の重み計算
# ------------------------------------------------------------
pos_weight = (y_tr == 0).sum() / (y_tr == 1).sum()

In [6]:
# ------------------------------------------------------------
# 3. 学習用と検証用にさらに分割（early stopping 用）
# ------------------------------------------------------------
X_train_part, X_val, y_train_part, y_val = train_test_split(
    X_tr_scaled, y_tr, test_size=0.2, shuffle=False
)


In [7]:
# ------------------------------------------------------------
# 4. モデルリスト＆ハイパーパラメータ空間
# ------------------------------------------------------------
models = [
    ("LogisticRegression",
        LogisticRegression(max_iter=1000, class_weight="balanced"),
        {"C": [0.1, 1, 10]}),
    ("XGBoost",
        XGBClassifier(
            random_state=42,
            scale_pos_weight=pos_weight,
            use_label_encoder=False,
            eval_metric="logloss"
        ),
        {"n_estimators": [100, 200], "max_depth": [3, 5]}),
    ("LightGBM",
        LGBMClassifier(random_state=42, is_unbalance=True),
        {"n_estimators": [100, 200], "num_leaves": [31, 63]}),
    ("CatBoost",
        CatBoostClassifier(random_state=42, verbose=0),
        {"depth": [4, 6], "learning_rate": [0.03, 0.1]})
]

results = []

In [11]:
# ------------------------------------------------------------
# 5. 各モデルのチューニング＆再学習＆評価ループ
# ------------------------------------------------------------
results = []
for name, base_model, param_dist in models:
    print(f"\n▶️ {name} のランダムサーチを開始…")
    search = RandomizedSearchCV(
        base_model,
        param_distributions=param_dist,
        n_iter=2,
        cv=TimeSeriesSplit(n_splits=3),
        scoring="accuracy",
        n_jobs=-1,
        random_state=42
    )
    search.fit(X_train_part, y_train_part)
    best = search.best_estimator_
    print("  ベストパラメータ:", search.best_params_)

    # ✔️ 最良モデルを再学習（early stopping を使いたい場合は以下のように）
    if name == "XGBoost":
        try:
            best.fit(
                X_train_part, y_train_part,
                eval_set=[(X_val, y_val)],
                early_stopping_rounds=20,
                verbose=False
            )
        except TypeError:
            # 古い xgboost では early_stopping_rounds をコンストラクタに渡す必要があるかも
            best = XGBClassifier(**search.best_params_, random_state=42,
                                 eval_metric="logloss", use_label_encoder=False,
                                 early_stopping_rounds=20)
            best.fit(X_train_part, y_train_part,
                     eval_set=[(X_val, y_val)], verbose=False)
    elif name == "LightGBM":
        try:
            best.fit(
                X_train_part, y_train_part,
                eval_set=[(X_val, y_val)],
                early_stopping_rounds=20,
                verbose=False
            )
        except TypeError:
            best.fit(
                X_train_part, y_train_part,
                eval_set=[(X_val, y_val)],
                callbacks=[lgb.early_stopping(stopping_rounds=20)],
                verbose=False
            )
    elif name == "CatBoost":
        best.fit(
            X_train_part, y_train_part,
            eval_set=[(X_val, y_val)],
            verbose=False
        )
    else:
        # LogisticRegression のように early stopping を持たないモデル
        best.fit(X_train_part, y_train_part)

    # テストデータで予測＆評価
    y_pred = best.predict(X_te_scaled)
    acc    = accuracy_score(y_te, y_pred)

    # AUC を計算できるモデルだけ try/except で包む
    try:
        proba = best.predict_proba(X_te_scaled)[:, 1]
        auc   = roc_auc_score(y_te, proba)
    except Exception:
        auc = np.nan

    print(f"  Test Accuracy: {acc:.3f} / AUC: {auc:.3f}")
    results.append({"Model": name, "Accuracy": acc, "AUC": auc})



▶️ LogisticRegression のランダムサーチを開始…
  ベストパラメータ: {'C': 0.1}
  Test Accuracy: 0.525 / AUC: 0.549

▶️ XGBoost のランダムサーチを開始…
  ベストパラメータ: {'n_estimators': 200, 'max_depth': 3}
  Test Accuracy: 0.528 / AUC: 0.523

▶️ LightGBM のランダムサーチを開始…


  File "c:\Users\T123011\AppData\Local\Programs\Python\Python312\Lib\site-packages\joblib\externals\loky\backend\context.py", line 247, in _count_physical_cores
    cpu_count_physical = _count_physical_cores_win32()
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\T123011\AppData\Local\Programs\Python\Python312\Lib\site-packages\joblib\externals\loky\backend\context.py", line 299, in _count_physical_cores_win32
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\Users\T123011\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\T123011\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\T123011\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _w

[LightGBM] [Info] Number of positive: 517, number of negative: 439
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000871 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3060
[LightGBM] [Info] Number of data points in the train set: 956, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.540795 -> initscore=0.163543
[LightGBM] [Info] Start training from score 0.163543
  ベストパラメータ: {'num_leaves': 63, 'n_estimators': 100}


TypeError: LGBMClassifier.fit() got an unexpected keyword argument 'early_stopping_rounds'

In [None]:
# ------------------------------------------------------------
# 6. 結果出力
# ------------------------------------------------------------
df_res = pd.DataFrame(results).set_index("Model")
print("\n=== モデル比較結果 ===")
print(df_res)