<a href="https://colab.research.google.com/github/nanpolend/machine-learning/blob/master/Jane_street2025_ai%E6%AF%94%E8%B3%BD%E4%BB%A3%E7%A2%BCgpt4o%E5%BC%B7%E5%8C%96%E5%AD%B8%E7%BF%92%E7%89%88.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
+---------------------+     +---------------------+     +---------------------+     +---------------------+     +---------------------+
|  1. 資料載入        | --> | 2. 資料預處理       | --> | 3. 特徵工程       | --> | 4. 模型訓練        | --> | 5. 模型評估        |
+---------------------+     +---------------------+     +---------------------+     +---------------------+     +---------------------+
       ^                                                                                                              |
       |                                                                                                              V
       +---------------------------------------------------------------------------------------------------------------+
                                                           6. 視覺化 (混淆矩陣)

In [1]:
import os
import gym
import numpy as np
import pandas as pd
from gym import spaces
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback
import tensorflow as tf
import matplotlib.pyplot as plt

# TensorBoard callback for extra logging
class TensorboardCallback(BaseCallback):
    """Callback that records the running timestep count under a custom key.

    On every callback step the current value of ``self.num_timesteps`` is
    written to the logger as ``custom/steps``.
    """

    def __init__(self, verbose=0):
        super().__init__(verbose=verbose)

    def _on_step(self) -> bool:
        """Record the timestep counter and report that training may go on.

        Returns:
            Always ``True``.
            NOTE(review): presumably a ``False`` return would stop training
            early -- confirm against the stable-baselines3 callback docs.
        """
        steps_so_far = self.num_timesteps
        self.logger.record("custom/steps", steps_so_far)
        return True

# Custom reinforcement-learning environment
class CustomEnv(gym.Env):
    """Classification task exposed as a single-pass gym episode.

    Each step presents one feature row as the observation; the agent's action
    is interpreted as a class prediction for that row. A correct prediction
    yields a reward of +1.0, an incorrect one -1.0. The episode ends after the
    last row has been consumed.

    Args:
        X: 2-D array-like of features, one row per sample (cast to float32).
        y: 1-D array-like of labels, one per row of ``X`` (cast to int).

    Raises:
        ValueError: if ``X`` is empty or ``X`` and ``y`` differ in length.
    """

    def __init__(self, X, y):
        super().__init__()
        self.X = np.asarray(X).astype(np.float32)
        self.y = np.asarray(y).astype(int)
        if len(self.X) == 0:
            raise ValueError("CustomEnv requires at least one sample")
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same length, got {len(self.X)} and {len(self.y)}"
            )
        self.index = 0
        # Sorted distinct labels. Action i means "predict self.classes[i]", so
        # labels need not be exactly 0..k-1 (e.g. {-1, 1} still works). For
        # labels that already are 0..k-1 this mapping is the identity.
        self.classes = np.unique(self.y)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.X.shape[1],), dtype=np.float32
        )
        self.action_space = spaces.Discrete(len(self.classes))

    def reset(self):
        """Rewind to the first sample and return it as the initial observation."""
        self.index = 0
        return self.X[self.index]

    def step(self, action):
        """Score ``action`` against the current row's label and advance one row.

        Args:
            action: index into the action space (a scalar or a size-1 array).

        Returns:
            ``(obs, reward, done, info)`` where ``obs`` is the next feature
            row (an all-zero row once the data is exhausted), ``reward`` is
            +1.0 / -1.0, ``done`` is True after the last row, and ``info`` is
            an empty dict.
        """
        # .item() accepts both Python ints and size-1 numpy arrays.
        predicted = self.classes[int(np.asarray(action).item())]
        correct = predicted == self.y[self.index]
        reward = 1.0 if correct else -1.0
        self.index += 1
        done = self.index >= len(self.X)
        # No next row exists at episode end; return a zero placeholder of the
        # same shape and dtype.
        obs = self.X[self.index] if not done else np.zeros_like(self.X[0])
        return obs, reward, done, {}

# Data loading and preprocessing helper
def 載入並預處理資料(csv_path):
    """Load a CSV, impute missing values, and return a scaled train/test split.

    Columns whose name contains ``'feature'`` are used as inputs and the
    ``'action'`` column as the target. Missing numeric values are replaced by
    the column mean. The data is split 80/20 with ``random_state=42``; the
    scaler is fitted on the training portion only and then applied to both
    portions, so no test-set statistics leak into training.

    Args:
        csv_path: path of the CSV file to read.

    Returns:
        ``[X_train, X_test, y_train, y_test]``.

    Raises:
        ValueError: if no column name contains ``'feature'``.
        KeyError: if the ``'action'`` column is missing.
    """
    df = pd.read_csv(csv_path)
    # numeric_only=True: without it, pandas >= 2.0 raises TypeError as soon as
    # the frame contains a non-numeric column.
    df = df.fillna(df.mean(numeric_only=True))
    feature_cols = [col for col in df.columns if 'feature' in col]
    if not feature_cols:
        raise ValueError("no column containing 'feature' found in the CSV")
    X = df[feature_cols].values
    y = df['action'].values
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    # Fit on the training split only; reuse those statistics for the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return [X_train, X_test, y_train, y_test]

# Train the agent, then evaluate it on the held-out split
def 訓練與測試(csv_path):
    """Train a PPO agent on the CSV data and print its test accuracy.

    The data is loaded and split by ``載入並預處理資料``. PPO is trained for
    10000 timesteps on the training split (logging to ``./ppo_log``), then
    run deterministically through one episode of the test split. The share of
    steps with a positive reward is printed as the accuracy.

    Args:
        csv_path: path of the CSV file to train on.

    Returns:
        The trained PPO model.
    """
    X_train, X_test, y_train, y_test = 載入並預處理資料(csv_path)

    # Build the environments
    def make_train_env():
        return CustomEnv(X_train, y_train)

    train_env = DummyVecEnv([make_train_env])
    test_env = CustomEnv(X_test, y_test)

    # PPO training
    model = PPO("MlpPolicy", train_env, verbose=1, tensorboard_log="./ppo_log")
    model.learn(total_timesteps=10000, callback=TensorboardCallback())

    # Evaluation: one pass over the test environment
    obs = test_env.reset()
    hits = 0
    steps = 0
    while True:
        action, _state = model.predict(obs, deterministic=True)
        obs, reward, finished, _info = test_env.step(action)
        steps += 1
        if reward > 0:
            hits += 1
        if finished:
            break

    print(f"測試準確率: {hits / steps:.2f}")

    return model

# Script entry point: train when train.csv is present, then print the
# TensorBoard command for inspecting the logs.
if __name__ == "__main__":
    csv_file = os.path.join(os.getcwd(), "train.csv")
    if os.path.exists(csv_file):
        model = 訓練與測試(csv_file)
        print("訓練完成，請執行以下指令來檢視 TensorBoard：")
        print("\ntensorboard --logdir=./ppo_log\n")
    else:
        # Data file missing: tell the user where to put it.
        print("請將資料檔 train.csv 放置在程式目錄下")


ModuleNotFoundError: No module named 'stable_baselines3'