<a href="https://colab.research.google.com/github/nanpolend/machine-learning/blob/master/jane_street2025_ai_deepseek.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding: utf-8 -*-
"""
Jane Street 市場數據預測集成模型（深度學習強化版）
版本：v4.0-dl-optimized

主要改进：
1. 引入LSTM時序特征提取
2. 自定義財務損失函數
3. 添加Attention機制
4. 強化特征工程
5. 動態風險控制
6. 多層次集成策略
"""

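# ==== Environment setup ====
# NOTE: the pinned NumPy below replaces whatever the runtime has preinstalled. Restart the
# runtime after the install; otherwise already-compiled extensions may fail to import with
# "ValueError: numpy.dtype size changed, may indicate binary incompatibility".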
!pip install pandas==2.1.4 scikit-learn==1.3.2 xgboost==2.0.3 tensorflow==2.15.0 numpy==1.26.0 keras-tcn
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from xgboost import XGBRegressor
from sklearn.impute import KNNImputer
from tcn import TCN
import matplotlib.pyplot as plt

np.random.seed(42)
tf.random.set_seed(42)

# ==== 自定義損失函數 ====
def financial_loss(y_true, y_pred, sample_weight=None):
    """
    Finance-oriented loss: negated (directional reward - volatility penalty).

    The score being maximised is built from:
    1. A directional reward: ``2.0 * |y_true|`` where the signs of ``y_true``
       and ``y_pred`` agree, ``-1.5 * |y_true|`` where they disagree.
    2. A volatility penalty: ``0.3 * std(y_pred)`` subtracted from every element.

    Args:
        y_true: Realised targets.
        y_pred: Model predictions, broadcast-compatible with ``y_true``.
        sample_weight: Optional per-sample weights multiplied into the score.
            It defaults to ``None`` because Keras invokes a custom loss as
            ``fn(y_true, y_pred)`` and applies ``sample_weight`` itself; a
            required third positional argument would raise ``TypeError``
            during ``model.fit``.

    Returns:
        A scalar tensor. The mean score is negated so that minimising the
        loss maximises reward and minimises the penalty.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # NOTE(review): the reward depends on y_pred only through tf.sign/tf.equal,
    # which presumably contribute no gradient; if so, only the volatility term
    # trains the regression head. Consider a smooth surrogate — TODO confirm.
    direction_reward = tf.where(
        tf.equal(tf.sign(y_true), tf.sign(y_pred)),
        tf.abs(y_true) * 2.0,     # amplify the payoff when the direction is right
        tf.abs(y_true) * (-1.5),  # penalise harder when the direction is wrong
    )

    volatility_penalty = 0.3 * tf.math.reduce_std(y_pred)

    score = direction_reward - volatility_penalty
    if sample_weight is not None:
        score = tf.cast(sample_weight, score.dtype) * score

    # The score is "higher is better"; a loss must be "lower is better".
    return -tf.reduce_mean(score)

# ==== 數據強化處理 ====
class AdvancedFeatureEngineer:
    """Feature-engineering helper that adds technical and calendar columns.

    All ``add_*`` methods write new columns into the DataFrame they receive
    (in place) and also return it.
    """

    def __init__(self, window_size=10):
        # Stored for callers; no method in this class currently reads it
        # (the indicator windows below are fixed at 10 / 14 / 20).
        self.window_size = window_size

    def add_technical_features(self, df):
        """Add ``MA_10``, ``RSI_14`` and ``Volatility_20`` computed from ``df['resp']``.

        The first rows of each column are NaN until the rolling window is full.
        """
        df['MA_10'] = df['resp'].rolling(window=10).mean()
        df['RSI_14'] = self._calculate_rsi(df['resp'], 14)
        df['Volatility_20'] = df['resp'].rolling(20).std()
        return df

    def _calculate_rsi(self, series, period):
        """Return a relative-strength index of ``series`` over ``period`` rows.

        Uses simple rolling means of the positive and negative differences.
        Where both means are zero the ratio is NaN, so the result is NaN there.
        """
        delta = series.diff()
        gain = (delta.where(delta > 0, 0)).rolling(period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(period).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))

    def add_time_features(self, df):
        """Add ``hour``, ``day_of_week`` and ``month`` derived from ``df['date']``.

        ``df['date']`` must be a datetime column (the ``.dt`` accessor is used).
        """
        df['hour'] = df['date'].dt.hour
        df['day_of_week'] = df['date'].dt.dayofweek
        df['month'] = df['date'].dt.month
        return df

    def process(self, df):
        """Run every feature step, then forward-fill gaps and zero-fill the rest.

        Returns:
            A new DataFrame without NaN values. The input ``df`` itself has
            received the new columns but has not been filled.
        """
        df = self.add_technical_features(df)
        df = self.add_time_features(df)
        # DataFrame.fillna(method='ffill') is deprecated in pandas 2.1+;
        # DataFrame.ffill() is the supported equivalent.
        return df.ffill().fillna(0)

# ==== 深度學習模型架構 ====
def build_hybrid_model(input_shape):
    """Build and compile the TCN + attention + dense two-headed model.

    Args:
        input_shape: ``(timesteps, n_features)`` of one input window
            (passed straight to ``tf.keras.Input``).

    Returns:
        A compiled ``tf.keras.Model`` with two outputs, in this order:
        ``'regression'`` (linear, 1 unit) and ``'classification'``
        (sigmoid, 1 unit).
    """
    inputs = tf.keras.Input(shape=input_shape)

    # Temporal feature extraction. The full sequence is kept so that the
    # attention layer below has a time axis to attend over.
    sequence = TCN(nb_filters=64, kernel_size=3, nb_stacks=2,
                   dropout_rate=0.2, return_sequences=True)(inputs)

    # Self-attention over time steps. Keras' Attention layer is documented for
    # (batch, steps, dim) inputs; feeding it the 2-D (batch, dim) output of a
    # return_sequences=False TCN makes the score matmul run across the batch axis.
    attended = tf.keras.layers.Attention()([sequence, sequence])

    # Collapse the time axis back to one vector per sample.
    x = tf.keras.layers.GlobalAveragePooling1D()(attended)

    # Fully connected trunk shared by both heads.
    x = tf.keras.layers.Dense(128, activation='swish')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(0.3)(x)

    # Multi-task heads.
    regression_out = tf.keras.layers.Dense(1, name='regression')(x)
    classification_out = tf.keras.layers.Dense(1, activation='sigmoid', name='classification')(x)

    model = tf.keras.Model(inputs=inputs, outputs=[regression_out, classification_out])

    # Adam with an exponentially decaying learning rate.
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=0.001,
            decay_steps=10000,
            decay_rate=0.96)
    )

    model.compile(
        optimizer=optimizer,
        loss={
            'regression': financial_loss,
            'classification': 'binary_crossentropy'
        },
        # Keyed by output name so the weights cannot drift out of sync with
        # the `loss` dict if the output order ever changes.
        loss_weights={'regression': 0.7, 'classification': 0.3},
        weighted_metrics={'regression': ['mae']}
    )
    return model

# ==== 動態風險控制 ====
class RiskController:
    """Scale a predicted position down as the portfolio draws down from its recent peak."""

    def __init__(self, max_drawdown=0.15, position_limit=0.1):
        """
        Args:
            max_drawdown: Fractional drawdown above which trading stops.
            position_limit: Multiplier applied to every returned position.
        """
        self.max_drawdown = max_drawdown
        self.position_limit = position_limit
        # History of portfolio values seen by adjust_position, oldest first.
        self.cumulative_returns = []

    def adjust_position(self, current_pred, portfolio_value):
        """Return the risk-adjusted position for ``current_pred``.

        The call records ``portfolio_value`` in ``cumulative_returns`` and
        measures drawdown against the maximum of the last 100 recorded values.
        Without that recording the history stays empty, the peak is always 0
        and the drawdown control can never trigger.

        Args:
            current_pred: Raw model prediction to scale.
            portfolio_value: Current portfolio value.

        Returns:
            ``0`` when the drawdown exceeds ``max_drawdown``; otherwise
            ``current_pred * (1 - drawdown / max_drawdown) * position_limit``.
        """
        self.cumulative_returns.append(portfolio_value)

        # Peak over the most recent 100 observations (current one included,
        # so the drawdown is never negative).
        recent_max = max(self.cumulative_returns[-100:])
        current_drawdown = (recent_max - portfolio_value) / recent_max if recent_max > 0 else 0

        if current_drawdown > self.max_drawdown:
            return 0  # stop trading

        risk_multiplier = 1 - (current_drawdown / self.max_drawdown)
        return current_pred * risk_multiplier * self.position_limit

# ==== 集成策略強化 ====
class DynamicEnsemble:
    """Weighted ensemble whose weights track recent per-model performance."""

    def __init__(self, models):
        """
        Args:
            models: Sequence of objects exposing ``predict(X)``.
        """
        self.models = models
        # Start from equal weights.
        self.model_weights = np.ones(len(models)) / len(models)

    def update_weights(self, recent_performance):
        """Blend the current weights with a softmax of ``recent_performance``.

        New weights are ``0.8 * old + 0.2 * softmax(recent_performance)``.

        Args:
            recent_performance: One score per model, higher meaning better.
                An empty sequence leaves the weights untouched.
        """
        scores = np.asarray(recent_performance, dtype=float)
        if scores.size == 0:
            return

        # Subtracting the maximum leaves the softmax unchanged mathematically
        # but keeps np.exp from overflowing to inf (and the ratio to NaN).
        exp_scores = np.exp(scores - np.max(scores))
        performance_softmax = exp_scores / np.sum(exp_scores)
        self.model_weights = 0.8 * self.model_weights + 0.2 * performance_softmax

    def predict(self, X):
        """Return the weight-scaled sum of every model's ``predict(X)``."""
        predictions = [
            model.predict(X) * weight
            for model, weight in zip(self.models, self.model_weights)
        ]
        return np.sum(predictions, axis=0)

# ==== 主程式流程 ====
if __name__ == "__main__":
    # 數據載入與預處理
    data = pd.read_csv("./train.csv", parse_dates=['date'])
    data = AdvancedFeatureEngineer().process(data)

    # 時序分割
    split_idx = int(len(data)*0.8)
    train_data = data.iloc[:split_idx]
    test_data = data.iloc[split_idx:]

    # 特徵工程
    feature_columns = [col for col in data.columns if 'feature_' in col] + ['MA_10', 'RSI_14', 'Volatility_20']
    X_train = train_data[feature_columns]
    X_test = test_data[feature_columns]

    # 數據標準化
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 重塑數據結構供時序模型使用
    seq_length = 10
    X_train_3d = []
    for i in range(seq_length, len(X_train_scaled)):
        X_train_3d.append(X_train_scaled[i-seq_length:i])
    X_train_3d = np.array(X_train_3d)

    # 模型訓練
    model = build_hybrid_model(input_shape=(seq_length, X_train_3d.shape[2]))
    history = model.fit(
        X_train_3d,
        {
            'regression': train_data['resp'].values[seq_length:],
            'classification': (train_data['resp'] > 0).astype(int).values[seq_length:]
        },
        sample_weight=train_data['weight'].values[seq_length:],
        epochs=100,
        batch_size=256,
        validation_split=0.2,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)
        ]
    )

    # 風險控制初始化
    risk_manager = RiskController()

    # 動態交易執行
    portfolio_value = 1.0
    positions = []
    for i in tqdm(range(len(X_test_scaled))):
        # 時序數據構建
        if i < seq_length:
            continue
        seq_data = X_test_scaled[i-seq_length:i].reshape(1, seq_length, -1)

        # 模型預測
        reg_pred, cls_pred = model.predict(seq_data)

        # 風險調整
        adjusted_pred = risk_manager.adjust_position(reg_pred[0][0], portfolio_value)

        # 執行交易
        if cls_pred[0][0] > 0.65 and adjusted_pred != 0:
            position = adjusted_pred * portfolio_value
            portfolio_value *= (1 + position * test_data['resp'].iloc[i])
            positions.append(position)

    print(f"最終投資組合價值: {portfolio_value:.2f}")
    plt.plot(np.cumprod([1 + p * r for p, r in zip(positions, test_data['resp'])]))
    plt.title("Portfolio Growth")
    plt.show()

Collecting pandas==2.1.4
  Downloading pandas-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting scikit-learn==1.3.2
  Downloading scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting xgboost==2.0.3
  Downloading xgboost-2.0.3-py3-none-manylinux2014_x86_64.whl.metadata (2.0 kB)
Collecting tensorflow==2.15.0
  Downloading tensorflow-2.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting numpy==1.26.0
  Downloading numpy-1.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.5/58.5 kB[0m [31m370.7 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras-tcn
  Downloading keras_tcn-3.5.6-py3-none-any.whl.metadata (13 kB)
Collecting ml-dtypes~=0.2.0 (from tensorflow==2.15.0)
  Downloading ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.meta

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject