In [2]:
import optuna
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import numpy as np
from training import *


In [3]:
features = ['macd', 'rsi', 'k', 'd', 'r', 'close',
            'before1day', 'before2day', 'before3day', 'before4day','before5day']
target = 'dailyreturn'
X_train, y_train = dataset_loading(features, target, './taiex_good_train_data.csv')
X_test, y_test = dataset_loading(features, target, './taiex_good_test_data.csv')
y_train = process_dataset(y_train, target)
y_test = process_dataset(y_test, target)

In [None]:


# Optuna 的目標函數：輸出交叉驗證分數（平均準確率）
def objective(trial):
    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy'])
    max_depth = trial.suggest_int('max_depth', 200, 300)
    min_samples_split = trial.suggest_int('min_samples_split', 20, 30)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 20, 30)

    clf = DecisionTreeClassifier(
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42
    )
    
    # 使用 5 折交叉驗證來評估模型表現
    score = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
    return score.mean()

# 建立並開始搜尋
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000)  # 可自行調整搜尋次數

# 最佳結果
print("最佳參數組合：", study.best_params)
print("最佳交叉驗證分數：", study.best_value)

# 以最佳參數訓練最終模型並測試
best_clf = DecisionTreeClassifier(**study.best_params, random_state=42)
best_clf.fit(X_train, y_train)
y_pred = best_clf.predict(X_test)
print("測試集準確率：", accuracy_score(y_test, y_pred))


[I 2025-06-27 01:53:11,428] A new study created in memory with name: no-name-7eed6504-1736-43dd-8eb7-e995f90a71c5
[I 2025-06-27 01:53:11,460] Trial 0 finished with value: 0.8389830508474576 and parameters: {'criterion': 'gini', 'max_depth': 257, 'min_samples_split': 27, 'min_samples_leaf': 27}. Best is trial 0 with value: 0.8389830508474576.
[I 2025-06-27 01:53:11,501] Trial 1 finished with value: 0.8415254237288134 and parameters: {'criterion': 'entropy', 'max_depth': 203, 'min_samples_split': 22, 'min_samples_leaf': 30}. Best is trial 1 with value: 0.8415254237288134.
[I 2025-06-27 01:53:11,540] Trial 2 finished with value: 0.8567796610169491 and parameters: {'criterion': 'entropy', 'max_depth': 259, 'min_samples_split': 26, 'min_samples_leaf': 21}. Best is trial 2 with value: 0.8567796610169491.
[I 2025-06-27 01:53:11,571] Trial 3 finished with value: 0.8389830508474576 and parameters: {'criterion': 'gini', 'max_depth': 239, 'min_samples_split': 29, 'min_samples_leaf': 29}. Best is 

KeyboardInterrupt: 

In [5]:
dt_model = DecisionTreeClassifier(
    criterion = 'gini',
    max_depth = 7
    )
dt_model.fit(X_train, y_train)

y_pred = dt_model.predict(X_test)
print("測試集準確率：", accuracy_score(y_test, y_pred))
features_len = len(np.where(dt_model.feature_importances_ > 0)[0])
features_len

測試集準確率： 0.7878787878787878


10