测试Python梯度提升树。

# GBM

## 基础使用

In [1]:
# 测试GBM
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

# 载入数据
data = load_breast_cancer()
X = data.data
y = data.target

# 划分数据集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 创建GBM模型
gbm = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# 训练模型
gbm.fit(X_train, y_train)

# 评估模型
accuracy = gbm.score(X_test, y_test)
print(f"模型准确率: {accuracy:.2f}")

模型准确率: 0.96


## 调参

In [None]:
# 调参
from sklearn.model_selection import GridSearchCV

# 定义参数网格
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5]
}

# 创建网格搜索
grid_search = GridSearchCV(estimator=gbm, param_grid=param_grid, cv=5, scoring='accuracy')

# 执行网格搜索
grid_search.fit(X_train, y_train)

# 输出最佳参数和最佳得分
print("最佳参数:", grid_search.best_params_)
print("最佳得分:", grid_search.best_score_)

# 最佳参数: {'learning_rate': 0.2, 'max_depth': 4, 'n_estimators': 150}
# 最佳得分: 0.9648351648351647

最佳参数: {'learning_rate': 0.2, 'max_depth': 4, 'n_estimators': 150}
最佳得分: 0.9648351648351647


# HGBM

## 基础使用

In [3]:
# 测试HistGBM
from sklearn.ensemble import HistGradientBoostingClassifier

# 创建HistGBM模型
hist_gbm = HistGradientBoostingClassifier(max_iter=100, learning_rate=0.1, max_depth=3, random_state=42)

# 训练HistGBM模型
hist_gbm.fit(X_train, y_train)

# 评估HistGBM模型
hist_accuracy = hist_gbm.score(X_test, y_test)

print(f"HistGBM模型准确率: {hist_accuracy:.2f}")

HistGBM模型准确率: 0.96


## 调参

In [None]:
# 调参
from sklearn.model_selection import GridSearchCV

# 定义参数网格
param_grid = {
    'max_iter': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5]
}

# 创建网格搜索
grid_search_hist = GridSearchCV(estimator=hist_gbm, param_grid=param_grid, cv=5, scoring='accuracy')

# 执行网格搜索
grid_search_hist.fit(X_train, y_train)

# 输出最佳参数和最佳得分
print("HistGBM最佳参数:", grid_search_hist.best_params_)
print("HistGBM最佳得分:", grid_search_hist.best_score_)

# HistGBM最佳参数: {'learning_rate': 0.2, 'max_depth': 3, 'max_iter': 50}
# HistGBM最佳得分: 0.9692307692307693

HistGBM最佳参数: {'learning_rate': 0.2, 'max_depth': 3, 'max_iter': 50}
HistGBM最佳得分: 0.9692307692307693


# XGBoost

## 基础使用

In [10]:
# 测试XGBoost
import xgboost as xgb

# 创建XGBoost模型
xgb_model = xgb.XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# 训练XGBoost模型
xgb_model.fit(X_train, y_train)

# 评估XGBoost模型
xgb_accuracy = xgb_model.score(X_test, y_test)
print(f"XGBoost模型准确率: {xgb_accuracy:.2f}")

XGBoost模型准确率: 0.96


## 调参

In [8]:
xgb_model.get_params()

{'objective': 'binary:logistic',
 'base_score': None,
 'booster': None,
 'callbacks': None,
 'colsample_bylevel': None,
 'colsample_bynode': None,
 'colsample_bytree': None,
 'device': None,
 'early_stopping_rounds': None,
 'enable_categorical': False,
 'eval_metric': None,
 'feature_types': None,
 'gamma': None,
 'grow_policy': None,
 'importance_type': None,
 'interaction_constraints': None,
 'learning_rate': 0.1,
 'max_bin': None,
 'max_cat_threshold': None,
 'max_cat_to_onehot': None,
 'max_delta_step': None,
 'max_depth': 3,
 'max_leaves': None,
 'min_child_weight': None,
 'missing': nan,
 'monotone_constraints': None,
 'multi_strategy': None,
 'n_estimators': 100,
 'n_jobs': None,
 'num_parallel_tree': None,
 'random_state': 42,
 'reg_alpha': None,
 'reg_lambda': None,
 'sampling_method': None,
 'scale_pos_weight': None,
 'subsample': None,
 'tree_method': None,
 'validate_parameters': None,
 'verbosity': None}

In [None]:
from sklearn.model_selection import cross_val_score

# 手动调参，调节n_estimators
param_range = [50, 100, 150]

# 创建一个空列表来存储每个n_estimators的准确率
accuracies = []

# 循环遍历每个n_estimators值
for n in param_range:
    # 训练集测试集
    xgb_model.set_params(n_estimators=n)
    xgb_model.fit(X_train, y_train)
    score = xgb_model.score(X_test, y_test)
    accuracies.append(score)

# 输出每个n_estimators的平均准确率
for n, acc in zip(param_range, accuracies):
    print(f"n_estimators={n}, 准确率={acc:.2f}")

# n_estimators=50, 准确率=0.96
# n_estimators=100, 准确率=0.96
# n_estimators=150, 准确率=0.96

n_estimators=50, 准确率=0.96
n_estimators=100, 准确率=0.96
n_estimators=150, 准确率=0.96


# LightGBM

## 基础使用

In [25]:
# 测试LightGBM
import lightgbm as lgb

# 创建LightGBM模型
lgb_model = lgb.LGBMClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# 训练LightGBM模型
lgb_model.fit(X_train, y_train)

# 评估LightGBM模型
lgb_accuracy = lgb_model.score(X_test, y_test)
print(f"LightGBM模型准确率: {lgb_accuracy:.2f}")

[LightGBM] [Info] Number of positive: 286, number of negative: 169
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000243 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4548
[LightGBM] [Info] Number of data points in the train set: 455, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.628571 -> initscore=0.526093
[LightGBM] [Info] Start training from score 0.526093
LightGBM模型准确率: 0.96




## 调参

In [26]:
lgb_model.get_params()

{'boosting_type': 'gbdt',
 'class_weight': None,
 'colsample_bytree': 1.0,
 'importance_type': 'split',
 'learning_rate': 0.1,
 'max_depth': 3,
 'min_child_samples': 20,
 'min_child_weight': 0.001,
 'min_split_gain': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'num_leaves': 31,
 'objective': None,
 'random_state': 42,
 'reg_alpha': 0.0,
 'reg_lambda': 0.0,
 'subsample': 1.0,
 'subsample_for_bin': 200000,
 'subsample_freq': 0}

In [None]:
# 调参
from sklearn.model_selection import GridSearchCV

# 定义参数网格
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5]
}

# 创建网格搜索
grid_search_lgb = GridSearchCV(estimator=lgb_model, param_grid=param_grid, cv=5, scoring='accuracy')

# 执行网格搜索
grid_search_lgb.fit(X_train, y_train)

# 输出最佳参数和最佳得分
print("LightGBM最佳参数:", grid_search_lgb.best_params_)
print("LightGBM最佳得分:", grid_search_lgb.best_score_)

# LightGBM最佳参数: {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 200}
# LightGBM最佳得分: 0.9714285714285715



[LightGBM] [Info] Number of positive: 228, number of negative: 136
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.626374 -> initscore=0.516691
[LightGBM] [Info] Start training from score 0.516691
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000093 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 228, number of negative: 136
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.626374 -> initscore=0.516691
[LightGBM] [Info] Start training from score 0.516691
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000084 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000111 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3642
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000107 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447




[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000232 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3642
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447




[LightGBM] [Info] Number of positive: 228, number of negative: 136
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000124 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.626374 -> initscore=0.516691
[LightGBM] [Info] Start training from score 0.516691
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000123 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000105 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000171 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000190 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3642
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000094 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000093 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 228, number of negative: 136
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000118 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.626374 -> initscore=0.516691
[LightGBM] [Info] Start training from score 0.516691
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000113 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 228, number of negative: 136
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.626374 -> initscore=0.516691
[LightGBM] [Info] Start training from score 0.516691
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000107 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000108 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3642
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.626374 -> initscore=0.516691
[LightGBM] [Info] Start training from score 0.516691
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000115 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000117 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:Bo



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000184 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000112 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3642
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000130 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000102 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3642
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000172 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000122 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000197 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3642
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 228, number of negative: 136
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000130 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3648
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.626374 -> initscore=0.516691
[LightGBM] [Info] Start training from score 0.516691
[LightGBM] [Info] Number of 



[LightGBM] [Info] Number of positive: 229, number of negative: 135
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000183 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3642
[LightGBM] [Info] Number of data points in the train set: 364, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.629121 -> initscore=0.528447
[LightGBM] [Info] Start training from score 0.528447
[LightGBM] [Info] Number of positive: 286, number of negative: 169
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4548
[LightGBM] [Info] Number of data points in the train set: 455, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.628571 -> initscore=0.526093
[LightGBM] [Info] Start training from score 0.526093
LightGBM最佳参数: {'learning_rat



# CatBoost

## 基础使用

In [28]:
# 测试CatBoost
from catboost import CatBoostClassifier

# 创建CatBoost模型
catboost_model = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=3, random_state=42, verbose=0)

# 训练CatBoost模型
catboost_model.fit(X_train, y_train)

# 评估CatBoost模型
catboost_accuracy = catboost_model.score(X_test, y_test)
print(f"CatBoost模型准确率: {catboost_accuracy:.2f}")

CatBoost模型准确率: 0.97




## 调参

In [30]:
catboost_model.get_params()

{'iterations': 100,
 'learning_rate': 0.1,
 'depth': 3,
 'verbose': 0,
 'random_state': 42}

In [None]:
# 调参
from sklearn.model_selection import GridSearchCV

# 定义参数网格
param_grid = {
    'iterations': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 0.2],
    'depth': [3, 4, 5]
}

# 创建网格搜索
grid_search = GridSearchCV(estimator=catboost_model, param_grid=param_grid, cv=5, scoring='accuracy')

# 执行网格搜索
grid_search.fit(X_train, y_train)

# 输出最佳参数和最佳得分
print("CatBoost最佳参数:", grid_search.best_params_)
print("CatBoost最佳得分:", grid_search.best_score_)

# CatBoost最佳参数: {'depth': 3, 'iterations': 150, 'learning_rate': 0.2}
# CatBoost最佳得分: 0.9758241758241759

CatBoost最佳参数: {'depth': 3, 'iterations': 150, 'learning_rate': 0.2}
CatBoost最佳得分: 0.9758241758241759
