In [1]:
import numpy as np  
import pandas as pd  
from sklearn.model_selection import GridSearchCV  
from sklearn.datasets import load_iris  
from xgboost import XGBClassifier
import matplotlib  
matplotlib.use('TkAgg')  # 指定使用 TkAgg 后端  
import matplotlib.pyplot as plt  
from mpl_toolkits.mplot3d import Axes3D  

In [3]:
# 加载数据集  
   

In [5]:
# 设置参数网格  
param_grid = {  
    'learning_rate': [0.1, 0.3, 0.5, 0.7, 0.9],  
    'max_depth': [1, 5, 10, 20, 50],  
    'n_estimators': [1, 5, 10, 20, 50]  
}  

In [7]:
# 初始化XGBoost分类器  
xgb_clf = XGBClassifier(objective='multi:softmax', num_class=6)  

In [9]:
# 使用网格搜索找到最佳参数  
grid_search = GridSearchCV(xgb_clf, param_grid, cv=10, scoring='accuracy', verbose=1)  
grid_search.fit(feature1, target1)  

Fitting 10 folds for each of 125 candidates, totalling 1250 fits


In [10]:
# 获取最佳参数组合和最佳得分  
best_params = grid_search.best_params_  
best_score = grid_search.best_score_  

In [11]:
# 提取网格搜索的所有结果  
results = grid_search.cv_results_  

In [12]:
# 创建一个DataFrame来存储结果  
param_results = pd.DataFrame(results)  

In [13]:
# 为了可视化，我们可以选择绘制三维图或者多个二维图。  
# 这里我们选择一个简单的三维图来展示。  
fig = plt.figure()  
ax = fig.add_subplot(111, projection='3d')  

In [14]:
# 绘制三维散点图，其中每个点的颜色和大小可以根据平均测试分数来调整  
sc = ax.scatter(param_results['param_learning_rate'], param_results['param_max_depth'], param_results['param_n_estimators'], c=param_results['mean_test_score'], cmap='viridis', s=60)  

In [15]:
# 添加颜色条，显示颜色与z值之间的关系  
plt.colorbar(sc, ax=ax, label='mean_test_score') 
# 设置坐标轴标签  
ax.set_xlabel('Learning Rate')  
ax.set_ylabel('Max Depth')  
ax.set_zlabel('N Estimators')  
ax.set_title('XGBoost Parameter Search Results')  
print (best_params)
# 显示图例  
plt.show()

{'learning_rate': 0.7, 'max_depth': 50, 'n_estimators': 20}


In [16]:
#测试集准确率
model = XGBClassifier(learning_rate=0.1, max_depth=5, n_estimators=5)
s = model.fit(feature1, target1)
r1 = model.score(feature1, target1)
r2 = model.score(feature2, target2)
print (r1)
print (r2)
predicted = model.predict(feature2)
predicted

0.9142327306444136
0.7857142857142857


array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 3, 3, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1],
      dtype=int64)

In [17]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score  
   
# 对于多分类问题，你需要选择一个适合多分类问题的average参数，如'micro', 'macro', 'weighted'  
precision = precision_score(target2, predicted, average='macro')  # 或者 'micro', 'weighted'  
recall = recall_score(target2, predicted, average='macro')  # 或者 'micro', 'weighted'  
f1 = f1_score(target2, predicted, average='macro')  # 或者 'micro', 'weighted'  
  
# 对于 AUC-ROC，你需要模型预测的概率值，而不是预测的标签  
# 这里没有average参数的问题，但是要确保y_true和y_prob的维度匹配  
# 假设 y_true 是真实的标签，y_prob 是模型预测的概率（对于多分类问题，y_prob应该是每个类别的预测概率）  
# auc_roc = roc_auc_score(target2, predicted, multi_class='ovo')  # 或者'ovr'，取决于你的具体需求和模型输出的格式

In [18]:
print (precision)
print (recall)
print (f1)

0.8552053140096618
0.7405827505827507
0.7536131851190029


In [19]:
# 可视化特征重要性
feature_importances = model.feature_importances_
features = feature1.columns
plt.figure(figsize=(10, 6))
plt.barh(features, feature_importances, color='r', align='center')
plt.xlabel('Feature Importance')
plt.ylabel('Features')
plt.title('Feature Importance in XGBoost Model')
plt.gca().invert_yaxis()  # 反转y轴，使特征按重要性从上到下排列
plt.show()

  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args)
  return self.func(*args