In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

# Load the breast cancer dataset that ships with scikit-learn.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Baseline model: a decision tree with default hyperparameters.
# fit() returns the estimator itself, so construction and training are chained.
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test split.
predictions = dt_classifier.predict(X_test)

# Score the baseline predictions against the true test labels.
conf_matrix = confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions)
accuracy = accuracy_score(y_test, predictions)

# Print accuracy, the per-class report and the confusion matrix in one call.
print(
    '准确率：', accuracy,
    '\n分类报告：\n', report,
    '\n混淆矩阵：\n', conf_matrix,
)


准确率： 0.9415204678362573 
分类报告：
               precision    recall  f1-score   support

           0       0.90      0.95      0.92        63
           1       0.97      0.94      0.95       108

    accuracy                           0.94       171
   macro avg       0.93      0.94      0.94       171
weighted avg       0.94      0.94      0.94       171
 
混淆矩阵：
 [[ 60   3]
 [  7 101]]


# 参数调优

In [4]:
# Hyperparameter tuning: exhaustive grid search with cv=5 cross-validation,
# fitted on the training split only.
parameters = {
    'max_depth': [3, 5, 7, 10],
    'min_samples_split': [2, 4, 6],
    'min_samples_leaf': [1, 2, 3],
}
grid_search = GridSearchCV(estimator=dt_classifier, param_grid=parameters, cv=5)
grid_search.fit(X_train, y_train)

# Best parameter combination found and its cross-validated score.
best_params, best_score = grid_search.best_params_, grid_search.best_score_

print('\n最佳参数：', best_params, '\n最佳得分：', best_score)


最佳参数： {'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2} 
最佳得分： 0.9246518987341773


In [6]:
# Build a fresh tree from the tuned hyperparameters and train it on the training split.
optimized_dt_classifier = DecisionTreeClassifier(random_state=42, **best_params)
optimized_dt_classifier.fit(X_train, y_train)

# Predict on the held-out test split with the tuned model.
optimized_predictions = optimized_dt_classifier.predict(X_test)

# Evaluate the tuned model's predictions against the true test labels.
optimized_conf_matrix = confusion_matrix(y_test, optimized_predictions)
optimized_report = classification_report(y_test, optimized_predictions)
optimized_accuracy = accuracy_score(y_test, optimized_predictions)

# Print accuracy, the per-class report and the confusion matrix in one call.
print(
    '准确率：', optimized_accuracy,
    '\n分类报告：\n', optimized_report,
    '\n混淆矩阵：\n', optimized_conf_matrix,
)

准确率： 0.9649122807017544 
分类报告：
               precision    recall  f1-score   support

           0       0.97      0.94      0.95        63
           1       0.96      0.98      0.97       108

    accuracy                           0.96       171
   macro avg       0.97      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171
 
混淆矩阵：
 [[ 59   4]
 [  2 106]]
