In [1]:
!pip install xgboost -qq


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_breast_cancer
from xgboost import XGBClassifier


In [3]:
# Fetch the breast-cancer dataset that ships with scikit-learn.
data = load_breast_cancer()

# Features go into a DataFrame labelled with the dataset's own feature names;
# the class labels go into a Series.
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Report the size of the feature matrix (rows = samples, columns = features).
print(
    "Breast Cancer Dataset Loaded Successfully!",
    f"Total samples: {len(X)}",
    f"Total features: {len(X.columns)}",
    sep="\n",
)


Breast Cancer Dataset Loaded Successfully!
Total samples: 569
Total features: 30


In [4]:
# Hold out 30% of the rows for evaluation; random_state is fixed so the same
# partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Row counts of each partition.
print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")


Training samples: 398
Testing samples: 171


In [5]:
# Baseline model: a single decision tree capped at depth 4, seeded for
# repeatable results, and fitted on the training partition.
tree_model = DecisionTreeClassifier(max_depth=4, random_state=42).fit(X_train, y_train)

# Score the tree on the held-out rows.
y_pred_tree = tree_model.predict(X_test)
accuracy_tree = accuracy_score(y_test, y_pred_tree)

print()
print(f"Single Decision Tree Accuracy: {accuracy_tree:.4f}")



Single Decision Tree Accuracy: 0.9532


In [6]:
# Gradient-boosted ensemble: 100 boosting rounds with a 0.1 learning rate,
# log-loss as the evaluation metric, and a fixed seed; fitted on the same
# training partition as the single tree.
xgb_model = XGBClassifier(
    random_state=42,
    eval_metric='logloss',
    learning_rate=0.1,
    n_estimators=100,
).fit(X_train, y_train)

# Score the ensemble on the held-out rows.
y_pred_xgb = xgb_model.predict(X_test)
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)

print(f"XGBoost Ensemble Accuracy: {accuracy_xgb:.4f}")


XGBoost Ensemble Accuracy: 0.9591


In [7]:
# Side-by-side summary of the held-out accuracies computed in the two model
# cells above. The heading emoji was stored as mojibake (UTF-8 bytes decoded
# with the wrong code page); it is restored to the intended trophy character.
print("\n🏆 Model Comparison")
print("-" * 30)
print(f"Decision Tree Accuracy: {accuracy_tree:.4f}")
print(f"XGBoost Accuracy:       {accuracy_xgb:.4f}")



🏆 Model Comparison
------------------------------
Decision Tree Accuracy: 0.9532
XGBoost Accuracy:       0.9591


In [8]:
# Per-class precision / recall / F1 for the XGBoost predictions on the test set.
# The heading emoji was stored as mojibake; it is restored to the intended
# magnifying-glass character.
print("\n🔎 XGBoost Classification Report")
print(classification_report(
    y_test,
    y_pred_xgb,
    # Names are applied to the labels in sorted order, so label 0 is shown as
    # "Malignant" and label 1 as "Benign".
    # NOTE(review): presumably this matches the dataset's own encoding —
    # confirm against data.target_names.
    target_names=["Malignant", "Benign"]
))



🔎 XGBoost Classification Report
              precision    recall  f1-score   support

   Malignant       0.95      0.94      0.94        63
      Benign       0.96      0.97      0.97       108

    accuracy                           0.96       171
   macro avg       0.96      0.95      0.96       171
weighted avg       0.96      0.96      0.96       171



In [9]:
# Importance scores exposed by the fitted XGBoost model, one per input feature.
importance = xgb_model.feature_importances_

# Label each score with its feature name and rank from most to least important.
feature_importances = pd.Series(
    importance,
    index=X.columns
).sort_values(ascending=False)

# The heading emoji was stored as mojibake; it is restored to the intended
# sparkles character.
print("\n✨ Top 5 Important Features")
print(feature_importances.head(5))



✨ Top 5 Important Features
mean concave points     0.467806
worst concave points    0.136023
worst perimeter         0.057378
worst radius            0.046021
worst area              0.030302
dtype: float32
