In [None]:
# Partition features and labels: 30% of the rows are held out for testing,
# and the fixed random_state keeps the partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

# Report how many rows ended up in each partition.
print(f"Training samples: {X_train.shape[0]}, Testing samples: {X_test.shape[0]}")

Training samples: 398, Testing samples: 171


In [None]:
# Baseline model: a single decision tree limited to depth 4, seeded so the
# fitted tree is the same on every run.
tree_model = DecisionTreeClassifier(max_depth=4, random_state=42)
tree_model.fit(X_train, y_train)

# Score the fitted tree on the held-out test partition.
y_pred_tree = tree_model.predict(X_test)
accuracy_tree = accuracy_score(y_true=y_test, y_pred=y_pred_tree)

print(f"Single Decision Tree Accuracy: {accuracy_tree:.4f}")

Single Decision Tree Accuracy: 0.9532


In [None]:
# 1. Initialize the XGBoost Classifier
#    - n_estimators=100 is the number of trees (boosting rounds)
#    - learning_rate controls how much each tree 'corrects' the error
#    - eval_metric='logloss' sets the evaluation metric for classification
# `use_label_encoder` is deliberately NOT passed: current XGBoost releases
# ignore it and emit 'Parameters: { "use_label_encoder" } are not used.' on
# every fit when it is supplied.
xgb_model = XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    eval_metric='logloss',
    random_state=42,
)

# 2. Train the ensemble model (Boosting)
xgb_model.fit(X_train, y_train)

# 3. Predict on the held-out test partition and evaluate
y_pred_xgb = xgb_model.predict(X_test)
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)

print(f"XGBoost (Ensemble) Accuracy: {accuracy_xgb:.4f}")

XGBoost (Ensemble) Accuracy: 0.9591


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_breast_cancer

# Self-contained end-to-end comparison: load the data, train a single decision
# tree and an XGBoost ensemble on the same split, then report their test
# accuracy, a per-class report and the top feature importances.

# Load the Breast Cancer dataset (Binary Classification)
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split the data: 30% held out for testing, fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# --- Single decision tree (baseline) ---
# 1. Initialize the single Decision Tree Classifier
tree_model = DecisionTreeClassifier(random_state=42, max_depth=4)

# 2. Train the model
tree_model.fit(X_train, y_train)

# 3. Predict and evaluate
y_pred_tree = tree_model.predict(X_test)
accuracy_tree = accuracy_score(y_test, y_pred_tree)

# --- XGBoost ensemble ---
# 1. Initialize the XGBoost Classifier
#    - n_estimators=100 is the number of trees (boosting rounds)
#    - learning_rate controls how much each tree 'corrects' the error
#    - eval_metric='logloss' sets the evaluation metric for classification
# `use_label_encoder` is deliberately NOT passed: current XGBoost releases
# ignore it and emit 'Parameters: { "use_label_encoder" } are not used.' on
# every fit when it is supplied.
xgb_model = XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    eval_metric='logloss',
    random_state=42,
)

# 2. Train the ensemble model (Boosting)
xgb_model.fit(X_train, y_train)

# 3. Predict and evaluate
y_pred_xgb = xgb_model.predict(X_test)
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)

# --- Reporting ---
# The heading emoji were mis-encoded (UTF-8 bytes decoded with the wrong
# charset); they are restored to the intended characters here.
print("\n## 🏆 Model Comparison")
print("-" * 30)
print(f"Single Decision Tree Accuracy: {accuracy_tree:.4f}")
print(f"XGBoost Ensemble Accuracy:     {accuracy_xgb:.4f}")

# Detailed report for the superior model (XGBoost).
# target_names must follow label order; load_breast_cancer encodes
# 0 = malignant and 1 = benign.
print("\n## 🔎 XGBoost Classification Report")
print(classification_report(y_test, y_pred_xgb, target_names=['Malignant', 'Benign']))

# Feature importance (a key insight from tree-based models): pair each score
# with its column name and sort so the most influential features come first.
importance = xgb_model.feature_importances_
feature_importances = pd.Series(importance, index=X.columns).sort_values(ascending=False)

print("\n## ✨ Top Feature Importances (Most Predictive Features)")
print(feature_importances.head(5))


## 🏆 Model Comparison
------------------------------
Single Decision Tree Accuracy: 0.9532
XGBoost Ensemble Accuracy:     0.9591

## 🔎 XGBoost Classification Report
              precision    recall  f1-score   support

   Malignant       0.95      0.94      0.94        63
      Benign       0.96      0.97      0.97       108

    accuracy                           0.96       171
   macro avg       0.96      0.95      0.96       171
weighted avg       0.96      0.96      0.96       171


## ✨ Top Feature Importances (Most Predictive Features)
mean concave points     0.467806
worst concave points    0.136023
worst perimeter         0.057378
worst radius            0.046021
worst area              0.030302
dtype: float32


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [None]:
# Install XGBoost (if needed)
!pip install xgboost -qq

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_breast_cancer

# Load the Breast Cancer dataset (Binary Classification)
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

print("Breast Cancer Dataset Loaded Successfully!")
print(f"Feature names: {list(X.columns)}")