In [3]:
!pip install xgboost

Collecting xgboost
  Using cached xgboost-3.0.2-py3-none-win_amd64.whl.metadata (2.1 kB)
Using cached xgboost-3.0.2-py3-none-win_amd64.whl (150.0 MB)
Installing collected packages: xgboost
Successfully installed xgboost-3.0.2


In [1]:
#Bagging and Boosting - Breast Cancer Dataset

#Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

#Load the dataset
# NOTE: this is an absolute, machine-specific path - change it to wherever your copy of breast-cancer.csv lives
df = pd.read_csv("C:/Users/kondu/Downloads/breast-cancer.csv")

#Preprocessing
# The 'id' column is a row identifier, not a feature
df = df.drop(columns=["id"])

# Encode target: M -> 1, B -> 0.
# Series.map turns any label missing from the dict into NaN, so fail loudly here
# instead of handing a NaN target to the models later.
diagnosis_encoded = df["diagnosis"].map({"M": 1, "B": 0})
if diagnosis_encoded.isna().any():
    raise ValueError("Unexpected values in 'diagnosis' column; expected only 'M' or 'B'")
df["diagnosis"] = diagnosis_encoded

#Feature-target split
X = df.drop(columns=["diagnosis"])
y = df["diagnosis"]

#Split the dataset (80% train, 20% test)
# Split BEFORE scaling: fitting the scaler on all rows would let test-set statistics
# leak into the training features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Feature scaling
# Learn the scaling statistics from the training rows only, then apply the same transform to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaler; defined only so the name X_scaled
# still exists for any later cell that refers to it - do not train or evaluate on it
X_scaled = scaler.transform(X)

#Define the models
# One bagging ensemble (Random Forest) and two boosting ensembles (AdaBoost, XGBoost),
# all seeded with the same random_state so runs are repeatable.
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "AdaBoost": AdaBoostClassifier(random_state=42),
    # use_label_encoder is no longer accepted by the installed XGBoost; passing it only produced the
    # 'Parameters: { "use_label_encoder" } are not used.' warning, so it is omitted
    "XGBoost": XGBClassifier(eval_metric='logloss', random_state=42)
}

#Fit each candidate on the training split and report how it does on the held-out split
for name in models:
    model = models[name]

    # Print the heading first so anything emitted while fitting shows up under the right model
    print(f"\n Model: {name}")

    # scikit-learn-style fit() returns the estimator itself, so fit and predict can be chained
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Test-set metrics: overall accuracy, confusion matrix, per-class precision/recall/F1
    acc, cm, report = (
        accuracy_score(y_test, y_pred),
        confusion_matrix(y_test, y_pred),
        classification_report(y_test, y_pred),
    )

    print(f" Accuracy: {acc:.4f}")
    print(" Confusion Matrix:\n", cm)
    print(" Classification Report:\n", report)




 Model: Random Forest
 Accuracy: 0.9649
 Confusion Matrix:
 [[70  1]
 [ 3 40]]
 Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.99      0.97        71
           1       0.98      0.93      0.95        43

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114


 Model: AdaBoost




 Accuracy: 0.9737
 Confusion Matrix:
 [[70  1]
 [ 2 41]]
 Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.99      0.98        71
           1       0.98      0.95      0.96        43

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114


 Model: XGBoost
 Accuracy: 0.9561
 Confusion Matrix:
 [[69  2]
 [ 3 40]]
 Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.97      0.97        71
           1       0.95      0.93      0.94        43

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114



Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
