<a href="https://colab.research.google.com/github/sirikuragayathri/samsung.md/blob/main/Bagging%26Boosting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import time
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# Load the breast cancer dataset
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [3]:
# Initialize and train a Random Forest Classifier
start_time_rf = time.time()

rf_model = BaggingClassifier(estimator=DecisionTreeClassifier(),
                             n_estimators=100,
                             random_state=42)

rf_model.fit(X_train, y_train)

end_time_rf = time.time()

# Make predictions and evaluate
y_pred_rf = rf_model.predict(X_test)

accuracy_rf = accuracy_score(y_test, y_pred_rf)

roc_auc_rf = roc_auc_score(y_test, y_pred_rf)

training_time_rf = end_time_rf - start_time_rf

print("=== Bagging (Random Forest) ===")
print(f"Accuracy: {accuracy_rf:.4f}")

print(f"ROC AUC Score: {roc_auc_rf:.4f}")

print(f"Training Time: {training_time_rf:.4f} seconds")

print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))

=== Bagging (Random Forest) ===
Accuracy: 0.9474
ROC AUC Score: 0.9391
Training Time: 1.6637 seconds

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.91      0.93        64
           1       0.95      0.97      0.96       107

    accuracy                           0.95       171
   macro avg       0.95      0.94      0.94       171
weighted avg       0.95      0.95      0.95       171



In [4]:
# Initialize and train an AdaBoost Classifier
start_time_ada = time.time()
ada_model = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1),
                               n_estimators=100,
                               random_state=42,
                               learning_rate=0.5)

ada_model.fit(X_train, y_train)
end_time_ada = time.time()

# Make predictions and evaluate
y_pred_ada = ada_model.predict(X_test)
accuracy_ada = accuracy_score(y_test, y_pred_ada)
roc_auc_ada = roc_auc_score(y_test, y_pred_ada)
training_time_ada = end_time_ada - start_time_ada

print("\n=== Boosting (AdaBoost) ===")
print(f"Accuracy: {accuracy_ada:.4f}")

print(f"ROC AUC Score: {roc_auc_ada:.4f}")

print(f"Training Time: {training_time_ada:.4f} seconds")

print("\nClassification Report:")
print(classification_report(y_test, y_pred_ada))


=== Boosting (AdaBoost) ===
Accuracy: 0.9532
ROC AUC Score: 0.9438
Training Time: 0.9103 seconds

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        64
           1       0.95      0.98      0.96       107

    accuracy                           0.95       171
   macro avg       0.96      0.94      0.95       171
weighted avg       0.95      0.95      0.95       171



In [5]:
# Initialize and train a Gradient Boosting Classifier
start_time_gb = time.time()
gb_model = GradientBoostingClassifier(n_estimators=100,
                                      learning_rate=0.1,
                                      max_depth=3,
                                      random_state=42)
gb_model.fit(X_train, y_train)
end_time_gb = time.time()

# Make predictions and evaluate
y_pred_gb = gb_model.predict(X_test)
accuracy_gb = accuracy_score(y_test, y_pred_gb)
roc_auc_gb = roc_auc_score(y_test, y_pred_gb)
training_time_gb = end_time_gb - start_time_gb

print("\n=== Boosting (Gradient Boosting) ===")
print(f"Accuracy: {accuracy_gb:.4f}")

print(f"ROC AUC Score: {roc_auc_gb:.4f}")

print(f"Training Time: {training_time_gb:.4f} seconds")

print("\nClassification Report:")
print(classification_report(y_test, y_pred_gb))


=== Boosting (Gradient Boosting) ===
Accuracy: 0.9474
ROC AUC Score: 0.9391
Training Time: 2.8064 seconds

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.91      0.93        64
           1       0.95      0.97      0.96       107

    accuracy                           0.95       171
   macro avg       0.95      0.94      0.94       171
weighted avg       0.95      0.95      0.95       171



In [6]:
import numpy as np
import time
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# Load the breast cancer dataset
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Define the base models (Level-0)
estimators = [
    ('knn', KNeighborsClassifier(n_neighbors=5)),
    ('svm', SVC(kernel='rbf', C=1.0, probability=True, random_state=42)), # probability=True is needed for predict_proba
    ('gb', GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42))
]

# Define the final estimator (Meta-model)
final_estimator = LogisticRegression()

# Initialize the Stacking Classifier
stacking_model = StackingClassifier(
    estimators=estimators,
    final_estimator=final_estimator,
    cv=5, # Use cross-validation to prevent data leakage
    passthrough=False # Set to True to include original features as well
)

In [8]:
# Train the stacking model
start_time_stack = time.time()
stacking_model.fit(X_train_scaled, y_train)
end_time_stack = time.time()

# Make predictions and evaluate
y_pred_stack = stacking_model.predict(X_test_scaled)
accuracy_stack = accuracy_score(y_test, y_pred_stack)
roc_auc_stack = roc_auc_score(y_test, y_pred_stack)
training_time_stack = end_time_stack - start_time_stack

print("=== Stacking Classifier ===")
print(f"Accuracy: {accuracy_stack:.4f}")
print(f"ROC AUC Score: {roc_auc_stack:.4f}")
print(f"Training Time: {training_time_stack:.4f} seconds")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_stack))

=== Stacking Classifier ===
Accuracy: 0.9649
ROC AUC Score: 0.9563
Training Time: 6.2760 seconds

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.92      0.95        64
           1       0.95      0.99      0.97       107

    accuracy                           0.96       171
   macro avg       0.97      0.96      0.96       171
weighted avg       0.97      0.96      0.96       171

