# Ensemble Models

In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import (
    BaggingClassifier, VotingClassifier,
    StackingClassifier, AdaBoostClassifier
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
# Load the breast-cancer dataset as a plain (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
# Both estimators are wrapped in a pipeline with a StandardScaler step, so the
# scaler is fit together with the classifier whenever the pipeline is fit.
knn = make_pipeline(
    StandardScaler(),
    KNeighborsClassifier(),
)
lr = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=5000),
)

| **Aspect**           | **Bagging**               | **Boosting**                  | **Voting**                      | **Stacking**                    |
| -------------------- | ------------------------- | ----------------------------- | ------------------------------- | ------------------------------- |
| **Base models**      | Same type (homogeneous)   | Usually trees (weak learners) | Different types (heterogeneous) | Different types (heterogeneous) |
| **Training style**   | Parallel, independent     | Sequential, dependent         | Parallel, independent           | Two-level (meta model)          |
| **Combination rule** | Averaging / majority vote | Weighted additive model       | Hard or soft voting             | Learned by meta-learner         |
| **Main goal**        | Reduce variance           | Reduce bias                   | Combine different views         | Optimize combination            |
| **Example**          | Random Forest             | AdaBoost, XGBoost             | VotingClassifier                | StackingClassifier              |

In [4]:
# One seed for every stochastic estimator in this cell so that
# Restart Kernel -> Run All reproduces the same accuracies.
# Same value as the random_state used for the train/test split.
SEED = 42

# Bagging: 20 decision trees combined into one classifier. random_state seeds
# the resampling and is propagated to the base trees.
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=20,
    random_state=SEED,
)

# Boosting: AdaBoost with its default base learner, 50 boosting rounds.
boost = AdaBoostClassifier(n_estimators=50, random_state=SEED)

# Heterogeneous base learners shared by the voting and stacking ensembles.
# Both ensembles clone their estimators when fit, so reusing the same
# instances here does not couple the two fitted models.
base_estimators = [
    ("knn", knn),
    ("lr", lr),
    ("tree", DecisionTreeClassifier(random_state=SEED)),
    ("naive", GaussianNB()),
]

# Voting: soft voting over the base learners' predicted probabilities.
vote = VotingClassifier(estimators=base_estimators, voting="soft")

# Stacking: a logistic-regression meta-learner is trained on the base
# learners' predictions. Each ensemble gets its own list object.
stack = StackingClassifier(
    estimators=list(base_estimators),
    final_estimator=LogisticRegression(max_iter=1000),
)

# Display name -> unfitted ensemble, consumed by the evaluation cell.
models = {"Bagging": bag, "Boosting": boost, "Voting": vote, "Stacking": stack}

In [5]:
# Fit each ensemble on the training split, score it on the held-out test
# split, and collect one row per model for the summary table.
results = []

for name, model in models.items():
    # fit() returns the fitted estimator, so prediction can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    row = {"Model": name, "Accuracy": f"{accuracy * 100:.2f}%"}
    results.append(row)

# Last expression of the cell: rendered as the results table.
pd.DataFrame(results)

Unnamed: 0,Model,Accuracy
0,Bagging,95.61%
1,Boosting,96.49%
2,Voting,96.49%
3,Stacking,97.37%
