In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.ensemble import (
    BaggingClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
    StackingClassifier
)

from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder


In [2]:
df = sns.load_dataset("titanic")

df = df[['survived','pclass','sex','age','sibsp',
         'parch','fare','embarked']]


In [3]:
df['age'].fillna(df['age'].median(), inplace=True)
df['embarked'].fillna(df['embarked'].mode()[0], inplace=True)

le = LabelEncoder()
df['sex'] = le.fit_transform(df['sex'])
df['embarked'] = le.fit_transform(df['embarked'])


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(df['age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['embarked'].fillna(df['embarked'].mode()[0], inplace=True)


In [4]:
X = df.drop('survived', axis=1)
y = df['survived']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


# WITHOUT ENSEMBLE

In [5]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

pred_dt = dt.predict(X_test)

print("Single Decision Tree:",
      accuracy_score(y_test, pred_dt))


Single Decision Tree: 0.7821229050279329


# BAGGING

In [6]:
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42
)

bag.fit(X_train, y_train)
pred_bag = bag.predict(X_test)

print("Bagging:",
      accuracy_score(y_test, pred_bag))


Bagging: 0.8100558659217877


# BOOSTING

## AdaBoost

In [7]:
ada = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=50,
    random_state=42
)

ada.fit(X_train, y_train)
pred_ada = ada.predict(X_test)

print("AdaBoost:",
      accuracy_score(y_test, pred_ada))


AdaBoost: 0.7988826815642458


## Gradient Boosting

In [8]:
gb = GradientBoostingClassifier(
    n_estimators=100,
    random_state=42
)

gb.fit(X_train, y_train)
pred_gb = gb.predict(X_test)

print("Gradient Boosting:",
      accuracy_score(y_test, pred_gb))


Gradient Boosting: 0.8100558659217877


# STACKING

In [9]:
base_models = [
    ('dt', DecisionTreeClassifier()),
    ('lr', LogisticRegression(max_iter=1000))
]

stack = StackingClassifier(
    estimators=base_models,
    final_estimator=LogisticRegression()
)

stack.fit(X_train, y_train)
pred_stack = stack.predict(X_test)

print("Stacking:",
      accuracy_score(y_test, pred_stack))


Stacking: 0.8044692737430168
