In [26]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
import xgboost as xgb

In [11]:
df = sns.load_dataset("titanic")

In [12]:
df = df.drop(columns=["deck", "embark_town", "alive", "who", "class", "adult_male"])

In [13]:
df["age"] = df["age"].fillna(df["age"].median())
df["embarked"] = df["embarked"].fillna(df["embarked"].mode()[0])
df["fare"] = df["fare"].fillna(df["fare"].median())

In [14]:
categorical_cols = ["sex", "embarked", "alone"]
df[categorical_cols] = df[categorical_cols].apply(LabelEncoder().fit_transform)

In [15]:
X = df.drop(columns=["survived"])
y = df["survived"]

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)

In [20]:
dt.fit(X_train, y_train)
rf.fit(X_train, y_train)
knn.fit(X_train, y_train)


In [21]:
def evaluate_model(name, model):
    y_pred = model.predict(X_test)
    print(f"{name} Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(classification_report(y_test, y_pred))
    print("-" * 50)

evaluate_model("Decision Tree", dt)
evaluate_model("Random Forest", rf)
evaluate_model("KNN", knn)


Decision Tree Accuracy: 0.7821
              precision    recall  f1-score   support

           0       0.82      0.80      0.81       105
           1       0.73      0.76      0.74        74

    accuracy                           0.78       179
   macro avg       0.78      0.78      0.78       179
weighted avg       0.78      0.78      0.78       179

--------------------------------------------------
Random Forest Accuracy: 0.8268
              precision    recall  f1-score   support

           0       0.84      0.87      0.85       105
           1       0.80      0.77      0.79        74

    accuracy                           0.83       179
   macro avg       0.82      0.82      0.82       179
weighted avg       0.83      0.83      0.83       179

--------------------------------------------------
KNN Accuracy: 0.6983
              precision    recall  f1-score   support

           0       0.71      0.82      0.76       105
           1       0.67      0.53      0.59        7

In [22]:
stacking = StackingClassifier(
    estimators=[("rf", rf), ("dt", dt), ("knn", knn)],
    final_estimator=LogisticRegression()
)
stacking.fit(X_train, y_train)
evaluate_model("Stacking Ensemble", stacking)


Stacking Ensemble Accuracy: 0.8212
              precision    recall  f1-score   support

           0       0.82      0.89      0.85       105
           1       0.82      0.73      0.77        74

    accuracy                           0.82       179
   macro avg       0.82      0.81      0.81       179
weighted avg       0.82      0.82      0.82       179

--------------------------------------------------


In [24]:
boosting = AdaBoostClassifier(n_estimators=50, random_state=42)
boosting.fit(X_train, y_train)
evaluate_model("Boosting Ensemble (AdaBoost)", boosting)

Boosting Ensemble (AdaBoost) Accuracy: 0.7989
              precision    recall  f1-score   support

           0       0.81      0.87      0.83       105
           1       0.79      0.70      0.74        74

    accuracy                           0.80       179
   macro avg       0.80      0.78      0.79       179
weighted avg       0.80      0.80      0.80       179

--------------------------------------------------


In [27]:
xgb_model = xgb.XGBClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
xgb_model.fit(X_train, y_train)
evaluate_model("Boosting Ensemble (XGBoost)", xgb_model)

Boosting Ensemble (XGBoost) Accuracy: 0.8212
              precision    recall  f1-score   support

           0       0.83      0.87      0.85       105
           1       0.80      0.76      0.78        74

    accuracy                           0.82       179
   macro avg       0.82      0.81      0.81       179
weighted avg       0.82      0.82      0.82       179

--------------------------------------------------
