# Ensemble Methods on Bank Marketing Dataset
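This notebook trains Decision Tree and Random Forest baselines, then demonstrates Bagging, Voting, and Stacking ensembles on the `bank.csv` dataset, and finally saves the fitted models and preprocessors.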

## 1. Imports

In [None]:
import os
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, VotingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
# Select seaborn's "whitegrid" style for any figures drawn later in the notebook.
sns.set(style="whitegrid")

## 2. Load and preprocess data

In [None]:
# Locate the dataset relative to the working directory (normally the folder the
# notebook lives in) rather than through a machine-specific absolute path.
# Set the BANK_DATA_DIR environment variable to read it from somewhere else.
DATA_DIR = Path(os.environ.get("BANK_DATA_DIR", "."))
data_path = DATA_DIR / "bank.csv"
df = pd.read_csv(data_path)

# Encode the target as integers; `le` is kept so it can be saved and used to
# map predictions back to the original labels.
le = LabelEncoder()
df['deposit_encoded'] = le.fit_transform(df['deposit'])

# Categorical feature columns: every object-dtype column except the raw target.
cat_cols = [col for col in df.select_dtypes(include='object').columns if col != 'deposit']

# One-hot encode the categorical features (drop_first=True drops one level per
# feature), then separate the features from the target.
df_enc = pd.get_dummies(df, columns=cat_cols, drop_first=True)
y = df_enc['deposit_encoded']
X = df_enc.drop(['deposit', 'deposit_encoded'], axis=1)

# Standardise the int64/float64 columns.
# The scaler is fitted on the training rows only, so that test-set statistics
# do not leak into the preprocessing (or into the scaler that is saved later).
# The row positions are obtained with the same test_size / random_state /
# stratify arguments as the train/test split in section 3, which makes them the
# exact rows that end up in X_train -- keep the two calls in sync.
num_cols = X.select_dtypes(include=['int64', 'float64']).columns
scaler = StandardScaler()
train_pos, _ = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=42, stratify=y
)
scaler.fit(X[num_cols].iloc[train_pos])
X[num_cols] = scaler.transform(X[num_cols])
print('X shape, y shape:', X.shape, y.shape)

## 3. Train/test split

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the shuffle
# reproducible.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train, X_test, y_train, y_test = split_parts
print('Train:', X_train.shape, 'Test:', X_test.shape)

## 4. Baseline models: Decision Tree & Random Forest

In [None]:
# Baselines with default hyper-parameters and a fixed seed: a single decision
# tree and a random forest (trained on all available cores).
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
rf = RandomForestClassifier(n_jobs=-1, random_state=42).fit(X_train, y_train)

# Accuracy of each baseline on the held-out test set.
dt_acc = accuracy_score(y_test, dt.predict(X_test))
rf_acc = accuracy_score(y_test, rf.predict(X_test))
print('DT acc:', dt_acc)
print('RF acc:', rf_acc)

## 5. Bagging (DecisionTree base) 

In [None]:
# Bag 50 decision trees, each trained on a bootstrap sample of the training set.
# The base estimator is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, whereas the first positional argument works on every version.
bag = BaggingClassifier(DecisionTreeClassifier(), n_estimators=50, random_state=42, n_jobs=-1)
bag.fit(X_train, y_train)

# Predict once and reuse the result for both reports.
bag_pred = bag.predict(X_test)
print('Bagging acc:', accuracy_score(y_test, bag_pred))
print(classification_report(y_test, bag_pred))

## 6. Voting Classifier (DT + RF + Bagging)

In [None]:
# Soft-voting ensemble over the decision tree, random forest and bagging
# models defined in the earlier cells.
voting_members = [('dt', dt), ('rf', rf), ('bag', bag)]
voting = VotingClassifier(estimators=voting_members, voting='soft', n_jobs=-1)
voting.fit(X_train, y_train)

# Evaluate on the held-out test set.
voting_pred = voting.predict(X_test)
print('Voting acc:', accuracy_score(y_test, voting_pred))
print(classification_report(y_test, voting_pred))

## 7. Stacking Classifier (DT + RF -> Logistic Regression) 

In [None]:
# Stacked ensemble: the decision tree and random forest are the base learners,
# and a logistic regression is the final estimator that combines their outputs.
stack_members = [('dt', dt), ('rf', rf)]
meta_learner = LogisticRegression(max_iter=1000)
stack = StackingClassifier(estimators=stack_members, final_estimator=meta_learner, n_jobs=-1)
stack.fit(X_train, y_train)

# Evaluate on the held-out test set.
stack_pred = stack.predict(X_test)
print('Stacking acc:', accuracy_score(y_test, stack_pred))
print(classification_report(y_test, stack_pred))

## 8. Compare and save models

In [None]:
# Side-by-side test accuracy of every fitted model.
models = {'dt': dt, 'rf': rf, 'bag': bag, 'voting': voting, 'stack': stack}
for name, m in models.items():
    acc = accuracy_score(y_test, m.predict(X_test))
    print(f'{name}: {acc:.4f}')

# Save models + preprocessors into the folder the dataset was loaded from.
# The folder is derived from `data_path` instead of repeating a hard-coded,
# machine-specific absolute path, and file names are joined with pathlib
# rather than by string concatenation.
out_dir = Path(data_path).parent
for name, m in models.items():
    joblib.dump(m, out_dir / f'{name}_model.joblib')
joblib.dump(le, out_dir / 'label_encoder.joblib')
joblib.dump(scaler, out_dir / 'scaler.joblib')
print('Saved models and preprocessors to', out_dir.resolve())

---

Run the cells in order, from top to bottom. Adjust `n_estimators` and the base estimators as desired.