# Bank Marketing Campaign Model Exploration
 
Team: Data Science Bank Marketers
- Amr Hacoglu
- Ha My Pham


## Problem Description
ABC Bank aims to develop a machine learning model to predict whether a customer will subscribe to a term deposit product. This model will help the bank focus its marketing efforts on customers with a higher likelihood of purchasing the product, thereby optimizing resource allocation and reducing marketing costs.

# Import Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE

# Load and Preprocess Data

In [None]:
# Read the semicolon-delimited campaign export into a DataFrame.
data_path = '../input/bank-additional-full.csv'
df = pd.read_csv(data_path, sep=';')

# Display the first few rows and data info

In [None]:
# Preview the first rows, then summarize column dtypes and non-null counts.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the summary.
df.info()

# Preprocess the data

In [None]:
# Handle categorical variables
# Integer-encode each categorical column in place. Every column gets its own
# fitted LabelEncoder, stored by column name, so the code -> label mapping
# stays available afterwards (e.g. label_encoders['job'].inverse_transform).
# Re-fitting one shared encoder would keep only the mapping of the last
# column processed.
categorical_cols = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'day_of_week', 'poutcome']
label_encoders = {}
for col in categorical_cols:
    # `le` is rebound each iteration; after the loop it refers to the encoder
    # of the last column, as it did before this change.
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [None]:
# Recode the textual target as a binary label: 'yes' -> 1, 'no' -> 0.
target_codes = {'yes': 1, 'no': 0}
df['y'] = df['y'].map(target_codes)

In [None]:
# Separate the target column `y` from the predictor columns.
# NOTE(review): if this CSV is the public UCI bank-marketing export, it
# presumably contains a `duration` column that is only known after the call
# has ended -- confirm whether it should be excluded from X.
y = df['y']
X = df.drop(columns=['y'])

In [None]:
# Split the data
# Hold out 20% for testing. Stratifying on y keeps the class ratio identical
# in both partitions, which matters because the target is imbalanced (hence
# the SMOTE step below).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale the features
# Fit the scaler on the real training rows only, so its mean/variance are not
# skewed by synthetic samples and nothing from the test set leaks in.
scaler = StandardScaler()
X_train_standardized = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply SMOTE to handle class imbalance
# SMOTE interpolates between nearest neighbours, so it runs on the
# standardized features; on raw values the widest-range columns would
# dominate the neighbour search. Only the training split is oversampled.
# NOTE(review): the label-encoded categorical columns are interpolated like
# numeric ones here; imblearn's SMOTENC may be a better fit -- confirm.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_standardized, y_train)

# The modelling cells train on `X_train_scaled`; it is the scaled AND
# oversampled training matrix (row-aligned with y_train_resampled).
X_train_scaled = X_train_resampled

# Model Training and Evaluation

In [None]:
# Candidate classifiers keyed by display name. Insertion order is the order
# in which they are later trained and reported.
models = {}
models['Logistic Regression'] = LogisticRegression(random_state=42)
# NOTE(review): probability=True is needed for predict_proba, but it makes
# SVC fitting noticeably more expensive -- confirm the runtime is acceptable.
models['SVM'] = SVC(kernel='linear', probability=True, random_state=42)
models['Random Forest'] = RandomForestClassifier(n_estimators=100, random_state=42)
models['XGBoost'] = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42,
)

In [None]:
# Fit every candidate on the prepared training data and report its
# performance on the held-out test set.
for name, model in models.items():
    model.fit(X_train_scaled, y_train_resampled)

    # Hard labels feed accuracy and the classification report; ROC AUC is
    # computed from the probability column for class 1.
    y_pred = model.predict(X_test_scaled)
    positive_proba = model.predict_proba(X_test_scaled)[:, 1]

    accuracy = accuracy_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, positive_proba)

    print(
        f"\n{name}:",
        f"Accuracy: {accuracy:.4f}",
        f"ROC AUC: {roc_auc:.4f}",
        "Classification Report:",
        sep="\n",
    )
    print(classification_report(y_test, y_pred))

# Stacking Classifier

In [None]:
# Build the stacked ensemble: three base learners whose outputs are combined
# by a logistic-regression final estimator, then fit it on the training data.
base_logreg = LogisticRegression(random_state=42)
base_forest = RandomForestClassifier(n_estimators=100, random_state=42)
base_xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
estimators = [('lr', base_logreg), ('rf', base_forest), ('xgb', base_xgb)]

meta_learner = LogisticRegression(random_state=42)
stacking_classifier = StackingClassifier(
    estimators=estimators,
    final_estimator=meta_learner,
)
stacking_classifier.fit(X_train_scaled, y_train_resampled)

In [None]:
# Score the fitted stacking ensemble on the held-out test set, using the
# same metrics reported for the individual models.
y_pred_stack = stacking_classifier.predict(X_test_scaled)
stack_positive_proba = stacking_classifier.predict_proba(X_test_scaled)[:, 1]

accuracy_stack = accuracy_score(y_test, y_pred_stack)
roc_auc_stack = roc_auc_score(y_test, stack_positive_proba)

print(
    "\nStacking Classifier:",
    f"Accuracy: {accuracy_stack:.4f}",
    f"ROC AUC: {roc_auc_stack:.4f}",
    "Classification Report:",
    sep="\n",
)
print(classification_report(y_test, y_pred_stack))