In [6]:
import pandas as pd
import os
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef

In [7]:
# Load dataset
df = pd.read_csv('../bank-marketing-full.csv', sep=';')

# Encode target variable
le = LabelEncoder()
df['y'] = le.fit_transform(df['y'])

df_encoded = pd.get_dummies(df)

# Split features and target
X = df_encoded.drop('y', axis=1)
y = df_encoded['y']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Training features shape: {X_train.shape}")

Training features shape: (32950, 63)


In [8]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else y_pred
    
    return {
        "Accuracy": accuracy_score(y_test, y_pred),
        "AUC Score": roc_auc_score(y_test, y_prob),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1 Score": f1_score(y_test, y_pred),
        "MCC": matthews_corrcoef(y_test, y_pred)
    }
results = {}

In [9]:
#1.Logistic Regression
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
results["Logistic Regression"] = evaluate_model(lr, X_test, y_test)

#2.Decision Tree
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
results["Decision Tree"] = evaluate_model(dt, X_test, y_test)

#3.K-Nearest Neighbors
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
results["KNN"] = evaluate_model(knn, X_test, y_test)

#4.Naive Bayes
nb = GaussianNB()
nb.fit(X_train, y_train)
results["Naive Bayes"] = evaluate_model(nb, X_test, y_test)

# .Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
results["Random Forest"] = evaluate_model(rf, X_test, y_test)

#6.XGBoost
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgb.fit(X_train, y_train)
results["XGBoost"] = evaluate_model(xgb, X_test, y_test)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=1000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [10]:
results_df = pd.DataFrame(results).T
print(results_df.round(3).to_markdown())

|                     |   Accuracy |   AUC Score |   Precision |   Recall |   F1 Score |   MCC |
|:--------------------|-----------:|------------:|------------:|---------:|-----------:|------:|
| Logistic Regression |      0.911 |       0.933 |       0.672 |    0.42  |      0.517 | 0.487 |
| Decision Tree       |      0.887 |       0.732 |       0.502 |    0.532 |      0.516 | 0.452 |
| KNN                 |      0.902 |       0.87  |       0.586 |    0.468 |      0.52  | 0.47  |
| Naive Bayes         |      0.865 |       0.829 |       0.422 |    0.521 |      0.466 | 0.392 |
| Random Forest       |      0.91  |       0.938 |       0.652 |    0.448 |      0.531 | 0.494 |
| XGBoost             |      0.915 |       0.945 |       0.646 |    0.548 |      0.593 | 0.548 |


In [13]:
# Save all models as .pkl files
models = {
    "logistic_regression": lr,
    "decision_tree": dt,
    "knn": knn,
    "naive_bayes": nb,
    "random_forest": rf,
    "xgboost": xgb
}

for name, model in models.items():
    with open(f'{name}.pkl', 'wb') as f:
        pickle.dump(model, f)
        
print("All models successfully saved to the 'model/' directory.")

All models successfully saved to the 'model/' directory.
