In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix
from xgboost import XGBClassifier
import pickle


In [None]:
# Read the semicolon-delimited bank dataset into a DataFrame
data = pd.read_csv('bank-full.csv', delimiter=';')

In [None]:
# Data preprocessing and feature engineering
# Assuming 'y' is the target variable
# Encode the target as 0/1. The guard keeps this cell idempotent: Series.map
# turns every value missing from the mapping into NaN, so re-running the cell
# on an already-encoded (numeric) column would otherwise wipe out the target.
if not pd.api.types.is_numeric_dtype(data['y']):
    y_encoded = data['y'].map({'no': 0, 'yes': 1})
    # Fail loudly on labels other than 'no'/'yes' instead of carrying NaN into training
    if y_encoded.isna().any():
        unexpected = sorted(data.loc[y_encoded.isna(), 'y'].astype(str).unique())
        raise ValueError(f"Unexpected values in target column 'y': {unexpected}")
    data['y'] = y_encoded


In [None]:
# Split the data into train and test
# One-hot encode non-numeric feature columns before splitting so both models
# receive a purely numeric matrix with identical columns in train and test.
# get_dummies only expands object/string/category columns; numeric columns pass
# through unchanged, so this is a no-op for an all-numeric frame.
# NOTE(review): assumes bank-full.csv contains string-valued categorical
# columns, which LogisticRegression cannot fit on directly — confirm against the file.
X = pd.get_dummies(data.drop(columns='y'), dtype=int)
y = data['y']
# Hold out 20% of the rows for evaluation; fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def report_metrics(header, model, X_eval, y_eval):
    """Print accuracy, precision, recall, AUC and the confusion matrix of a fitted classifier.

    Args:
        header: Line printed before the metrics (e.g. "XGBoost Metrics:").
        model: Fitted binary classifier exposing ``predict`` and ``predict_proba``.
        X_eval: Feature matrix to evaluate on.
        y_eval: True 0/1 labels aligned with ``X_eval``.

    Returns:
        The hard class predictions produced by ``model.predict(X_eval)``.
    """
    y_pred = model.predict(X_eval)
    # ROC AUC measures ranking quality, so it must be scored with the
    # positive-class probability (column 1 for 0/1 labels), not with
    # thresholded predictions, which collapse the curve to a single point.
    y_score = model.predict_proba(X_eval)[:, 1]
    print(header)
    print("Accuracy: ", accuracy_score(y_eval, y_pred))
    print("Precision: ", precision_score(y_eval, y_pred))
    print("Recall: ", recall_score(y_eval, y_pred))
    print("AUC: ", roc_auc_score(y_eval, y_score))
    print("Confusion Matrix: \n", confusion_matrix(y_eval, y_pred))
    return y_pred


# Train a logistic regression machine learning model
# NOTE(review): max_iter is raised from the default of 100 because the features
# are not scaled here and the solver may stop before converging — confirm that
# the ConvergenceWarning is gone, or scale the features instead.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)

# Print accuracy, precision, recall and AUC of the model and show the confusion matrix
y_pred_lr = report_metrics("Logistic Regression Metrics:", lr, X_test, y_test)

# Train a XGBoost machine learning model
# NOTE(review): use_label_encoder is deprecated in recent XGBoost releases —
# confirm the installed version before removing the argument.
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_train, y_train)

# Print accuracy, precision, recall and AUC of the model and show the confusion matrix
y_pred_xgb = report_metrics("\nXGBoost Metrics:", xgb, X_test, y_test)

# Save the final model to a pickle file
# The context manager guarantees the file is flushed and closed even if
# pickling fails, instead of leaving the handle open until garbage collection.
with open("xgb_model.pkl", "wb") as model_file:
    pickle.dump(xgb, model_file)
