Output variable -> y
y -> Whether the client has subscribed to a term deposit or not
Binomial ("yes" or "no")
(Logistic regression)

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Data Preprocessing
# Load the dataset; the file is semicolon-delimited.
data = pd.read_csv("bank-full.csv", delimiter=";")

# Encode the categorical feature columns as integer codes.
# A fresh LabelEncoder is fitted per column and stored in `label_encoders`,
# so every column's category <-> code mapping stays available afterwards
# (e.g. for `inverse_transform` or for encoding new rows consistently).
# A single shared encoder would only remember the last column it was fitted on.
# NOTE(review): integer codes impose an arbitrary order on nominal categories;
# one-hot encoding may suit a linear model better -- left unchanged here so the
# reported metrics stay the same.
categorical_columns = ["job", "marital", "education", "default", "housing", "loan", "contact", "month", "poutcome"]
label_encoders = {}
for column in categorical_columns:
    label_encoder = LabelEncoder()
    data[column] = label_encoder.fit_transform(data[column])
    label_encoders[column] = label_encoder

# Define features (X) and target variable (y).
# The target keeps its original string labels ("yes" / "no").
X = data.drop(columns=["y"])
y = data["y"]

# Split the data into training and testing sets (70% training, 30% testing).
# random_state is fixed so the split is reproducible.
# NOTE(review): the split is not stratified on y -- consider `stratify=y`
# if the class proportions must match between the two sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Data Scaling
# The scaler is fitted on the training set only and then applied to the test
# set, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Building
model = LogisticRegression(max_iter=10000)
model.fit(X_train_scaled, y_train)


# Make predictions on the test set
y_pred = model.predict(X_test_scaled)

# Calculate evaluation metrics, treating "yes" as the positive class.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, pos_label="yes")
recall = recall_score(y_test, y_pred, pos_label="yes")
f1 = f1_score(y_test, y_pred, pos_label="yes")
# Pin the label order explicitly: rows are the true class and columns the
# predicted class, both ordered ["no", "yes"]. This also guarantees a 2x2
# matrix even if one class is absent from y_test / y_pred.
confusion = confusion_matrix(y_test, y_pred, labels=["no", "yes"])

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:\n", confusion)


Accuracy: 0.8898554998525509
Precision: 0.5899653979238755
Recall: 0.21339173967459324
F1 Score: 0.3134191176470588
Confusion Matrix:
 [[11729   237]
 [ 1257   341]]
