# **STEP 1: DATA PREPROCESSING**

Import Libraries

In [76]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

Load Dataset

In [77]:
data = pd.read_csv('BankCustomerData.csv')
print(data.head())

   age           job  marital  education default  balance housing loan  \
0   58    management  married   tertiary      no     2143     yes   no   
1   44    technician   single  secondary      no       29     yes   no   
2   33  entrepreneur  married  secondary      no        2     yes  yes   
3   47   blue-collar  married    unknown      no     1506     yes   no   
4   33       unknown   single    unknown      no        1      no   no   

   contact  day month  duration  campaign  pdays  previous poutcome  \
0  unknown    5   may       261         1     -1         0  unknown   
1  unknown    5   may       151         1     -1         0  unknown   
2  unknown    5   may        76         1     -1         0  unknown   
3  unknown    5   may        92         1     -1         0  unknown   
4  unknown    5   may       198         1     -1         0  unknown   

  term_deposit  
0           no  
1           no  
2           no  
3           no  
4           no  


Convert Categorical Variables

In [78]:
data_dummies = pd.get_dummies(data, drop_first = True)

Handle Missing Values

In [79]:
print(data.isnull().sum())

age             0
job             0
marital         0
education       0
default         0
balance         0
housing         0
loan            0
contact         0
day             0
month           0
duration        0
campaign        0
pdays           0
previous        0
poutcome        0
term_deposit    0
dtype: int64


Convert the Target Variable

In [80]:
data_dummies['new_loan'] = data['loan'].apply(lambda x:1 if x=="yes" else 0)

# **STEP 2: FEATURE SELECTION**

Define the Target Variable

In [88]:
data_dummies['req_age'] = (data['age']>33).astype(int)
data_dummies['new_loan'] = data['loan'].apply(lambda x:1 if x=="yes" else 0)

Select Features and Target Variable

In [89]:
x = data_dummies.drop(['age','balance','new_loan','req_age'], axis = 1)
y = data_dummies['req_age']

# **STEP 3: DATA SPLITTING**

Split the Data

In [96]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20, random_state = 80)

Standardize the Features

In [97]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.fit_transform(x_test)

# **STEP 4: MODEL TRAINING**

Train the Logistic Regression Model

In [98]:
model = LogisticRegression()
model.fit(x_train_scaled, y_train)

Prediction on the Test Set

In [99]:
y_pred = model.predict(x_test_scaled)

# **STEP 5: MODEL EVALUATION**

Evaluate the Model

In [100]:
accuracy = accuracy_score(y_test,y_pred)
conf_matrix = confusion_matrix(y_test,y_pred)
class_report = classification_report(y_test,y_pred)

print(f"Accuracy: {accuracy}")
print("Confusion Matrix: ")
print(conf_matrix)
print("Classification Report: ")
print(class_report)

Accuracy: 0.7666510318949343
Confusion Matrix: 
[[1076 1304]
 [ 686 5462]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.61      0.45      0.52      2380
           1       0.81      0.89      0.85      6148

    accuracy                           0.77      8528
   macro avg       0.71      0.67      0.68      8528
weighted avg       0.75      0.77      0.75      8528



# **STEP 6: CONCLUSION**

Summarize the model's performance and discuss any insights or implications for the bank's marketing strategies.


> The model's performance

