In [1]:
'''This project involves predicting whether a bank customer will subscribe to a term deposit using logistic regression and decision tree classifiers. The dataset is loaded, and features (X) and target (Y) variables are defined. The data is then split into training and testing sets, with features standardized using StandardScaler. Logistic regression and decision tree models are trained and evaluated using confusion matrix and classification report metrics. The decision tree model's performance is further refined by adjusting its `max_depth` and `min_samples_leaf` parameters. Various configurations of the decision tree, including the use of the 'entropy' criterion, are tested to identify the optimal model parameters for the best prediction accuracy. This project demonstrates model training, evaluation, and hyperparameter tuning for classification tasks.'''

"This project involves predicting whether a bank customer will subscribe to a term deposit using logistic regression and decision tree classifiers. The dataset is loaded, and features (X) and target (Y) variables are defined. The data is then split into training and testing sets, with features standardized using StandardScaler. Logistic regression and decision tree models are trained and evaluated using confusion matrix and classification report metrics. The decision tree model's performance is further refined by adjusting its `max_depth` and `min_samples_leaf` parameters. Various configurations of the decision tree, including the use of the 'entropy' criterion, are tested to identify the optimal model parameters for the best prediction accuracy. This project demonstrates model training, evaluation, and hyperparameter tuning for classification tasks."

In [2]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load bank.csv (path is relative to the notebook's working directory) and
# preview the first five rows.
df = pd.read_csv(filepath_or_buffer='bank.csv')
df.head(n=5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,0,1,1,0,2343,1,0,2,5,8,1042,1,-1,0,3,1
1,56,0,1,1,0,45,0,0,2,5,8,1467,1,-1,0,3,1
2,41,9,1,1,0,1270,1,0,2,5,8,1389,1,-1,0,3,1
3,55,7,1,1,0,2476,1,0,2,5,8,579,1,-1,0,3,1
4,54,0,1,2,0,184,0,0,2,5,8,673,2,-1,0,3,1


In [4]:
# Target is the `deposit` column; every other column of df is used as a feature.
# NOTE(review): the rendered preview lists an "Unnamed: 0" column -- if that is a
# real CSV column (a saved row index) rather than a display artifact, it is being
# fed to the models as a feature; confirm.
Y = df['deposit']
X = df.drop(columns=['deposit'])
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, random_state=1, test_size=0.3
)

In [5]:
# Fit the scaler on the training rows only, then apply that same fitted scaler to
# both splits so the test rows do not influence the scaling parameters.
# The split names are rebound to the scaled versions.
ss = StandardScaler().fit(X_train)
X_train, X_test = ss.transform(X_train), ss.transform(X_test)

In [6]:
def create_model(model):
    """Fit ``model`` on the training split and print its test-split metrics.

    Reads the notebook-level ``X_train``, ``Y_train``, ``X_test`` and ``Y_test``
    variables instead of taking them as arguments, so they must be defined
    before this is called. ``fit`` is invoked on the object passed in.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``.

    Returns:
        None. The confusion matrix and then the classification report for the
        test split are printed.
    """
    model.fit(X_train, Y_train)
    predicted = model.predict(X_test)
    # Evaluate and print one metric at a time: confusion matrix first, report second.
    for metric in (confusion_matrix, classification_report):
        print(metric(Y_test, predicted))

In [7]:
# Baseline: logistic regression with the library's default settings.
lor = LogisticRegression()
create_model(model=lor)

[[1441  319]
 [ 359 1230]]
              precision    recall  f1-score   support

           0       0.80      0.82      0.81      1760
           1       0.79      0.77      0.78      1589

    accuracy                           0.80      3349
   macro avg       0.80      0.80      0.80      3349
weighted avg       0.80      0.80      0.80      3349



In [8]:
# Unconstrained decision tree, used as the reference point for the tuned variants.
# random_state is pinned (matching the other tree cells) because scikit-learn
# randomly permutes candidate features at each split, so an unseeded tree can
# produce different metrics from one run to the next.
dtc = DecisionTreeClassifier(random_state=1)
create_model(dtc)

[[1400  360]
 [ 359 1230]]
              precision    recall  f1-score   support

           0       0.80      0.80      0.80      1760
           1       0.77      0.77      0.77      1589

    accuracy                           0.79      3349
   macro avg       0.78      0.78      0.78      3349
weighted avg       0.79      0.79      0.79      3349



In [9]:
# Sweep max_depth from 1 through 8, printing the depth ahead of each report.
# NOTE(review): create_model scores on the test split, so choosing a depth from
# this sweep tunes on test data; a validation split or cross-validation would
# give an unbiased estimate.
for depth in range(1, 9):
    print(depth)
    dtc = DecisionTreeClassifier(random_state=1, max_depth=depth)
    create_model(dtc)

1
[[1092  668]
 [ 296 1293]]
              precision    recall  f1-score   support

           0       0.79      0.62      0.69      1760
           1       0.66      0.81      0.73      1589

    accuracy                           0.71      3349
   macro avg       0.72      0.72      0.71      3349
weighted avg       0.73      0.71      0.71      3349

2
[[1092  668]
 [ 296 1293]]
              precision    recall  f1-score   support

           0       0.79      0.62      0.69      1760
           1       0.66      0.81      0.73      1589

    accuracy                           0.71      3349
   macro avg       0.72      0.72      0.71      3349
weighted avg       0.73      0.71      0.71      3349

3
[[1212  548]
 [ 218 1371]]
              precision    recall  f1-score   support

           0       0.85      0.69      0.76      1760
           1       0.71      0.86      0.78      1589

    accuracy                           0.77      3349
   macro avg       0.78      0.78      0.

In [10]:
# Tree limited to depth 5.
# random_state=1 matches the seeded max_depth sweep, so this cell reproduces the
# sweep's depth-5 result instead of varying between runs.
dtc = DecisionTreeClassifier(max_depth=5, random_state=1)
create_model(dtc)

[[1316  444]
 [ 229 1360]]
              precision    recall  f1-score   support

           0       0.85      0.75      0.80      1760
           1       0.75      0.86      0.80      1589

    accuracy                           0.80      3349
   macro avg       0.80      0.80      0.80      3349
weighted avg       0.81      0.80      0.80      3349



In [11]:
# Sweep min_samples_leaf from 45 through 100 (gini criterion by default),
# printing the leaf size ahead of each report.
# NOTE(review): in the visible output the best accuracy sits at the lower bound
# (45), so smaller values may be worth checking -- confirm.
for leaf_size in range(45, 101):
    print(leaf_size)
    dtc = DecisionTreeClassifier(random_state=1, min_samples_leaf=leaf_size)
    create_model(dtc)

45
[[1379  381]
 [ 204 1385]]
              precision    recall  f1-score   support

           0       0.87      0.78      0.83      1760
           1       0.78      0.87      0.83      1589

    accuracy                           0.83      3349
   macro avg       0.83      0.83      0.83      3349
weighted avg       0.83      0.83      0.83      3349

46
[[1379  381]
 [ 204 1385]]
              precision    recall  f1-score   support

           0       0.87      0.78      0.83      1760
           1       0.78      0.87      0.83      1589

    accuracy                           0.83      3349
   macro avg       0.83      0.83      0.83      3349
weighted avg       0.83      0.83      0.83      3349

47
[[1379  381]
 [ 204 1385]]
              precision    recall  f1-score   support

           0       0.87      0.78      0.83      1760
           1       0.78      0.87      0.83      1589

    accuracy                           0.83      3349
   macro avg       0.83      0.83     

              precision    recall  f1-score   support

           0       0.84      0.81      0.82      1760
           1       0.80      0.83      0.81      1589

    accuracy                           0.82      3349
   macro avg       0.82      0.82      0.82      3349
weighted avg       0.82      0.82      0.82      3349

74
[[1427  333]
 [ 277 1312]]
              precision    recall  f1-score   support

           0       0.84      0.81      0.82      1760
           1       0.80      0.83      0.81      1589

    accuracy                           0.82      3349
   macro avg       0.82      0.82      0.82      3349
weighted avg       0.82      0.82      0.82      3349

75
[[1422  338]
 [ 274 1315]]
              precision    recall  f1-score   support

           0       0.84      0.81      0.82      1760
           1       0.80      0.83      0.81      1589

    accuracy                           0.82      3349
   macro avg       0.82      0.82      0.82      3349
weighted avg  

              precision    recall  f1-score   support

           0       0.82      0.80      0.81      1760
           1       0.79      0.81      0.80      1589

    accuracy                           0.81      3349
   macro avg       0.81      0.81      0.81      3349
weighted avg       0.81      0.81      0.81      3349

100
[[1416  344]
 [ 307 1282]]
              precision    recall  f1-score   support

           0       0.82      0.80      0.81      1760
           1       0.79      0.81      0.80      1589

    accuracy                           0.81      3349
   macro avg       0.81      0.81      0.81      3349
weighted avg       0.81      0.81      0.81      3349



In [12]:
# Refit the tree with min_samples_leaf=45, the first value of the sweep above.
dtc = DecisionTreeClassifier(random_state=1, min_samples_leaf=45)
create_model(model=dtc)

[[1379  381]
 [ 204 1385]]
              precision    recall  f1-score   support

           0       0.87      0.78      0.83      1760
           1       0.78      0.87      0.83      1589

    accuracy                           0.83      3349
   macro avg       0.83      0.83      0.83      3349
weighted avg       0.83      0.83      0.83      3349



In [13]:
# Same min_samples_leaf sweep (45 through 100), this time splitting on entropy.
for leaf_size in range(45, 101):
    print(leaf_size)
    dtc = DecisionTreeClassifier(
        random_state=1,
        min_samples_leaf=leaf_size,
        criterion='entropy',
    )
    create_model(dtc)

45
[[1438  322]
 [ 283 1306]]
              precision    recall  f1-score   support

           0       0.84      0.82      0.83      1760
           1       0.80      0.82      0.81      1589

    accuracy                           0.82      3349
   macro avg       0.82      0.82      0.82      3349
weighted avg       0.82      0.82      0.82      3349

46
[[1438  322]
 [ 283 1306]]
              precision    recall  f1-score   support

           0       0.84      0.82      0.83      1760
           1       0.80      0.82      0.81      1589

    accuracy                           0.82      3349
   macro avg       0.82      0.82      0.82      3349
weighted avg       0.82      0.82      0.82      3349

47
[[1430  330]
 [ 267 1322]]
              precision    recall  f1-score   support

           0       0.84      0.81      0.83      1760
           1       0.80      0.83      0.82      1589

    accuracy                           0.82      3349
   macro avg       0.82      0.82     

[[1427  333]
 [ 277 1312]]
              precision    recall  f1-score   support

           0       0.84      0.81      0.82      1760
           1       0.80      0.83      0.81      1589

    accuracy                           0.82      3349
   macro avg       0.82      0.82      0.82      3349
weighted avg       0.82      0.82      0.82      3349

72
[[1407  353]
 [ 270 1319]]
              precision    recall  f1-score   support

           0       0.84      0.80      0.82      1760
           1       0.79      0.83      0.81      1589

    accuracy                           0.81      3349
   macro avg       0.81      0.81      0.81      3349
weighted avg       0.82      0.81      0.81      3349

73
[[1408  352]
 [ 270 1319]]
              precision    recall  f1-score   support

           0       0.84      0.80      0.82      1760
           1       0.79      0.83      0.81      1589

    accuracy                           0.81      3349
   macro avg       0.81      0.82      0.

[[1358  402]
 [ 250 1339]]
              precision    recall  f1-score   support

           0       0.84      0.77      0.81      1760
           1       0.77      0.84      0.80      1589

    accuracy                           0.81      3349
   macro avg       0.81      0.81      0.81      3349
weighted avg       0.81      0.81      0.81      3349

95
[[1358  402]
 [ 250 1339]]
              precision    recall  f1-score   support

           0       0.84      0.77      0.81      1760
           1       0.77      0.84      0.80      1589

    accuracy                           0.81      3349
   macro avg       0.81      0.81      0.81      3349
weighted avg       0.81      0.81      0.81      3349

96
[[1358  402]
 [ 250 1339]]
              precision    recall  f1-score   support

           0       0.84      0.77      0.81      1760
           1       0.77      0.84      0.80      1589

    accuracy                           0.81      3349
   macro avg       0.81      0.81      0.

In [14]:
# Entropy-criterion tree with min_samples_leaf=80.
# NOTE(review): 80 was presumably picked from the entropy sweep; the visible
# sweep output reports 0.82 accuracy at 45-47 versus 0.81 here -- confirm the choice.
dtc = DecisionTreeClassifier(
    random_state=1,
    min_samples_leaf=80,
    criterion='entropy',
)
create_model(model=dtc)

[[1395  365]
 [ 257 1332]]
              precision    recall  f1-score   support

           0       0.84      0.79      0.82      1760
           1       0.78      0.84      0.81      1589

    accuracy                           0.81      3349
   macro avg       0.81      0.82      0.81      3349
weighted avg       0.82      0.81      0.81      3349

