In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
import seaborn as sns
from sklearn.preprocessing import StandardScaler

In [None]:
df = pd.read_csv('/content/bank.csv')

In [None]:
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,admin.,married,secondary,no,2343,yes,no,unknown,5,may,1042,1,-1,0,unknown,yes
1,56,admin.,married,secondary,no,45,no,no,unknown,5,may,1467,1,-1,0,unknown,yes
2,41,technician,married,secondary,no,1270,yes,no,unknown,5,may,1389,1,-1,0,unknown,yes
3,55,services,married,secondary,no,2476,yes,no,unknown,5,may,579,1,-1,0,unknown,yes
4,54,admin.,married,tertiary,no,184,no,no,unknown,5,may,673,2,-1,0,unknown,yes


In [None]:
df['poutcome'].value_counts()

unknown    8326
failure    1228
success    1071
other       537
Name: poutcome, dtype: int64

In [None]:
df['loan'].value_counts()

no     9702
yes    1460
Name: loan, dtype: int64

In [None]:
df.drop(['education','day','contact','month'],axis=1, inplace=True)

In [None]:
df.head()

Unnamed: 0,age,job,marital,default,balance,housing,loan,duration,campaign,pdays,previous,poutcome,deposit
0,59,admin.,married,no,2343,yes,no,1042,1,-1,0,unknown,yes
1,56,admin.,married,no,45,no,no,1467,1,-1,0,unknown,yes
2,41,technician,married,no,1270,yes,no,1389,1,-1,0,unknown,yes
3,55,services,married,no,2476,yes,no,579,1,-1,0,unknown,yes
4,54,admin.,married,no,184,no,no,673,2,-1,0,unknown,yes


In [None]:
feat = ['job', 'marital','housing','poutcome','loan','default']

In [None]:
neu = ['balance','duration','pdays','campaign', 'previous' ]

In [None]:
sc = StandardScaler()

In [None]:
df[neu] = sc.fit_transform(df[neu])

In [None]:
df.head()

Unnamed: 0,age,job,marital,default,balance,housing,loan,duration,campaign,pdays,previous,poutcome,deposit
0,59,admin.,married,no,0.252525,yes,no,1.930226,-0.554168,-0.481184,-0.36326,unknown,yes
1,56,admin.,married,no,-0.459974,no,no,3.154612,-0.554168,-0.481184,-0.36326,unknown,yes
2,41,technician,married,no,-0.08016,yes,no,2.929901,-0.554168,-0.481184,-0.36326,unknown,yes
3,55,services,married,no,0.293762,yes,no,0.596366,-0.554168,-0.481184,-0.36326,unknown,yes
4,54,admin.,married,no,-0.416876,no,no,0.867171,-0.186785,-0.481184,-0.36326,unknown,yes


In [None]:
df = pd.get_dummies(df, columns= feat)

In [None]:
df.head()

Unnamed: 0,age,balance,duration,campaign,pdays,previous,deposit,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,job_unknown,marital_divorced,marital_married,marital_single,housing_no,housing_yes,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown,loan_no,loan_yes,default_no,default_yes
0,59,0.252525,1.930226,-0.554168,-0.481184,-0.36326,yes,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0
1,56,-0.459974,3.154612,-0.554168,-0.481184,-0.36326,yes,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0
2,41,-0.08016,2.929901,-0.554168,-0.481184,-0.36326,yes,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0
3,55,0.293762,0.596366,-0.554168,-0.481184,-0.36326,yes,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0
4,54,-0.416876,0.867171,-0.186785,-0.481184,-0.36326,yes,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0


In [None]:
df['deposit'] = df['deposit'].map({'yes':1, 'no':0})

In [None]:
df.head()

Unnamed: 0,age,balance,duration,campaign,pdays,previous,deposit,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,job_unknown,marital_divorced,marital_married,marital_single,housing_no,housing_yes,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown,loan_no,loan_yes,default_no,default_yes
0,59,0.252525,1.930226,-0.554168,-0.481184,-0.36326,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0
1,56,-0.459974,3.154612,-0.554168,-0.481184,-0.36326,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0
2,41,-0.08016,2.929901,-0.554168,-0.481184,-0.36326,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0
3,55,0.293762,0.596366,-0.554168,-0.481184,-0.36326,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0
4,54,-0.416876,0.867171,-0.186785,-0.481184,-0.36326,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,1,0,1,0


In [None]:
df['target'] = df['deposit']
df.drop(['deposit'], axis=1, inplace= True)

In [None]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)

In [None]:
dt = DecisionTreeClassifier()

In [None]:
dt.fit(X_train,y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [None]:
pred = dt.predict(X_test)

In [None]:
print(confusion_matrix(y_test, pred))

[[1320  462]
 [ 442 1125]]


In [None]:
lm = LogisticRegression()

In [None]:
lm.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [None]:
lm_pred = lm.predict(X_test)

In [None]:
print(confusion_matrix(y_test, lm_pred))

[[1502  250]
 [ 468 1129]]
