## 1.Import necessary libraries

In [42]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score,accuracy_score,mean_squared_error

## 2.Import Dataset

In [43]:
bank_full = pd.read_csv('bank-full.csv',sep=';', quotechar='"')
bank_full

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,51,technician,married,tertiary,no,825,no,no,cellular,17,nov,977,3,-1,0,unknown,yes
45207,71,retired,divorced,primary,no,1729,no,no,cellular,17,nov,456,2,-1,0,unknown,yes
45208,72,retired,married,secondary,no,5715,no,no,cellular,17,nov,1127,5,184,3,success,yes
45209,57,blue-collar,married,secondary,no,668,no,no,telephone,17,nov,508,4,-1,0,unknown,no


## 3.Initial investigation

In [44]:
bank_full.shape

(45211, 17)

In [45]:
bank_full.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45211 entries, 0 to 45210
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        45211 non-null  int64 
 1   job        45211 non-null  object
 2   marital    45211 non-null  object
 3   education  45211 non-null  object
 4   default    45211 non-null  object
 5   balance    45211 non-null  int64 
 6   housing    45211 non-null  object
 7   loan       45211 non-null  object
 8   contact    45211 non-null  object
 9   day        45211 non-null  int64 
 10  month      45211 non-null  object
 11  duration   45211 non-null  int64 
 12  campaign   45211 non-null  int64 
 13  pdays      45211 non-null  int64 
 14  previous   45211 non-null  int64 
 15  poutcome   45211 non-null  object
 16  y          45211 non-null  object
dtypes: int64(7), object(10)
memory usage: 5.9+ MB


In [46]:
bank_full.describe()

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous
count,45211.0,45211.0,45211.0,45211.0,45211.0,45211.0,45211.0
mean,40.93621,1362.272058,15.806419,258.16308,2.763841,40.197828,0.580323
std,10.618762,3044.765829,8.322476,257.527812,3.098021,100.128746,2.303441
min,18.0,-8019.0,1.0,0.0,1.0,-1.0,0.0
25%,33.0,72.0,8.0,103.0,1.0,-1.0,0.0
50%,39.0,448.0,16.0,180.0,2.0,-1.0,0.0
75%,48.0,1428.0,21.0,319.0,3.0,-1.0,0.0
max,95.0,102127.0,31.0,4918.0,63.0,871.0,275.0


In [47]:
bank_full.dtypes

age           int64
job          object
marital      object
education    object
default      object
balance       int64
housing      object
loan         object
contact      object
day           int64
month        object
duration      int64
campaign      int64
pdays         int64
previous      int64
poutcome     object
y            object
dtype: object

In [48]:
bank_full.isnull().sum()

age          0
job          0
marital      0
education    0
default      0
balance      0
housing      0
loan         0
contact      0
day          0
month        0
duration     0
campaign     0
pdays        0
previous     0
poutcome     0
y            0
dtype: int64

## 4.Data Processing

In [49]:
label = LabelEncoder()
bank_full['job']       = label.fit_transform(bank_full['job'])
bank_full['marital']   = label.fit_transform(bank_full['marital'])
bank_full['education'] = label.fit_transform(bank_full['education'])
bank_full['default']   = label.fit_transform(bank_full['default'])
bank_full['housing']   = label.fit_transform(bank_full['housing'])
bank_full['loan']      = label.fit_transform(bank_full['loan'])
bank_full['contact']   = label.fit_transform(bank_full['contact'])
bank_full['month']     = label.fit_transform(bank_full['month'])
bank_full['poutcome']  = label.fit_transform(bank_full['poutcome'])
bank_full['y']  = label.fit_transform(bank_full['y'])

In [50]:
bank_full

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,4,1,2,0,2143,1,0,2,5,8,261,1,-1,0,3,0
1,44,9,2,1,0,29,1,0,2,5,8,151,1,-1,0,3,0
2,33,2,1,1,0,2,1,1,2,5,8,76,1,-1,0,3,0
3,47,1,1,3,0,1506,1,0,2,5,8,92,1,-1,0,3,0
4,33,11,2,3,0,1,0,0,2,5,8,198,1,-1,0,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,51,9,1,2,0,825,0,0,0,17,9,977,3,-1,0,3,1
45207,71,5,0,0,0,1729,0,0,0,17,9,456,2,-1,0,3,1
45208,72,5,1,1,0,5715,0,0,0,17,9,1127,5,184,3,2,1
45209,57,1,1,1,0,668,0,0,1,17,9,508,4,-1,0,3,0


In [51]:
bank_full.shape

(45211, 17)

## 5.Model building

In [52]:
X = bank_full.iloc[:,:-1]
y = bank_full.iloc[:,-1]

In [53]:
X.shape,y.shape

((45211, 16), (45211,))

In [54]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20,random_state=12)
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((36168, 16), (9043, 16), (36168,), (9043,))

In [55]:
y_train.dtypes

dtype('int32')

In [56]:
Bank_logistic_model = LogisticRegression()
Bank_logistic_model.fit(X_train,y_train)

LogisticRegression()

## 6.Model Testing

In [57]:
y_pred = Bank_logistic_model.predict(X_test)
y_pred

array([0, 0, 0, ..., 0, 0, 0])

## 7.Model Evaluation

In [58]:
cv_score = cross_val_score(estimator=Bank_logistic_model,X=X,y=y,cv=5)
print('cv_score:',cv_score)
print('Mean Accuracy {}\nDeivation {}.'.format(cv_score.mean(),cv_score.std()))

cv_score: [0.89992259 0.89581951 0.88431763 0.83311214 0.87436408]
Mean Accuracy 0.8775071903872581
Deivation 0.023937304232029007.


In [59]:
import warnings
warnings.filterwarnings('ignore')

In [60]:
print("Accuarcy Score :",round(accuracy_score(y_test,y_pred),4))
print("MSE            :",round(mean_squared_error(y_test,y_pred),4))

Accuarcy Score : 0.8897
MSE            : 0.1103


## 8.Model Depolyment

In [66]:
from pickle import dump

In [67]:
dump(Bank_logistic_model,open("Bank_logistic_model.pkl",'wb'))

In [68]:
from pickle import load

In [69]:
Bank_loaded = load(open("Bank_logistic_model.pkl",'rb'))

In [70]:
Bank_loaded.predict(X_test)

array([0, 0, 0, ..., 0, 0, 0])