In [1]:
#Importing some important libraries 
import numpy as np 
import pandas as pd
#important libraries to habdle catigerocal data
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
#libraries to handle splitting 
from sklearn.model_selection import train_test_split
#libraries to handle feature scaling 
from sklearn.preprocessing import StandardScaler
# DL libraries
from tensorflow.keras.models import Sequential  
from tensorflow.keras.layers import Dense

In [2]:
#Reading dataset
df = pd.read_csv('bank.csv')

In [3]:
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,admin.,married,secondary,no,2343,yes,no,unknown,5,may,1042,1,-1,0,unknown,yes
1,56,admin.,married,secondary,no,45,no,no,unknown,5,may,1467,1,-1,0,unknown,yes
2,41,technician,married,secondary,no,1270,yes,no,unknown,5,may,1389,1,-1,0,unknown,yes
3,55,services,married,secondary,no,2476,yes,no,unknown,5,may,579,1,-1,0,unknown,yes
4,54,admin.,married,tertiary,no,184,no,no,unknown,5,may,673,2,-1,0,unknown,yes


In [4]:
#checking for missing data if there is any and how many 
df.isnull().sum()

age          0
job          0
marital      0
education    0
default      0
balance      0
housing      0
loan         0
contact      0
day          0
month        0
duration     0
campaign     0
pdays        0
previous     0
poutcome     0
deposit      0
dtype: int64

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11162 entries, 0 to 11161
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        11162 non-null  int64 
 1   job        11162 non-null  object
 2   marital    11162 non-null  object
 3   education  11162 non-null  object
 4   default    11162 non-null  object
 5   balance    11162 non-null  int64 
 6   housing    11162 non-null  object
 7   loan       11162 non-null  object
 8   contact    11162 non-null  object
 9   day        11162 non-null  int64 
 10  month      11162 non-null  object
 11  duration   11162 non-null  int64 
 12  campaign   11162 non-null  int64 
 13  pdays      11162 non-null  int64 
 14  previous   11162 non-null  int64 
 15  poutcome   11162 non-null  object
 16  deposit    11162 non-null  object
dtypes: int64(7), object(10)
memory usage: 1.4+ MB


In [6]:
#Splitting data into inputs as x and outputs as y
x = df.iloc[:, :16].values
y = df.iloc[:, -1:].values

In [7]:
#then we check the input rows and coulmns shape
x.shape

(11162, 16)

In [8]:
x

array([[59, 'admin.', 'married', ..., -1, 0, 'unknown'],
       [56, 'admin.', 'married', ..., -1, 0, 'unknown'],
       [41, 'technician', 'married', ..., -1, 0, 'unknown'],
       ...,
       [32, 'technician', 'single', ..., -1, 0, 'unknown'],
       [43, 'technician', 'married', ..., 172, 5, 'failure'],
       [34, 'technician', 'married', ..., -1, 0, 'unknown']], dtype=object)

In [9]:
y.shape

(11162, 1)

In [10]:
y

array([['yes'],
       ['yes'],
       ['yes'],
       ...,
       ['no'],
       ['no'],
       ['no']], dtype=object)

In [11]:
# Handling categorical data
ct = ColumnTransformer([("one",OneHotEncoder(),[1,2,3,4,6,7,8,10,15])],remainder = "passthrough")
x = ct.fit_transform(x)

In [12]:
# as we see the categorical data converted to binary
x

array([[1.0, 0.0, 0.0, ..., 1, -1, 0],
       [1.0, 0.0, 0.0, ..., 1, -1, 0],
       [0.0, 0.0, 0.0, ..., 1, -1, 0],
       ...,
       [0.0, 0.0, 0.0, ..., 2, -1, 0],
       [0.0, 0.0, 0.0, ..., 2, 172, 5],
       [0.0, 0.0, 0.0, ..., 1, -1, 0]], dtype=object)

In [13]:
ct2 = ColumnTransformer([('one', OneHotEncoder(), [0])], remainder = 'passthrough')
y = ct2.fit_transform(y)

In [14]:
#as we see the yes and no data converted to 0 and 1
y

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]])

In [15]:
# Splitting data to train sets and test sets
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2, random_state = 0)

In [16]:
x_train

array([[0.0, 0.0, 0.0, ..., 1, -1, 0],
       [0.0, 0.0, 0.0, ..., 1, -1, 0],
       [0.0, 1.0, 0.0, ..., 5, -1, 0],
       ...,
       [0.0, 0.0, 0.0, ..., 8, -1, 0],
       [1.0, 0.0, 0.0, ..., 1, -1, 0],
       [0.0, 0.0, 0.0, ..., 1, -1, 0]], dtype=object)

In [17]:
y_train

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [0., 1.]])

In [18]:
print('Inputs shape ', x.shape)
print('Outputs shape ', y.shape)
print('x_train shape ', x_train.shape)
print('x_test shape ', x_test.shape)
print('y_train shape ', y_train.shape)
print('y_test shape', y_test.shape)

Inputs shape  (11162, 51)
Outputs shape  (11162, 2)
x_train shape  (8929, 51)
x_test shape  (2233, 51)
y_train shape  (8929, 2)
y_test shape (2233, 2)


In [19]:
#feature scaling 
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)

In [20]:
# as we see compare to the previous x_train data the values has been scaled to similar unified range
x_train

array([[-0.37172192, -0.45760242, -0.17658264, ..., -0.56394809,
        -0.48428507, -0.35888264],
       [-0.37172192, -0.45760242, -0.17658264, ..., -0.56394809,
        -0.48428507, -0.35888264],
       [-0.37172192,  2.18530314, -0.17658264, ...,  0.92827075,
        -0.48428507, -0.35888264],
       ...,
       [-0.37172192, -0.45760242, -0.17658264, ...,  2.04743488,
        -0.48428507, -0.35888264],
       [ 2.69018306, -0.45760242, -0.17658264, ..., -0.56394809,
        -0.48428507, -0.35888264],
       [-0.37172192, -0.45760242, -0.17658264, ..., -0.56394809,
        -0.48428507, -0.35888264]])

In [21]:
#same scaling feature thing goes for x_text data value
x_test

array([[-0.35506564, -0.46571851, -0.16329932, ..., -0.51893725,
         2.62409874,  0.62465525],
       [-0.35506564, -0.46571851, -0.16329932, ..., -0.51893725,
         1.30248101,  1.63872661],
       [-0.35506564, -0.46571851, -0.16329932, ..., -0.17189116,
        -0.46885387, -0.38941612],
       ...,
       [-0.35506564, -0.46571851, -0.16329932, ..., -0.17189116,
        -0.46885387, -0.38941612],
       [ 2.81638066, -0.46571851, -0.16329932, ..., -0.51893725,
        -0.46885387, -0.38941612],
       [-0.35506564, -0.46571851, -0.16329932, ..., -0.17189116,
        -0.46885387, -0.38941612]])

In [22]:
# now we do the deep learning ann modeling
ob = Sequential()
ob.add(Dense(units = 50, kernel_initializer = 'random_uniform', activation = 'relu'))# Input layer
ob.add(Dense(units = 100, kernel_initializer = 'random_uniform', activation = 'relu'))# Hidden layer 1 
ob.add(Dense(units = 100, kernel_initializer = 'random_uniform', activation = 'relu'))# Hidden layer 2
ob.add(Dense(units = 2, kernel_initializer = 'random_uniform', activation = 'softmax'))# Output layer 
ob.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])# Compiling model 
ob.fit(x_train, y_train, epochs = 100)# Training the model 

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x1eba7850d90>

In [23]:
ypred = ob.predict(x_test)
ypred

array([[1.0000000e+00, 9.1965632e-23],
       [3.5886452e-15, 1.0000000e+00],
       [1.0000000e+00, 6.8453629e-17],
       ...,
       [6.1810006e-06, 9.9999380e-01],
       [1.0000000e+00, 3.8671455e-14],
       [1.0000000e+00, 5.4586826e-13]], dtype=float32)

In [24]:
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,admin.,married,secondary,no,2343,yes,no,unknown,5,may,1042,1,-1,0,unknown,yes
1,56,admin.,married,secondary,no,45,no,no,unknown,5,may,1467,1,-1,0,unknown,yes
2,41,technician,married,secondary,no,1270,yes,no,unknown,5,may,1389,1,-1,0,unknown,yes
3,55,services,married,secondary,no,2476,yes,no,unknown,5,may,579,1,-1,0,unknown,yes
4,54,admin.,married,tertiary,no,184,no,no,unknown,5,may,673,2,-1,0,unknown,yes
