# Importing Required Libraries

In [1]:
import numpy as np
import pandas as pd

# Reading the dataset

In [2]:
# Load the raw data into a DataFrame. The path is relative, so "bank.csv" must
# be present in the notebook's working directory.
dataset = pd.read_csv("bank.csv")

In [3]:
dataset.head(10)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,admin.,married,secondary,no,2343,yes,no,unknown,5,may,1042,1,-1,0,unknown,yes
1,56,admin.,married,secondary,no,45,no,no,unknown,5,may,1467,1,-1,0,unknown,yes
2,41,technician,married,secondary,no,1270,yes,no,unknown,5,may,1389,1,-1,0,unknown,yes
3,55,services,married,secondary,no,2476,yes,no,unknown,5,may,579,1,-1,0,unknown,yes
4,54,admin.,married,tertiary,no,184,no,no,unknown,5,may,673,2,-1,0,unknown,yes
5,42,management,single,tertiary,no,0,yes,yes,unknown,5,may,562,2,-1,0,unknown,yes
6,56,management,married,tertiary,no,830,yes,yes,unknown,6,may,1201,1,-1,0,unknown,yes
7,60,retired,divorced,secondary,no,545,yes,no,unknown,6,may,1030,1,-1,0,unknown,yes
8,37,technician,married,secondary,no,1,yes,no,unknown,6,may,608,1,-1,0,unknown,yes
9,28,services,single,secondary,no,5090,yes,no,unknown,6,may,1297,3,-1,0,unknown,yes


In [4]:
dataset.describe()

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous
count,11162.0,11162.0,11162.0,11162.0,11162.0,11162.0,11162.0
mean,41.231948,1528.538524,15.658036,371.993818,2.508421,51.330407,0.832557
std,11.913369,3225.413326,8.42074,347.128386,2.722077,108.758282,2.292007
min,18.0,-6847.0,1.0,2.0,1.0,-1.0,0.0
25%,32.0,122.0,8.0,138.0,1.0,-1.0,0.0
50%,39.0,550.0,15.0,255.0,2.0,-1.0,0.0
75%,49.0,1708.0,22.0,496.0,3.0,20.75,1.0
max,95.0,81204.0,31.0,3881.0,63.0,854.0,58.0


In [5]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11162 entries, 0 to 11161
Data columns (total 17 columns):
age          11162 non-null int64
job          11162 non-null object
marital      11162 non-null object
education    11162 non-null object
default      11162 non-null object
balance      11162 non-null int64
housing      11162 non-null object
loan         11162 non-null object
contact      11162 non-null object
day          11162 non-null int64
month        11162 non-null object
duration     11162 non-null int64
campaign     11162 non-null int64
pdays        11162 non-null int64
previous     11162 non-null int64
poutcome     11162 non-null object
deposit      11162 non-null object
dtypes: int64(7), object(10)
memory usage: 1.4+ MB


# Checking for missing values (none are found, so no handling is required)

In [6]:
dataset.isnull().sum()

age          0
job          0
marital      0
education    0
default      0
balance      0
housing      0
loan         0
contact      0
day          0
month        0
duration     0
campaign     0
pdays        0
previous     0
poutcome     0
deposit      0
dtype: int64

# Splitting dataset into x and y

In [8]:
# Feature matrix: the first 16 columns (age ... poutcome), returned as an
# object-dtype ndarray because numeric and string columns are mixed.
x = dataset.iloc[:, :16].values
# Target vector: the column at position 16 (`deposit`, values 'yes' / 'no').
y = dataset.iloc[:, 16].values

In [9]:
x

array([[59, 'admin.', 'married', ..., -1, 0, 'unknown'],
       [56, 'admin.', 'married', ..., -1, 0, 'unknown'],
       [41, 'technician', 'married', ..., -1, 0, 'unknown'],
       ...,
       [32, 'technician', 'single', ..., -1, 0, 'unknown'],
       [43, 'technician', 'married', ..., 172, 5, 'failure'],
       [34, 'technician', 'married', ..., -1, 0, 'unknown']], dtype=object)

In [10]:
dataset['job'].unique()

array(['admin.', 'technician', 'services', 'management', 'retired',
       'blue-collar', 'unemployed', 'entrepreneur', 'housemaid',
       'unknown', 'self-employed', 'student'], dtype=object)

In [11]:
dataset['marital'].unique()

array(['married', 'single', 'divorced'], dtype=object)

In [12]:
dataset['education'].unique()

array(['secondary', 'tertiary', 'primary', 'unknown'], dtype=object)

In [13]:
dataset['housing'].unique()

array(['yes', 'no'], dtype=object)

In [14]:
dataset['loan'].unique()

array(['no', 'yes'], dtype=object)

In [15]:
dataset['month'].unique()

array(['may', 'jun', 'jul', 'aug', 'oct', 'nov', 'dec', 'jan', 'feb',
       'mar', 'apr', 'sep'], dtype=object)

In [16]:
dataset['poutcome'].unique()

array(['unknown', 'other', 'failure', 'success'], dtype=object)

In [17]:
dataset['deposit'].unique()

array(['yes', 'no'], dtype=object)

In [18]:
dataset['default'].unique()

array(['no', 'yes'], dtype=object)

# Handling Categorical data

In [19]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

In [20]:
# One-hot encode the categorical feature columns, addressed by position in x:
#   1 job, 2 marital, 3 education, 4 default, 6 housing, 7 loan,
#   8 contact, 10 month, 15 poutcome.
# The remaining numeric columns are passed through unchanged.
ct = ColumnTransformer(
    transformers=[("one", OneHotEncoder(), [1, 2, 3, 4, 6, 7, 8, 10, 15])],
    remainder="passthrough",
)
# Rebinds x to the encoded matrix, so this cell is meant to run exactly once
# on the raw feature array produced by the split cell.
x = ct.fit_transform(x)

In [21]:
x

array([[1.0, 0.0, 0.0, ..., 1, -1, 0],
       [1.0, 0.0, 0.0, ..., 1, -1, 0],
       [0.0, 0.0, 0.0, ..., 1, -1, 0],
       ...,
       [0.0, 0.0, 0.0, ..., 2, -1, 0],
       [0.0, 0.0, 0.0, ..., 2, 172, 5],
       [0.0, 0.0, 0.0, ..., 1, -1, 0]], dtype=object)

In [28]:
# Turn the 1-D target vector into a single-column 2-D array, which is the
# layout the column-wise encoder applied to y in the next cell operates on.
y = y.reshape(-1,1)

In [29]:
# One-hot encode the target's only column. With the two labels present in
# `deposit` this yields a two-column indicator array; rows labelled 'yes'
# come out as [0., 1.] (see the displayed result below).
ct2 = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(), [0])],
    remainder="passthrough",
)
y = ct2.fit_transform(y)

In [30]:
y

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]])

In [31]:
# Display-only: the reshaped array is not assigned back, so y keeps its
# two-column shape for the cells that follow. The flattened row interleaves
# the two indicator values of each sample.
y.reshape(1,-1)

array([[0., 1., 0., ..., 0., 1., 0.]])

# Splitting the data into train and test

In [32]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as the test split; the fixed random_state makes
# the shuffle (and therefore the split) repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

In [33]:
# Report the shape of the full arrays and of each split, one "<shape> <label>"
# line per array, in a single write.
print(
    "\n".join(
        "{} {}".format(arr.shape, label)
        for arr, label in (
            (x, "xshape"),
            (y, "yshape"),
            (x_train, "x_train shape"),
            (x_test, "x_test shape"),
            (y_train, "y_train shape"),
            (y_test, "y_test shape"),
        )
    )
)

(11162, 51) xshape
(11162, 2) yshape
(8929, 51) x_train shape
(2233, 51) x_test shape
(8929, 2) y_train shape
(2233, 2) y_test shape


In [36]:
# Keep a second name for the current (encoded, not yet standardised) feature
# matrix. The scaling cell below rebinds x to its transformed result, while
# inp continues to refer to this array.
inp = x

# Applying feature scaling

In [37]:
from sklearn.preprocessing import StandardScaler
# Standardise every column of the full feature matrix.
# NOTE(review): the scaler here is fitted on all rows of x (train and test
# alike). x_train / x_test were split off earlier and are not touched by this.
d = StandardScaler().fit(x)
x = d.transform(x)

In [44]:
# Preview the first five training rows.
# NOTE(review): this cell's execution count (44) is later than that of the
# train/test scaling cell further down (40), so the recorded output shows
# already-standardised values. On a fresh top-to-bottom run this cell would
# display the unscaled split instead.
x_train[0:5,:]

array([[-0.37172192, -0.45760242, -0.17658264, -0.1589378 ,  1.8452729 ,
        -0.27363591, -0.1931085 , -0.30435733, -0.17992689, -0.44212513,
        -0.18025832, -0.08293772, -0.36485864,  0.86351031, -0.66975544,
        -0.39346051,  1.01614555, -0.69993206, -0.21741017,  0.12621015,
        -0.12621015, -1.05825117,  1.05825117,  0.39003722, -0.39003722,
        -1.60273048, -0.27292594,  1.93439945, -0.30193478, -0.39913565,
        -0.09686471, -0.27410848, -0.17488949, -0.39459853, -0.34910544,
        -0.15930776,  1.70870313, -0.30215554, -0.1931085 , -0.16761389,
        -0.35031555, -0.22657536, -0.32939951,  0.58661881,  0.64941897,
        -0.44565283, -0.90504798, -0.2483547 , -0.56394809, -0.48428507,
        -0.35888264],
       [-0.37172192, -0.45760242, -0.17658264, -0.1589378 , -0.54192526,
         3.65449114, -0.1931085 , -0.30435733, -0.17992689, -0.44212513,
        -0.18025832, -0.08293772,  2.7407875 , -1.15806377, -0.66975544,
        -0.39346051,  1.01614

In [40]:
# Standardise the train and test splits with ONE set of statistics.
# The scaler learns its per-column mean and standard deviation from the
# training rows only. The test rows are then transformed with those same
# values, so both splits live in the same feature space and no information
# from the test set influences preprocessing.
# (Previously the scaler was re-fitted on x_test, which scaled the test split
# with its own mean/std and made it inconsistent with the training data.)
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [45]:
x_train[0:5,:]

array([[-0.37172192, -0.45760242, -0.17658264, -0.1589378 ,  1.8452729 ,
        -0.27363591, -0.1931085 , -0.30435733, -0.17992689, -0.44212513,
        -0.18025832, -0.08293772, -0.36485864,  0.86351031, -0.66975544,
        -0.39346051,  1.01614555, -0.69993206, -0.21741017,  0.12621015,
        -0.12621015, -1.05825117,  1.05825117,  0.39003722, -0.39003722,
        -1.60273048, -0.27292594,  1.93439945, -0.30193478, -0.39913565,
        -0.09686471, -0.27410848, -0.17488949, -0.39459853, -0.34910544,
        -0.15930776,  1.70870313, -0.30215554, -0.1931085 , -0.16761389,
        -0.35031555, -0.22657536, -0.32939951,  0.58661881,  0.64941897,
        -0.44565283, -0.90504798, -0.2483547 , -0.56394809, -0.48428507,
        -0.35888264],
       [-0.37172192, -0.45760242, -0.17658264, -0.1589378 , -0.54192526,
         3.65449114, -0.1931085 , -0.30435733, -0.17992689, -0.44212513,
        -0.18025832, -0.08293772,  2.7407875 , -1.15806377, -0.66975544,
        -0.39346051,  1.01614