**Implement a simple feed-forward neural network model for the loan dataset and then evaluate the performance on a 80/20 split. Exclude irrelevant columns and perform one-hot encoding and normalization of each column, if needed.**

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.neural_network import MLPClassifier

In [2]:
loan_data=pd.read_excel('loan.xlsx')

In [3]:
print(loan_data.shape)
loan_data

(429, 14)


Unnamed: 0,Sex,Age,Time_at_address,Res_status,Telephone,Occupation,Job_status,Time_employed,Time_bank,Liab_ref,Acc_ref,Home_Expn,Balance,Decision
0,M,50.750000,0.585,owner,given,unemploye,unemploye,0,0,f,given,145,0,reject
1,M,19.670000,10.000,rent,not_given,labourer,governmen,0,0,t,given,140,0,reject
2,F,52.830002,15.000,owner,given,creative_,private_s,5,14,f,given,0,2200,accept
3,M,22.670000,2.540,rent,not_given,creative_,governmen,2,0,f,given,0,0,accept
4,M,29.250000,13.000,owner,given,driver,governmen,0,0,f,given,228,0,reject
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424,M,34.169998,2.750,owner,given,guard_etc,self_empl,2,0,t,given,232,200,reject
425,F,22.250000,1.250,rent,not_given,unemploye,unemploye,3,0,f,given,280,0,reject
426,M,23.330000,1.500,owner,given,creative_,governmen,1,0,f,given,422,200,accept
427,M,21.000000,4.790,rent,not_given,productio,private_s,2,1,t,given,80,300,accept


In [4]:
loan_data.isnull().sum()

Sex                0
Age                0
Time_at_address    0
Res_status         0
Telephone          0
Occupation         0
Job_status         0
Time_employed      0
Time_bank          0
Liab_ref           0
Acc_ref            0
Home_Expn          0
Balance            0
Decision           0
dtype: int64

In [5]:
loan_data.head()
loan_data.shape

(429, 14)

In [6]:
#Using the iloc attribute to slect all rows and all columns except he last column
X=loan_data.iloc[:,:-1].values
X

array([['M', 50.75, 0.584999978542328, ..., 'given', 145, 0],
       ['M', 19.6700000762939, 10.0, ..., 'given', 140, 0],
       ['F', 52.8300018310547, 15.0, ..., 'given', 0, 2200],
       ...,
       ['M', 23.3299999237061, 1.5, ..., 'given', 422, 200],
       ['M', 21.0, 4.78999996185303, ..., 'given', 80, 300],
       ['M', 27.75, 1.28999996185303, ..., 'oth_inst_', 140, 0]],
      dtype=object)

In [7]:
y=loan_data.iloc[:,-1].values

In [8]:
loan_data.dtypes

Sex                 object
Age                float64
Time_at_address    float64
Res_status          object
Telephone           object
Occupation          object
Job_status          object
Time_employed        int64
Time_bank            int64
Liab_ref            object
Acc_ref             object
Home_Expn            int64
Balance              int64
Decision            object
dtype: object

In [9]:
#Performing one-hot encoding on categorical features in X variable
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct=ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[0,3,4,5,6,9,10])],remainder='passthrough')
x=np.array(ct.fit_transform(X))

In [10]:
#Performing label encoding on target variable 'y'. Using fiy_transform method to fit the labelencoder to target variable and transform he categorical values into numerical
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
y=le.fit_transform(y)

In [11]:
print(x)
print(x.shape)

[[0.0 1.0 1.0 ... 0 145 0]
 [0.0 1.0 0.0 ... 0 140 0]
 [1.0 0.0 1.0 ... 14 0 2200]
 ...
 [0.0 1.0 1.0 ... 0 422 200]
 [0.0 1.0 0.0 ... 1 80 300]
 [0.0 1.0 1.0 ... 0 140 0]]
(429, 35)


In [12]:
print(y.shape)

(429,)


In [13]:
#Splitting the encoded encoded feature variables 'x' & the target variables 'y' into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 2)

In [14]:
print(X_train)
print(y_train.shape)
print(X_test)
print(y_test.shape)

[[1.0 0.0 1.0 ... 6 0 1260]
 [0.0 1.0 1.0 ... 0 60 100]
 [0.0 1.0 1.0 ... 0 80 350]
 ...
 [0.0 1.0 0.0 ... 0 120 375]
 [1.0 0.0 1.0 ... 0 0 0]
 [1.0 0.0 1.0 ... 3 212 0]]
(321,)
[[0.0 1.0 1.0 ... 3 0 582]
 [1.0 0.0 1.0 ... 1 120 1]
 [0.0 1.0 1.0 ... 0 132 0]
 ...
 [0.0 1.0 1.0 ... 0 140 0]
 [0.0 1.0 1.0 ... 2 160 587]
 [0.0 1.0 1.0 ... 3 180 0]]
(108,)


In [15]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.sigmoid))

In [16]:
model.compile(optimizer = 'adam', loss = tf.keras.losses.MeanSquaredError(), metrics=['accuracy'])

In [17]:
model = MLPClassifier()
model.fit(X_train, y_train)

accuracy = model.score(X_test, y_test)

print(accuracy)

0.7037037037037037
