# Loan Prediction using Artificial Neural Network

## Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Record which TensorFlow release this notebook runs against.
tf_version = tf.__version__
tf_version

### Importing the dataset

In [None]:
# Path of the training CSV, relative to the notebook's working directory.
csv_path = '../input/loan-prediction-problem-dataset/train_u6lujuX_CVtuZ9i.csv'
data = pd.read_csv(csv_path)
# (rows, columns) of the loaded table
data.shape

The dataset contains 614 rows and 13 columns.

In [None]:
# Preview the first five rows to get a feel for the columns and their values.
data.head(n=5)

`Loan_Status` is the target column.

## Data Preprocessing

### Checking for missing values

In [None]:
# Number of missing entries in each column.
missing_per_column = data.isnull().sum()
missing_per_column

There are 7 columns which contain missing values.

In [None]:
# Fill the gaps in the two numeric columns with a central value computed
# from the column itself: the mean for LoanAmount, the median for
# Credit_History.
loan_amount_mean = data['LoanAmount'].mean()
credit_history_median = data['Credit_History'].median()
data['LoanAmount'] = data['LoanAmount'].fillna(loan_amount_mean)
data['Credit_History'] = data['Credit_History'].fillna(credit_history_median)

# Any row that still has a missing value in another column is discarded.
data.dropna(inplace=True)

In [None]:
# Sanity check: total count of missing entries left in the whole table.
remaining_missing = data.isnull().sum().sum()
remaining_missing

In [None]:
# Separate the features from the label. The first column is left out
# (presumably a row identifier -- confirm against the dataset) and the last
# column becomes the target.
feature_frame = data.iloc[:, 1:-1]
target_series = data.iloc[:, -1]
X = feature_frame.values
Y = target_series.values

In [None]:
# Show the raw feature matrix followed by the raw target vector.
print(X, Y, sep='\n')

In [None]:
# Map the target labels to numbers in a single expression:
# 'Y' becomes 1, 'N' becomes 0, and any other value is left as it is.
Y = np.where(Y == 'N', 0, np.where(Y == 'Y', 1, Y))
print(Y)

In [None]:
# Convert the target to a proper integer array and confirm the resulting dtype.
Y = Y.astype(int)
Y.dtype

In [None]:
# Feature column 2 contains the open-ended label '3+'; overwrite those
# entries with the number 3 and leave every other entry untouched.
three_plus = X[:, 2] == '3+'
X[three_plus, 2] = 3
X[:, 2]

## Encoding categorical data

In [None]:
from sklearn.preprocessing import LabelEncoder

# A single encoder is refit on each of these feature columns in turn,
# replacing the column's values with integer codes in place.
le = LabelEncoder()
for col in (0, 1, 3, 4):
    X[:, col] = le.fit_transform(X[:, col])

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode feature column 10; every other column is passed through
# unchanged.
one_hot_step = ('encoder', OneHotEncoder(), [10])
ct = ColumnTransformer(transformers=[one_hot_step], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [None]:
# Cast the mixed-type feature matrix to one numeric dtype.
# A floating-point dtype is used rather than an integer one so that
# fractional values (for example the mean used to fill LoanAmount) are kept
# instead of being silently truncated.
X = X.astype('float64')
X.dtype

### Splitting the dataset into training and test data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable.
splits = train_test_split(X, Y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = splits

#### Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training rows only; the same fitted scaler is
# then applied to both the training and the test rows.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Building the ANN

In [None]:
# Create an empty sequential model; the layers are appended in the cells below.
model_cls = tf.keras.models.Sequential
ann = model_cls()

In [None]:
# First hidden layer: 8 units with ReLU activation (no input shape is given here).
hidden_1 = tf.keras.layers.Dense(units=8, activation='relu')
ann.add(hidden_1)

In [None]:
# Second hidden layer: 6 units with ReLU activation.
hidden_2 = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(hidden_2)

In [None]:
# Third hidden layer: 6 units with ReLU activation.
hidden_3 = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(hidden_3)

In [None]:
# Output layer: one sigmoid unit, giving a single value between 0 and 1 per row.
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
ann.add(output_layer)

In [None]:
# Configure training: binary cross-entropy loss, the Adam optimizer, and
# accuracy reported as the metric.
ann.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train on the scaled training rows for 150 epochs in batches of 32.
ann.fit(x=X_train, y=y_train, epochs=150, batch_size=32)

In [None]:
# Turn the model's outputs for the test rows into boolean class labels using
# a 0.5 threshold.
y_prob = ann.predict(X_test)
y_pred = y_prob > 0.5

# Print predictions (left column) next to the true labels (right column).
predicted_col = y_pred.reshape(-1, 1)
actual_col = y_test.reshape(-1, 1)
print(np.concatenate((predicted_col, actual_col), axis=1))

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Confusion matrix of true labels against predicted labels on the test set.
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Fraction of test rows whose predicted label matches the true label.
test_accuracy = accuracy_score(y_test, y_pred)
test_accuracy