### Importing the Libraries

In [None]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns

In [None]:
# Lift the row cap so the notebook prints frames and series in full.
pd.options.display.max_rows = None

### Importing the dataset

In [None]:
# Read the training CSV into a DataFrame and preview its first five rows.
dataset = pd.read_csv(
    filepath_or_buffer='../input/loan-prediction-problem-dataset/train_u6lujuX_CVtuZ9i.csv'
)
dataset.head(n=5)

### Checking the shape (rows, columns)

In [None]:
# Bare expression: the notebook echoes the frame's (rows, columns) tuple.
dataset.shape

### Checking the info of the dataset

In [None]:
# Print each column's name, non-null count and dtype.
dataset.info()

### Checking how many columns have how many null values

In [None]:
# Per-column count of missing entries (isna is the same check as isnull).
dataset.isna().sum()

### Converting the NULL values into desirable values where required and applicable and retaining the data type of the columns

In [None]:
# Replace missing loan amounts with the column mean, then pin the dtype to float.
dataset['LoanAmount'] = (
    dataset['LoanAmount']
    .fillna(dataset['LoanAmount'].mean())
    .astype(float)
)

In [None]:
# Replace missing loan terms with the column median, then pin the dtype to float.
dataset['Loan_Amount_Term'] = (
    dataset['Loan_Amount_Term']
    .fillna(dataset['Loan_Amount_Term'].median())
    .astype(float)
)

In [None]:
# Replace missing credit-history flags with the column median, then pin the dtype to float.
dataset['Credit_History'] = (
    dataset['Credit_History']
    .fillna(dataset['Credit_History'].median())
    .astype(float)
)

### Drop the rest of the data where NULL cannot be filled

In [None]:
# Discard, in place, every row that still contains at least one missing value.
dataset.dropna(axis=0, how='any', inplace=True)

### Prepare the X

In [None]:
# Feature frame: every column except the row identifier and the target.
X = dataset.drop(['Loan_ID', 'Loan_Status'], axis=1)

# Rewrite the '3+' bucket as '3'; the other labels map to themselves.
dep = dict(zip(['0', '1', '2', '3+'], ['0', '1', '2', '3']))
X['Dependents'] = X['Dependents'].map(dep)

X.info()

In [None]:
# Per-column count of missing entries left in the feature frame.
X.isna().sum()

### Prepare the y and Encode Categorical Values to 0 and 1

In [None]:
# Target column, taken from the cleaned frame; preview the first rows.
y = dataset.loc[:, 'Loan_Status']
y.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping from the target, then apply it.
lb_make = LabelEncoder().fit(y)
y = lb_make.transform(y)

# Echo the encoded target array.
y

### Encode all Categorical Values for X

In [None]:
##Encode Categorical Values

from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
# Columns holding labels rather than numbers; each is expanded into indicator columns.
categorical_features = [
    'Gender',
    'Married',
    'Dependents',
    'Education',
    'Self_Employed',
    'Property_Area',
]
one_hot = OneHotEncoder()
# Columns not listed above are passed through unchanged.
transformer = ColumnTransformer(
    transformers=[("one_hot", one_hot, categorical_features)],
    remainder='passthrough',
)
X = transformer.fit_transform(X)

In [None]:
### Splitting data into test set and training set

from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
(X_train,
 X_test,
 y_train,
 y_test) = train_test_split(X, y, random_state=0, test_size=0.2)

In [None]:
### Feature Scaling

from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training rows only, then apply them to both splits.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [None]:
# Bare tuple expression: the notebook echoes the three shapes so the split sizes can be checked.
X_train.shape, X_test.shape, y_train.shape

### Import keras

In [None]:
import keras
from keras.models import Sequential 
from keras.layers import Dense,Dropout

### Initialise the Artificial Neural Network

In [None]:
# Start from an empty Sequential model; layers are attached to it by the add() calls that follow.
classifier = Sequential()

### Adding the input layer and the first hidden layer
#### The number of units is something one arrives at through experimentation or parameter tuning.
#### As a rule of thumb, it can be set to the average of the number of input variables and the number of output variables.
#### input_dim must be equal to the number of input variables.

In [None]:
# First hidden layer: 12 ReLU units. input_dim is read from the training matrix
# rather than hard-coded, because the number of one-hot columns depends on which
# category values are present in the data after cleaning.
classifier.add(Dense(units = 12 , kernel_initializer = 'uniform' , activation = 'relu' , input_dim = X_train.shape[1]))
# Randomly drop half of the layer's outputs during training to limit overfitting.
classifier.add(Dropout(0.5))

### Adding the 2nd hidden layer

In [None]:
# Second hidden layer: 12 ReLU units, followed by 50% dropout.
classifier.add(Dense(12, activation='relu', kernel_initializer='uniform'))
classifier.add(Dropout(rate=0.5))

###### Here we have created only 2 hidden layers. Depending on the scenario or on experiments, one can add more hidden layers or keep just 1.

### Adding the output layer
##### Units will be 1 as this is the output layer and we are just giving 1 output either Y or N
##### Use activation = softmax if the model has more than 2 classifications

In [None]:
# Output layer: one sigmoid unit, giving a single value between 0 and 1 per sample.
classifier.add(Dense(1, activation='sigmoid', kernel_initializer='uniform'))

### Compiling the ANN
##### Using 'binary_crossentropy' as there are only 2 outcomes, Y or N


In [None]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy reported per epoch.
classifier.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print the model's layer-by-layer summary.
classifier.summary()

### Fitting the training set to the ANN
##### Say we have 230 items in the training set. A batch size of 10 means that we send 10 items per iteration.
##### The no. of iterations would be 230/10 = 23
##### All these 23 iterations will be part of 1 epoch

In [None]:
# Train for 100 passes over the training set, updating weights after every 10 samples.
classifier.fit(x=X_train, y=y_train, epochs=100, batch_size=10)

### Predicting the results

In [None]:
# predict() returns probabilities; comparing against 0.5 turns them into True/False labels.
y_pred = classifier.predict(X_test) > 0.5
y_pred

### Making the confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Tabulate true vs. predicted labels on the held-out rows, then report the accuracy.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred)

### Evaluating the ANN 

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

def build_classifier():
    """Build and compile a fresh network for cross-validation.

    Architecture: 20 inputs -> Dense(12, relu) -> Dropout(0.3)
    -> Dense(12, relu) -> Dropout(0.3) -> Dense(1, sigmoid).

    Returns:
        The Sequential model, compiled with the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    layers = [
        Dense(units=12, kernel_initializer='uniform', activation='relu', input_dim=20),
        Dropout(0.3),
        Dense(units=12, kernel_initializer='uniform', activation='relu'),
        Dropout(0.3),
        Dense(units=1, kernel_initializer='uniform', activation='sigmoid'),
    ]
    model = Sequential()
    # Attach the layers in the order listed above.
    for layer in layers:
        model.add(layer)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Wrap the builder as a scikit-learn estimator and score it with 10-fold cross-validation.
classifier = KerasClassifier(build_fn=build_classifier, epochs=100, batch_size=10)
accuracies = cross_val_score(
    classifier,
    X_train,
    y_train,
    cv=10,
    n_jobs=1,
    verbose=1,
)

In [None]:
# Average fold accuracy and its spread.
# NOTE: despite its name, `variance` holds the standard deviation (accuracies.std()).
mean, variance = accuracies.mean(), accuracies.std()
print(mean, variance)

### Improving the ANN
##### Use dropout if required and in case of overfitting

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense

def build_classifier (optimizer = 'adam'):
    """Build and compile the three-hidden-layer network used by the grid search.

    Architecture: 20 inputs -> three blocks of Dense(11, relu) + Dropout(0.4)
    -> Dense(1, sigmoid).

    Args:
        optimizer: Optimizer handed to ``compile``. The grid search supplies
            this value from its ``'optimizer'`` parameter list. Defaults to
            ``'adam'`` so the function can also be called with no arguments.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, accuracy metric).
    """
    classifier = Sequential()
    classifier.add(Dense(units = 11 , kernel_initializer = 'uniform' , activation = 'relu' , input_dim = 20))
    classifier.add(Dropout(0.4))
    classifier.add(Dense(units = 11 , kernel_initializer = 'uniform' , activation = 'relu'))
    classifier.add(Dropout(0.4))
    classifier.add(Dense(units = 11 , kernel_initializer = 'uniform' , activation = 'relu'))
    classifier.add(Dropout(0.4))
    classifier.add(Dense(units = 1 , kernel_initializer = 'uniform' , activation = 'sigmoid'))
    # Pass the requested optimizer through; hard-coding 'adam' here would make
    # every optimizer candidate in the search train an identical model.
    classifier.compile(optimizer = optimizer , loss = 'binary_crossentropy' , metrics = ['accuracy'])
    return classifier

# Wrap the builder so scikit-learn can clone and refit it for every candidate setting.
classifier = KerasClassifier(build_fn=build_classifier)
# Candidate values; every combination is scored with 10-fold cross-validation.
parameter = dict(
    batch_size=[32, 35, 38],
    epochs=[50, 100, 500],
    optimizer=['adam', 'rmsprop'],
)
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameter,
    scoring='accuracy',
    cv=10,
).fit(X_train, y_train)


### Checking the best params for the ANN model

In [None]:
# Best parameter combination found by the search and its mean cross-validated accuracy.
best_parameters, best_accuracy = grid_search.best_params_, grid_search.best_score_
print(best_parameters, best_accuracy)