In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for full_path in (os.path.join(dirname, filename) for filename in filenames):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Location of the loan dataset inside the Kaggle input directory.
loan_csv_path = '/kaggle/input/lending-club-loan-data-analysis/loan_data.csv'
data = pd.read_csv(loan_csv_path)

In [None]:
# Preview the first rows of the raw frame, still with the CSV's original column names.
data.head()

In [None]:
# Human-readable replacements for the CSV headers, assigned purely by position.
# NOTE(review): assumes the file has exactly these 14 columns in this order — confirm against the CSV.
readable_columns = [
    'Credit Policy',
    'Loan Purpose',
    'Interest Rate',
    'Installment',
    'Annual Income Log Report',
    'Debt to Income Ratio',
    'FICO Score',
    'Days with Credit Line',
    'Revolving balance',
    'Revolving utilization rate',
    'Inquiries in last Six Months',
    'Delinquency Frequency',
    'Public Derogatory Records',
    'Will Default',
]
data.columns = readable_columns

In [None]:
# Preview the frame again to confirm the renamed column headers.
data.head()

In [None]:
# Check for missing values: info() prints each column's non-null count and dtype.
data.info()

In [None]:
# Plot the number of non-null entries per column as a visual missing-value check.
# The previous version passed `data.info()` as `x`; info() only prints a summary
# and returns None, so no column was selected and the summary was printed again.
non_null_counts = data.notna().sum()
plt.figure(figsize=(30,6))
sns.barplot(x=non_null_counts.index, y=non_null_counts.values, palette='dark')
plt.title('Non-null entries per column')
plt.xlabel('Column')
plt.ylabel('Non-null count')
plt.show()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode the categorical 'Loan Purpose' column as integer codes.
ft = LabelEncoder()
data_labels = ft.fit_transform(data['Loan Purpose'])

# Keep an integer-code -> original-label lookup so the encoding can be read back.
data_mappings = dict(enumerate(ft.classes_))
data_mappings

In [None]:
# Overwrite the text 'Loan Purpose' column with its integer codes.
# This mutates `data` in place, so the original strings are no longer in the frame afterwards.
data['Loan Purpose'] = data_labels
data

In [None]:
# Re-inspect dtypes and non-null counts after encoding 'Loan Purpose'.
data.info()

From the graph and the `data.info()` output above, we can see that there are no null values and also that the data is balanced.
Now, let's do the EDA, so that we can draw conclusions from the data and start preparing for the predictive modelling.

In [None]:
# List the distinct values currently stored in the 'Loan Purpose' column.
data['Loan Purpose'].unique()

In [None]:
# Display labels for the two binary columns; used only for this plot.
policy_labels = {0: 'Non Credit Underwriting Criteria',
                 1: 'Credit Underwriting Criteria'}
default_labels = {0: 'No', 1: 'Yes'}

# Work on a copy so the numeric columns in `data` stay untouched for modelling.
data_policy = data.copy()
for column, labels in (('Credit Policy', policy_labels), ('Will Default', default_labels)):
    data_policy[column] = data_policy[column].map(labels)

# Count of loans per credit-policy group, split by whether the loan defaulted.
plt.figure(figsize=(14,6), dpi = 100)
sns.countplot(x = 'Credit Policy', hue = 'Will Default', data = data_policy, palette = 'dark')
plt.title('Distribution of Credit Policy')
plt.show()

In [None]:
# Group rows by 'Credit Policy'. The resulting GroupBy object is not used in the visible cells.
data_credit = data.groupby('Credit Policy')

In [None]:
# Columns included in the pairwise correlation matrix.
corr_features = [
    'Interest Rate',
    'Installment',
    'Annual Income Log Report',
    'Debt to Income Ratio',
    'FICO Score',
    'Days with Credit Line',
    'Revolving balance',
    'Revolving utilization rate',
    'Inquiries in last Six Months',
    'Delinquency Frequency',
]
data_corr = data.loc[:, corr_features].copy()
correlations = data_corr.corr()

# Annotated heatmap of the correlations between the selected features.
plt.figure(figsize=(14,8), dpi = 100)
sns.heatmap(correlations, annot=True)

In [None]:
# Interest rate against revolving utilization, coloured by the default flag.
scatter_columns = ['Interest Rate', 'Revolving utilization rate', 'Will Default']
data_scat = data[scatter_columns].copy()

plt.figure(figsize = (14,6))
sns.scatterplot(x = 'Interest Rate', y = 'Revolving utilization rate', hue = 'Will Default', data = data_scat)
plt.show()

In [None]:
# Separate the feature matrix from the 'Will Default' target column.
target_column = 'Will Default'
X = data.drop(columns = [target_column])
y = data[target_column].copy()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix, classification_report
from pickle import dump, load

In [None]:
# 75/25 train/test split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size = 0.75,
    test_size = 0.25,
    random_state = 101,
)

In [None]:
# Learn the min/max scaling from the training features only, then apply the
# same fitted scaler to both splits so the test set does not influence it.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Create an empty Keras Sequential model; the layers are added in the next cell.
model = Sequential()


In [None]:
# Rebuild the network in this cell so that re-running it starts from a fresh
# model instead of appending a second copy of the layers to an existing one.
model = Sequential()

# Three ReLU hidden layers followed by a single sigmoid unit that outputs the
# probability of the positive class.
model.add(Dense(19, activation='relu'))

model.add(Dense(10, activation='relu'))

model.add(Dense(5, activation='relu'))

model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy'])

# Carve a validation subset out of the training data for early stopping.
# Monitoring the test set here would let it steer when training stops and
# make the later test-set evaluation optimistically biased.
X_fit, X_val, y_fit, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=101)

# Stop once validation loss has not improved for 25 consecutive epochs.
early_stop = EarlyStopping(monitor='val_loss',mode='min',verbose=1,patience=25)

model.fit(X_fit,y_fit,epochs=300,batch_size=256,validation_data=(X_val, y_val),callbacks=[early_stop])


In [None]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

In [None]:
# Score the trained model on the held-out test split. evaluate() returns the
# loss first, then the metrics passed to compile() (accuracy here).
test_scores = model.evaluate(X_test, y_test, verbose=0)
loss, acc = test_scores
for template, score in (('Accuracy: %.3f', acc), ('Loss: %.3f', loss)):
    print(template % score)

In [None]:
# Model outputs for the test split. The final layer is a single sigmoid unit,
# so each row holds one value in [0, 1] rather than a hard class label.
y_predict = model.predict(X_test)

In [None]:
# Inspect the raw model output for the first test sample.
y_predict[0]

In [None]:
# Turn the first sample's sigmoid probability into a 0/1 label.
# argmax over a one-element output is always 0, so threshold at 0.5 instead.
int(y_predict[0][0] > 0.5)

In [None]:
from sklearn import metrics

# The network ends in one sigmoid unit, so each prediction is a single
# probability. argmax over a one-element row is always 0 and would label
# every sample as class 0; threshold the probability at 0.5 instead.
y_pred = (np.asarray(y_predict).ravel() > 0.5).astype(int)

# Rows are the true classes, columns the predicted classes.
cm = metrics.confusion_matrix(y_test,y_pred)
print(cm)

In [None]:
# Per-class precision, recall and F1 for the test-set predictions.
# Relies on `metrics` and `y_pred` defined in the confusion-matrix cell.
cr = metrics.classification_report(y_true = y_test, y_pred = y_pred)
print(cr)