In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Location of the loan CSV, relative to the notebook's working directory.
DATA_PATH = "../input/lending-club-loan-data-analysis/loan_data.csv"

# Read the raw data and preview its first rows.
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Dimensions of the raw frame as (number of rows, number of columns).
df.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the raw frame.
df.describe()

In [None]:
# Number of missing values in each column of the raw frame.
df.isna().sum()

In [None]:
# Class balance of the target column: row count per distinct value.
df['not.fully.paid'].value_counts()

In [None]:
# Handling the imbalanced dataset: separate the rows by target class and
# report the shape of each subset to see how uneven the classes are.
target = df['not.fully.paid']
not_fully_paid_0 = df[target == 0]
not_fully_paid_1 = df[target == 1]

for label, subset in (
    ('not_fully_paid_0', not_fully_paid_0),
    ('not_fully_paid_1', not_fully_paid_1),
):
    print(label, subset.shape)

In [None]:
# Handling imbalanced data: upsample the minority class (sampling with
# replacement) until it has as many rows as the majority class, then shuffle.
from sklearn.utils import resample
from sklearn.utils import shuffle

# Fixed seed so that "Restart & Run All" reproduces the same resampled frame.
RESAMPLE_SEED = 42

df_minority_upsampled = resample(
    not_fully_paid_1,
    replace=True,
    # Derive the target size from the majority class instead of hard-coding
    # 8045, so the cell stays correct if the input data changes.
    n_samples=len(not_fully_paid_0),
    random_state=RESAMPLE_SEED,
)

# The original row labels are kept (no ignore_index), so every copy of an
# upsampled minority row carries the same index label as its source row.
new_df = pd.concat([not_fully_paid_0, df_minority_upsampled])

# NOTE(review): upsampling happens before any train/test split, so copies of
# the same minority row can land on both sides of a plain row-wise split.
# Any split of new_df should keep rows with the same index label together.
new_df = shuffle(new_df, random_state=RESAMPLE_SEED)

In [None]:
# Row count per target class in the resampled frame, to verify the upsampling.
new_df['not.fully.paid'].value_counts()  #imbalanced data handled

In [None]:
from sklearn.preprocessing import LabelEncoder

# `le` is kept as a module-level name for backward compatibility; after the
# loop it refers to the encoder fitted on the last encoded column.
le = LabelEncoder()

# One fitted encoder per encoded column, so the integer codes can later be
# mapped back to the original labels via label_encoders[col].classes_.
label_encoders = {}

for col in new_df.columns:
    # Encode every non-numeric column. Comparing the dtype to 'object' only
    # would silently skip columns stored with a string or category dtype,
    # leaving text in the frame for the numeric steps that follow.
    if not pd.api.types.is_numeric_dtype(new_df[col]):
        le = LabelEncoder()
        new_df[col] = le.fit_transform(new_df[col])
        label_encoders[col] = le

In [None]:
# Preview the first rows of the resampled frame at this point in the notebook.
new_df.head()

In [None]:
# Absolute correlation of every column with the target, strongest first.
target_corr = new_df.corr()['not.fully.paid'].abs()
target_corr.sort_values(ascending=False)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
f,axes = plt.subplots(1,1,figsize = (12,12))
sns.heatmap(new_df.corr(), annot = True, ax = axes)

In [None]:
# Columns used as model inputs; the target is "not.fully.paid".
feature_columns = [
    "int.rate",
    "fico",
    "credit.policy",
    "inq.last.6mths",
    "revol.util",
    "pub.rec",
    "installment",
    "purpose",
]
X = new_df[feature_columns]
y = new_df["not.fully.paid"]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import StandardScaler

# The frame was upsampled with replacement before this point, and every copy
# of a minority row kept the index label of its source row. A plain row-wise
# split would scatter those copies over train and test, so the model would be
# evaluated on rows it has already seen. Splitting by index label keeps all
# copies of one original row on the same side.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_pos, test_pos = next(splitter.split(X, y, groups=X.index.to_numpy()))

# test_size is a fraction of the distinct original rows (groups), so the
# share of test rows is close to, but not exactly, 20%.
X_train, X_test = X.iloc[train_pos], X.iloc[test_pos]
y_train, y_test = y.iloc[train_pos], y.iloc[test_pos]

In [None]:
# Learn the scaling statistics from the training features only, then apply
# the same transformation to both the training and the test features.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [None]:
from tensorflow import keras

# Take the input width from the training matrix instead of hard-coding 8, so
# the network stays in sync with the selected feature columns.
n_features = X_train.shape[1]

# Fully connected binary classifier: three 256-unit ReLU layers, dropout
# after the second and third, and a single sigmoid output unit.
model = keras.Sequential(
    [
        # An explicit Input object replaces the `input_shape` argument on
        # the first Dense layer.
        keras.Input(shape=(n_features,)),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
)
model.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and binary
# accuracy as the reported metric.
model.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

In [None]:
import numpy as np
from sklearn.model_selection import GroupShuffleSplit

# Stop once the monitored validation quantity has not improved by at least
# 0.001 for 10 epochs, and roll back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    patience=10,
    min_delta=0.001,
    restore_best_weights=True,
)

# Early stopping (and restore_best_weights) pick the model based on the
# validation data. Using the test set for that would bias every metric later
# computed on the test set, so a validation hold-out is carved out of the
# training data and the test set stays untouched until evaluation.
X_train_arr = np.asarray(X_train)
y_train_arr = np.asarray(y_train)

# Rows duplicated by upsampling share an index label; grouping on it keeps
# all copies of one original row on the same side of the hold-out split.
# Falls back to one group per row when y_train carries no index.
row_groups = np.asarray(getattr(y_train, "index", np.arange(len(y_train_arr))))
val_splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
fit_pos, val_pos = next(
    val_splitter.split(X_train_arr, y_train_arr, groups=row_groups)
)

history = model.fit(
    X_train_arr[fit_pos], y_train_arr[fit_pos],
    validation_data=(X_train_arr[val_pos], y_train_arr[val_pos]),
    batch_size=256,
    epochs=1000,  # upper bound only; early stopping decides the real length
    callbacks=[early_stopping],
    verbose=1,
)

In [None]:
# Turn the model's sigmoid outputs into hard 0/1 labels with a 0.5 threshold.
probabilities = model.predict(X_test)
predictions = (probabilities > 0.5).astype("int32")

predictions

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Fraction of test rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=predictions)

In [None]:
# Per-class precision, recall and F1 on the test set, printed as plain text.
report = classification_report(y_test, predictions)
print(report)