In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
import warnings
from collections import Counter
warnings.filterwarnings('ignore')
import seaborn as sns
import matplotlib.pyplot as plt
from keras import Sequential
from keras.layers import Dense

In [2]:
# Load the loan records from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='bankloan.csv')

In [3]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how='any')

In [4]:
# Remove the Loan_ID column so it is not used as a feature.
# Not idempotent: re-running this cell raises KeyError once the column is gone.
df = df.drop(columns=['Loan_ID'])

In [5]:
# Multiply LoanAmount by 1000 and store it as an integer column.
# NOTE(review): presumably the raw column is expressed in thousands -- confirm
# against the dataset description. Re-running this cell scales the values again.
df['LoanAmount'] = df['LoanAmount'].mul(1000).astype(int)

In [6]:
# Share of rows whose Loan_Status is 'Y' (a quick class-balance check).
loan_status = df['Loan_Status']
Counter(loan_status)['Y'] / loan_status.size

0.6916666666666667

In [7]:
# Target column, still holding the raw 'Y'/'N' labels.
pre_y = df.loc[:, 'Loan_Status']

In [8]:
# Feature frame: every remaining column except the target.
pre_x = df.drop(columns=['Loan_Status'])

In [9]:
# One-hot encode the features with pandas' default dummy-variable handling.
dm_X = pd.get_dummies(data=pre_x)

In [18]:
# Pickle the one-hot encoded feature frame to 'allcol.pkl'.
# `dump` is imported in this cell because the notebook's only other pickle
# import lives in a later cell; on a fresh kernel (Restart & Run All) this
# line would otherwise fail with NameError.
from pickle import dump

# The context manager guarantees the file is flushed and closed.
with open('allcol.pkl', 'wb') as allcol_file:
    dump(dm_X, allcol_file)

In [11]:
# Encode the target as integers: 'Y' -> 1, 'N' -> 0.
dm_y = pre_y.map({'Y': 1, 'N': 0})

In [12]:
# SMOTE oversampler that resamples only the minority class, with a fixed seed.
smote = SMOTE(random_state=42, sampling_strategy='minority')

In [13]:
# Oversample the minority class so both classes are equally represented.
# `fit_resample` is the supported method name; `fit_sample` was only a
# deprecated alias and is absent from newer imbalanced-learn releases.
# NOTE(review): resampling happens before the train/test split further down,
# so synthetic rows may be interpolated from rows that end up in the test
# set -- consider splitting first and resampling the training part only.
X1, y = smote.fit_resample(dm_X, dm_y)

In [14]:
# Min-max scaler with the default target range, spelled out explicitly.
sc = MinMaxScaler(feature_range=(0, 1))

In [15]:
# Learn per-column min/max from the resampled features, then rescale them.
# The scaler is fitted on all rows, including those later held out for testing.
X = sc.fit(X1).transform(X1)

In [16]:
from pickle import dump

# Persist the fitted scaler to 'scaler.pkl'.
# The context manager closes the file handle instead of leaving an open
# handle for the garbage collector to clean up.
with open('scaler.pkl', 'wb') as scaler_file:
    dump(sc, scaler_file)

In [17]:
# Record the environment next to the pickled scaler: prints the Python,
# platform and dependency versions that scikit-learn reports.
import sklearn

sklearn.show_versions()


System:
    python: 3.7.3 (default, Mar 27 2019, 16:54:48)  [Clang 4.0.1 (tags/RELEASE_401/final)]
executable: /Users/ryankirkland/anaconda3/bin/python
   machine: Darwin-19.6.0-x86_64-i386-64bit

Python dependencies:
          pip: 20.1.1
   setuptools: 49.2.0
      sklearn: 0.24.1
        numpy: 1.19.5
        scipy: 1.3.0
       Cython: 0.29.12
       pandas: 1.0.3
   matplotlib: 3.0.3
       joblib: 0.13.2
threadpoolctl: 2.1.0

Built with OpenMP: True


In [None]:
# Shuffle and hold out 20% of the rows for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=True,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Feed-forward binary classifier: three ReLU hidden layers (200, 400 and 4
# units) followed by a single sigmoid output unit.
n_features = X_test.shape[1]
classifier = Sequential([
    Dense(200, input_dim=n_features, kernel_initializer='random_normal', activation='relu'),
    Dense(400, kernel_initializer='random_normal', activation='relu'),
    Dense(4, kernel_initializer='random_normal', activation='relu'),
    Dense(1, kernel_initializer='random_normal', activation='sigmoid'),
])
classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
classifier.fit(X_train, y_train, epochs=50, batch_size=20, verbose=0)
# These metrics are computed on the training split, not on the held-out
# split, so they describe fit quality rather than generalisation.
eval_model = classifier.evaluate(X_train, y_train)
eval_model

In [None]:
# Turn the sigmoid outputs into boolean class predictions at a 0.5 threshold.
y_pred = classifier.predict(X_test) > 0.5

In [None]:
# Confusion matrix of the neural network on the held-out split.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Heatmap of the neural network's confusion matrix.
fig, ax = plt.subplots()
# fmt='d' prints the integer counts verbatim; seaborn's default '.2g' format
# would show any count of 100 or more in scientific notation (e.g. 1e+02).
sns.heatmap(cm, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
# The target was encoded N=0, Y=1, so the first row/column is 'No'.
ax.xaxis.set_ticklabels(['No', 'Yes'])
ax.yaxis.set_ticklabels(['No', 'Yes'])

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest baseline with tree depth capped at 10.
# The seed makes the forest reproducible across kernel restarts and matches
# the random_state=42 already used for SMOTE and the train/test split.
rf = RandomForestClassifier(max_depth=10, random_state=42)

In [None]:
# Train the forest on the training split.
rf.fit(X=X_train, y=y_train)

In [None]:
# Class predictions of the forest for the held-out rows.
y_pred_rf = rf.predict(X=X_test)

In [None]:
# Confusion matrix of the random forest on the held-out split.
cm_rf = confusion_matrix(y_true=y_test, y_pred=y_pred_rf)

In [None]:
# Heatmap of the random forest's confusion matrix.
fig, ax = plt.subplots()
# fmt='d' prints the integer counts verbatim; seaborn's default '.2g' format
# would show any count of 100 or more in scientific notation (e.g. 1e+02).
sns.heatmap(cm_rf, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
# The target was encoded N=0, Y=1, so the first row/column is 'No'.
ax.xaxis.set_ticklabels(['No', 'Yes'])
ax.yaxis.set_ticklabels(['No', 'Yes'])

In [None]:
# Write the trained Keras model to the 'loan_model' path.
classifier.save(filepath='loan_model')

In [None]:
from keras.models import load_model

# Read the saved model back from disk to check that it round-trips.
loaded = load_model(filepath='loan_model')

In [None]:
# Predict with the reloaded model and threshold the sigmoid outputs at 0.5.
y_pred_loaded = loaded.predict(X_test) > 0.5

In [None]:
# Confusion matrix of the reloaded model on the held-out split.
cm_loaded = confusion_matrix(y_test, y_pred_loaded)

fig, ax = plt.subplots()
# fmt='d' prints the integer counts verbatim; seaborn's default '.2g' format
# would show any count of 100 or more in scientific notation (e.g. 1e+02).
sns.heatmap(cm_loaded, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
# The target was encoded N=0, Y=1, so the first row/column is 'No'.
ax.xaxis.set_ticklabels(['No', 'Yes'])
ax.yaxis.set_ticklabels(['No', 'Yes'])