In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from collections import Counter
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler 
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, classification_report
from sklearn.ensemble import GradientBoostingClassifier
import pydotplus
from sklearn import tree
from IPython.display import Image
import itertools
import numpy as np
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE, ADASYN
from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFE
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import RFECV
from sklearn.ensemble import RandomForestClassifier
import os, errno
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.cluster import DBSCAN

# Utilities 

In [None]:

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues, save=False, path="/home/"):
    """
    Print a confusion matrix and draw it as an annotated heat map.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix; rows are treated as true labels and columns as
        predicted labels (this is how the axes are labelled below).
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool, default False
        When true, every row is divided by its row sum before printing and
        plotting. Rows whose sum is zero are left as zeros instead of
        producing NaN.
    title : str
        Title drawn above the heat map.
    cmap : matplotlib colormap
        Colormap handed to ``plt.imshow``.
    save : bool or str, default False
        When true the figure is written to ``path + ".pdf"``. The legacy
        string spellings ``"True"`` / ``"False"`` are accepted as well.
    path : str
        Output path without the ``.pdf`` extension; only used when saving.
    """
    cm = np.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero; empty rows stay at 0.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as integers; anything else (normalized
    # values, or a float matrix passed in directly) gets two decimals so that
    # the 'd' format code cannot fail on floats.
    use_float_fmt = normalize or not np.issubdtype(cm.dtype, np.integer)
    fmt = '.2f' if use_float_fmt else 'd'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

    # Accept real booleans as well as the string spellings that the old
    # default (save="False") suggested.
    if isinstance(save, str):
        should_save = save.strip().lower() == "true"
    else:
        should_save = bool(save)
    if should_save:
        plt.savefig(path + ".pdf")

    # Render the figure before releasing it; clearing it first would leave
    # only an empty figure for the notebook to display.
    plt.show()
    plt.close()

# Filtering Attributes

In [None]:
# Load the data set and append two row-wise features computed from the
# columns at positions 5..9:
#   sums - the sum of those five columns
#   g3   - how many of those five columns hold a value >= 1
# NOTE(review): which columns sit at positions 5..9 depends on the CSV
# layout, which is not visible here - confirm.
reduced = pd.read_csv("/home/dataset.csv")
reduced = reduced.assign(sums=lambda frame: frame.iloc[:, 5:10].sum(axis=1))
reduced = reduced.assign(
    g3=lambda frame: frame.iloc[:, 5:10].ge(1, axis=1).sum(axis=1))

# `cols` starts as every column name and has the names below removed one by
# one, in this order; list.remove raises ValueError if a name is missing.
# NOTE(review): `cols` is not read again in the cells shown here - confirm
# whether it is still needed.
cols = reduced.columns.tolist()
for _name in (
        'age', 'status', 'education', 'sex', 'credit_default',
        'pa-apr', 'pa-may', 'pa-jun', 'pa-jul', 'pa-aug', 'pa-sep',
        'ps-apr', 'ps-may', 'ps-jun', 'ps-jul', 'ps-aug', 'ps-sep',
        'sums',
        'ba-apr', 'ba-may', 'ba-jun', 'ba-jul', 'ba-aug', 'ba-sep',
        'g3'):
    cols.remove(_name)

# Keep only the features used for modelling plus the target column.
# A smaller subset tried earlier: ['ps-sep', 'g3', 'ps-apr', 'credit_default'].
reduced = reduced.filter(
    items=['g3', 'ps-sep', 'ps-apr', 'ps-jul', 'ps-aug', 'ps-jun', 'ps-may',
           'credit_default'],
    axis=1)



# Split dataset into Training and Test sets

In [None]:
# Target column first, then every remaining column as a predictor.
y = reduced['credit_default']
attributes = [name for name in reduced.columns if name != 'credit_default']
X = reduced[attributes]

# Hold out 30% of the rows for testing, stratified on y, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=100,
    stratify=y,
)

# UnderSampling

In [None]:
# Ratio handed to the sampler as its sampling strategy.
rat = 1

# `sampling_strategy` and `fit_resample` are the current imbalanced-learn
# names for what used to be `ratio` and `fit_sample`; the old spellings were
# deprecated and later removed from the library.
sm = RandomUnderSampler(sampling_strategy=rat, random_state=42)

# Only the training split is resampled; the test split is left untouched.
X_train, y_train = sm.fit_resample(X_train, y_train)

# Report the resulting sizes of all four pieces.
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# Multi Layer Perceptron

In [None]:
# Candidate values for the hyper-parameter search.
activation = ['relu', 'tanh', 'logistic']
solver = ['lbfgs', 'sgd', 'adam']
alpha = [0.0001, 1e-5, 0.01, 0.001]
batch_size = [1, 100]
learning_rate = ['constant']
learning_rate_init = [0.001, 0.01, 0.2, 0.3, 0.8, 1]

# NOTE(review): `solver` and `alpha` are defined above but are not part of
# the grid below - confirm whether they were meant to be searched.
# NOTE(review): `hidden_layer_sizes` is given as the bare tuple (7,) rather
# than a list of candidates - confirm this is the intended spelling.
param_grid = {
    'hidden_layer_sizes': (7,),
    'activation': activation,
    'batch_size': batch_size,
    'learning_rate': learning_rate,
    'learning_rate_init': learning_rate_init,
}

# 15-fold stratified cross-validation, selecting on F1.
mlp = MLPClassifier(random_state=100, max_iter=1000)
grid = GridSearchCV(
    estimator=mlp,
    param_grid=param_grid,
    scoring='f1',
    cv=StratifiedKFold(n_splits=15),
)
grid_result = grid.fit(X_train, y_train)

# Keep the best estimator for the scoring cells, then report the ROC AUC on
# the held-out test set followed by the winning parameter combination.
opt_nn = grid_result.best_estimator_
nn_performance = roc_auc_score(
    y_test, grid_result.predict_proba(X_test)[:, 1])
print("NeuralNet: Area under the ROC curve = {}".format(nn_performance))
print("{}".format(grid_result.best_params_))


# Scores

In [None]:
# Score the tuned network on the training set it was fitted on.
y_pred3 = opt_nn.predict(X_train)

a1 = 'Accuracy %s' % accuracy_score(y_train, y_pred3)
print(a1)
r1 = 'Recall: %s' % recall_score(y_train, y_pred3, average='weighted')
print(r1)
p1 = 'Precision: %s' % precision_score(y_train, y_pred3, average='weighted')
print(p1)
f1 = 'F1-score %s' % f1_score(y_train, y_pred3, average='weighted')
print(f1)

report3 = classification_report(y_train, y_pred3)
print(report3)

cm4 = confusion_matrix(y_train, y_pred3)
# The helper defined in this notebook is `plot_confusion_matrix`; the
# previous call used `plot_confusion_matrix2`, which is not defined in the
# visible cells and would raise NameError on a fresh kernel.
plot_confusion_matrix(cm4, classes=opt_nn.classes_, normalize=True,
                      title='Normalized confusion matrix')

In [None]:
# NOTE(review): this cell repeats the training-set scoring cell directly
# above it statement for statement - consider deleting one of the two.
y_pred3 = opt_nn.predict(X_train)

a1 = 'Accuracy %s' % accuracy_score(y_train, y_pred3)
print(a1)
r1 = 'Recall: %s' % recall_score(y_train, y_pred3, average='weighted')
print(r1)
p1 = 'Precision: %s' % precision_score(y_train, y_pred3, average='weighted')
print(p1)
f1 = 'F1-score %s' % f1_score(y_train, y_pred3, average='weighted')
print(f1)

report3 = classification_report(y_train, y_pred3)
print(report3)

cm4 = confusion_matrix(y_train, y_pred3)
# Call the helper that this notebook actually defines; the name
# `plot_confusion_matrix2` is not defined in the visible cells.
plot_confusion_matrix(cm4, classes=opt_nn.classes_, normalize=True,
                      title='Normalized confusion matrix')


In [None]:
# Score the tuned network on the held-out test set.
y_pred4 = opt_nn.predict(X_test)

a2 = 'Accuracy %s' % accuracy_score(y_test, y_pred4)
print(a2)
r2 = 'Recall: %s' % recall_score(y_test, y_pred4, average='weighted')
print(r2)
p2 = 'Precision: %s' % precision_score(y_test, y_pred4, average='weighted')
print(p2)
f2 = 'F1-score %s' % f1_score(y_test, y_pred4, average='weighted')
print(f2)

report4 = classification_report(y_test, y_pred4)
print(report4)

cm5 = confusion_matrix(y_test, y_pred4)
# Call the helper that this notebook actually defines; the name
# `plot_confusion_matrix2` is not defined in the visible cells and would
# raise NameError on a fresh kernel.
plot_confusion_matrix(cm5, classes=opt_nn.classes_, normalize=True,
                      title='Normalized confusion matrix')