In [0]:
from sklearn.base import clone
from sklearn.base import BaseEstimator, ClassifierMixin
# from utils import balanced_class_weights
import numpy as np
import sklearn.metrics
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import itertools
from sklearn.metrics import roc_curve, auc, roc_auc_score, log_loss, accuracy_score, confusion_matrix
from patsy import dmatrices
from math import log
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix  
from sklearn.metrics import precision_recall_fscore_support as score
import copy 
from sklearn.preprocessing import StandardScaler 

# Load the credit-card default sheet (column names live on the second row of
# the workbook) and discard the row identifier, which carries no signal.
df = (
    pd.read_excel('default of credit card clients.xls', header=1)
    .drop(columns='ID')
)

In [0]:
def plot_cm(ax, y_true, y_pred, classes, title, th=0.5, cmap=plt.cm.Blues):
    """Draw an annotated confusion-matrix heat map on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    y_true : ground-truth labels.
    y_pred : array of predictions; values strictly greater than ``th``
        are mapped to 1, everything else to 0.
    classes : tick labels for both axes.
    title : title placed above the matrix.
    th : binarisation threshold applied to ``y_pred``.
    cmap : colormap handed to ``imshow``.
    """
    # Binarise the predictions before counting.
    hard_labels = (y_pred > th).astype(int)
    matrix = confusion_matrix(y_true, hard_labels)

    ax.imshow(matrix, interpolation='nearest', cmap=cmap)
    ax.set_title(title)

    positions = np.arange(len(classes))
    ax.set_xticks(positions)
    ax.set_yticks(positions)
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)

    # Counts above half the maximum are written in white, the rest in black,
    # so the annotation contrasts with the cell colour.
    midpoint = matrix.max() / 2.
    n_rows, n_cols = matrix.shape
    for row in range(n_rows):
        for col in range(n_cols):
            count = matrix[row, col]
            text_colour = "white" if count > midpoint else "black"
            ax.text(col, row, count,
                    horizontalalignment="center",
                    color=text_colour)

    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')

def plot_auc(ax, y_train, y_train_pred, y_test, y_test_pred, th=0.5):
    """Plot train and test ROC curves on ``ax`` with accuracy/AUC in the legend.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    y_train, y_test : ground-truth labels for each split.
    y_train_pred, y_test_pred : prediction arrays passed to ``roc_curve``;
        they are also thresholded at ``th`` to compute accuracy.
    th : values strictly greater than this count as class 1 for accuracy.
    """

    def _summarise(truth, scores):
        # ROC coordinates, area under the curve, and thresholded accuracy.
        hard_labels = (scores > th).astype(int)
        fpr, tpr, _ = roc_curve(truth, scores)
        return fpr, tpr, auc(fpr, tpr), accuracy_score(truth, hard_labels)

    train_fpr, train_tpr, train_auc, train_acc = _summarise(y_train, y_train_pred)
    test_fpr, test_tpr, test_auc, test_acc = _summarise(y_test, y_test_pred)

    # Curves first, then the chance diagonal, so the two legend entries
    # attach to the train and test lines in that order.
    ax.plot(train_fpr, train_tpr)
    ax.plot(test_fpr, test_tpr)
    ax.plot([0, 1], [0, 1], 'k--')

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC curve')

    ax.legend([
        'train acc = {:.3f}, auc = {:.2f}'.format(train_acc, train_auc),
        'test acc = {:.3f}, auc = {:.2f}'.format(test_acc, test_auc),
    ])


### MetaCost

In [0]:
# First 23 columns are the features; the column at position 23 is the target.
x, y = df.iloc[:, :23], df.iloc[:, 23]

In [0]:
def full_resamples(X, y, nresamples, random_state=None):
    """Draw stratified bootstrap resamples of a binary-labelled data set.

    Each resample keeps the original number of class-0 and class-1 rows but
    draws the rows of each class with replacement. Rows whose label is
    neither 0 nor 1 are ignored.

    Parameters
    ----------
    X : array-like of features, one row per observation.
    y : array-like of 0/1 labels aligned with the rows of ``X``.
    nresamples : number of resamples to generate.
    random_state : optional int seed (or anything accepted by
        ``np.random.RandomState``). ``None`` (default) draws from NumPy's
        global random state, so ``np.random.seed`` still controls the result.

    Returns
    -------
    list of ``(X_resampled, y_resampled)`` tuples of NumPy arrays; within each
    tuple all class-0 rows come first, followed by all class-1 rows.
    """
    # Work positionally on plain arrays so pandas inputs behave like ndarrays.
    X = np.asarray(X)
    y = np.asarray(y)

    n0 = int(np.sum(y == 0))  # do a stratified full resample
    n1 = int(np.sum(y == 1))
    _X = np.r_[X[y == 0], X[y == 1]]  # re-order to simplify things
    _y = np.r_[np.zeros(n0, int), np.ones(n1, int)]

    # The np.random module and a RandomState instance both expose randint.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    def _draw(n):
        # Guard so randint is never asked to sample from an empty range when
        # one of the classes is absent.
        return rng.randint(0, n, n) if n > 0 else np.zeros(0, dtype=int)

    s = []
    for _ in range(nresamples):
        # Class-0 indices are drawn before class-1 indices, which are offset
        # by n0 because class-1 rows sit after class-0 rows in _X.
        r = np.r_[_draw(n0), _draw(n1) + n0]
        s.append((_X[r], _y[r]))
    return s

In [0]:
# Hold out 20% of the rows for testing; the split is seeded for repeatability.
# NOTE: a later cell rebinds `x` and `y`, so re-running this cell after that
# one will not split the original data.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [0]:
# Cost matrix used for relabelling. As applied below (P @ c), the expected
# cost of assigning label k to an observation is sum_j P(j | x) * c[j, k]:
# rows index the estimated true class, columns the label being assigned.
c=np.array([[0,10000],[1000,0]])
MLP = MLPClassifier(hidden_layer_sizes=(60,50), max_iter=100,learning_rate_init=0.001)
m=10     # number of bootstrap models in the ensemble
p=True   # True: average class probabilities; False: average hard votes
x=X_train
y = np.array(y_train, copy=True)  # private copy; y_train itself stays untouched

# Step 1. Train everything: one fresh clone of the base model per stratified
# bootstrap resample.
M = [clone(MLP).fit(Xt, yt) for Xt, yt in full_resamples(x, y, m)]

# Step 2. Per observation, action (i.e. relabel).
# Each model scores the whole training matrix in a single batched call rather
# than one call per row, which removes len(x) * m tiny predict calls.
if p:
    Pj = [model.predict_proba(x) for model in M]
else:
    # One-hot votes: (1, 0) where the model predicts 0, (0, 1) otherwise.
    Pj = [np.where(model.predict(x)[:, None] == 0, (1, 0), (0, 1)) for model in M]
P = np.mean(Pj, axis=0)                  # (n_samples, 2) ensemble estimate
y = np.argmin(np.matmul(P, c), axis=1)   # cheapest label for every row

In [0]:
# Wrap the relabelled targets in a one-column frame and show which classes
# survived the relabelling.
y = pd.Series(y, name="label").to_frame()
y["label"].unique()

In [0]:
# Refit the base network on the scaled training features against the
# relabelled targets, flattened to a 1-D array.
relabelled_targets = y.to_numpy().ravel()
MLP.fit(X_train, relabelled_targets)

In [0]:
# Hard class predictions of the refitted network on both splits.
predictions_on_train, predictions_on_test = (
    MLP.predict(split) for split in (X_train, X_test)
)

In [0]:
# Test-set summary against the original labels: raw confusion counts first,
# then the per-class precision / recall / F1 table.
for report in (confusion_matrix, classification_report):
    print(report(y_test, predictions_on_test))

In [0]:
# ROC analysis needs continuous scores. Feeding hard 0/1 predictions to
# roc_curve collapses the curve to a single operating point, so the reported
# AUC would not measure ranking quality. Use the probability of the positive
# class instead; index() raises a clear ValueError if the refitted model
# never saw class 1.
positive_col = list(MLP.classes_).index(1)
scores_on_train = MLP.predict_proba(X_train)[:, positive_col]
scores_on_test = MLP.predict_proba(X_test)[:, positive_col]

fig, ax = plt.subplots(1, 3, figsize=(15, 5))
# Confusion matrices keep using the hard predictions so they agree with the
# printed classification report.
plot_cm(ax[0],  y_train, predictions_on_train, [0,1], 'Confusion matrix (TRAIN)')
plot_cm(ax[1],  y_test, predictions_on_test,   [0,1], 'Confusion matrix (TEST)')
plot_auc(ax[2], y_train, scores_on_train, y_test, scores_on_test)
    

KeyboardInterrupt: ignored