In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

## Robustness

In [107]:
def Robustness(x_test, model):
    
    epsilon_up_list = []
    epsilon_down_list = []
    temp_up = 0
    temp_down = 0

    data_mean = round(x_test.mean(),2)/100
    y_predict = model.predict(x_test).reshape(1,-1)[0]
    y_predict = [int(round(x)) for x in y_predict]
    
    for j in range(1,101):
        x_epsilon = x_test.copy()
        epsilon_up = x_epsilon + (j * data_mean)
        epsilon_down = x_epsilon - (j * data_mean)
        y_up = model.predict(epsilon_up).reshape(1,-1)[0]
        y_up = [int(round(x)) for x in y_up]
        y_down = model.predict(epsilon_down).reshape(1,-1)[0]
        y_down = [int(round(x)) for x in y_down]
        df_up = pd.DataFrame(data = {'y_up':y_up , 'y_pred':y_predict})
        df_down = pd.DataFrame(data = {'y_down':y_down , 'y_pred':y_predict})
        diff_up = df_up[df_up.y_up != df_up.y_pred].shape[0]
        diff_down = df_down[df_down.y_down != df_down.y_pred].shape[0]
        epsilon_up_list.append(diff_up - temp_up)
        epsilon_down_list.append(diff_down - temp_down)
        temp_up = diff_up
        temp_down = diff_down
        
    epsilon_up_list = list(map(lambda x: max(x,0),epsilon_up_list))
    epsilon_down_list = list(map(lambda x: max(x,0),epsilon_down_list))
    epsilon_all = list(map(max, epsilon_up_list, epsilon_down_list))
    print('Robustness =', round((x_test.shape[0] - sum(epsilon_all))/(x_test.shape[0]),2),'%')

## Edge cases

In [120]:
def Edge_case_analysis(x_test, model):

        #  epsilon for edge case
        eps = 0.5
        edge_df = pd.DataFrame()
        try:
            num_feats = x_test.columns
        except:
            num_feats = pd.DataFrame(x_test).columns
            
        for c in range(len(num_feats)):     
            pred_label = model.predict(x_test).tolist()
            # value + epsilon
            x_up = x_test.copy()
            x_up[num_feats[c]] += x_up[num_feats[c]]*eps
            pred_up = model.predict(x_up).tolist()

            # value - epsilon
            x_dwn = x_test.copy()
            x_dwn[num_feats[c]] -= x_dwn[num_feats[c]]*eps
            pred_dwn = model.predict(x_dwn).tolist()

            df_ = pd.DataFrame({ 'pred_label' : pred_label, 'pred_label_up' : pred_up, 'pred_label_down' : pred_dwn })
            df_['features'] = num_feats[c]
            edge_df = pd.concat([edge_df, df_], axis = 0)
        up = edge_df[edge_df['pred_label'] != edge_df['pred_label_up']].shape[0]
        down = edge_df[edge_df['pred_label'] != edge_df['pred_label_down']].shape[0]
        max_edge = max(up, down)
        print(f'edge_case = {round(1 - ((max_edge) / (x_test.shape[0] * x_test.shape[1])),5) } %')

## XGB Model

In [109]:
df = pd.read_csv('input_data.csv', index_col=[0])

In [110]:
y = df['target'] 
X = df.drop(['target'], axis=1)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=17)

In [111]:
import xgboost as xgb
xgb_model = xgb.XGBClassifier().fit(x_train, y_train)
print('score = ',round(xgb_model.score(x_test, y_test),3),'%')
Robustness(x_test, xgb_model)
Edge_case_analysis(x_test, xgb_model)

score =  0.999 %
Robustness = 0.74 %
edge_case = 0.99985 %


## NN Model

In [112]:
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers

hidden_units=100
learning_rate=0.01
hidden_layer_act='tanh'
output_layer_act='sigmoid'
no_epochs=100

NN_model = Sequential()
NN_model.add(Dense(hidden_units, input_dim= X.shape[1], activation=hidden_layer_act))
NN_model.add(Dense(hidden_units, activation=hidden_layer_act))
NN_model.add(Dense(1, activation = output_layer_act))


sgd=optimizers.SGD(learning_rate=learning_rate)
NN_model.compile(loss='binary_crossentropy',optimizer=sgd, metrics=['acc'])
NN_model.fit(x_train, y_train, epochs=no_epochs, batch_size=len(X),  verbose=2)

Epoch 1/100
1/1 - 14s - loss: 0.7505 - acc: 0.4019 - 14s/epoch - 14s/step
Epoch 2/100
1/1 - 0s - loss: 0.4504 - acc: 0.9663 - 16ms/epoch - 16ms/step
Epoch 3/100
1/1 - 0s - loss: 0.3852 - acc: 0.9656 - 17ms/epoch - 17ms/step
Epoch 4/100
1/1 - 0s - loss: 0.3418 - acc: 0.9656 - 17ms/epoch - 17ms/step
Epoch 5/100
1/1 - 0s - loss: 0.3095 - acc: 0.9650 - 20ms/epoch - 20ms/step
Epoch 6/100
1/1 - 0s - loss: 0.2837 - acc: 0.9647 - 24ms/epoch - 24ms/step
Epoch 7/100
1/1 - 0s - loss: 0.2622 - acc: 0.9641 - 20ms/epoch - 20ms/step
Epoch 8/100
1/1 - 0s - loss: 0.2457 - acc: 0.9644 - 18ms/epoch - 18ms/step
Epoch 9/100
1/1 - 0s - loss: 0.2335 - acc: 0.9647 - 24ms/epoch - 24ms/step
Epoch 10/100
1/1 - 0s - loss: 0.2235 - acc: 0.9647 - 23ms/epoch - 23ms/step
Epoch 11/100
1/1 - 0s - loss: 0.2147 - acc: 0.9647 - 26ms/epoch - 26ms/step
Epoch 12/100
1/1 - 0s - loss: 0.2068 - acc: 0.9647 - 16ms/epoch - 16ms/step
Epoch 13/100
1/1 - 0s - loss: 0.1996 - acc: 0.9647 - 18ms/epoch - 18ms/step
Epoch 14/100
1/1 - 0s 

<keras.callbacks.History at 0x1bc5a848d60>

In [113]:
Robustness(x_test, NN_model)



Robustness = 0.66 %


In [114]:
Edge_case_analysis(x_test, NN_model)



edge_case = 0.87237 %


## Decision Tree

In [115]:
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(x_train, y_train)
print('score = ',round(dt.score(x_test, y_test),3),'%')
Robustness(x_test, dt)
Edge_case_analysis(x_test, dt)

score =  0.999 %
Robustness = 0.17 %
edge_case = 0.99976 %


# Breast Cancer Dataset

In [116]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
X, y = load_breast_cancer(return_X_y =True, as_frame = True)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=16)
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

## XGB Model

In [121]:
xgb_model = xgb.XGBClassifier().fit(x_train, y_train)
print('score = ',round(xgb_model.score(x_test, y_test),3),'%')
Robustness(x_test, xgb_model)
Edge_case_analysis(x_test, xgb_model)

score =  0.963 %
Robustness = 0.96 %
edge_case = 0.99965 %


## NN Model

In [122]:
hidden_units=100
learning_rate=0.01
hidden_layer_act='tanh'
output_layer_act='sigmoid'
no_epochs=100

NN_model_BCancer = Sequential()
NN_model_BCancer.add(Dense(hidden_units, input_dim= X.shape[1], activation=hidden_layer_act))
NN_model_BCancer.add(Dense(hidden_units, activation=hidden_layer_act))
NN_model_BCancer.add(Dense(1, activation = output_layer_act))


sgd=optimizers.SGD(learning_rate=learning_rate)
NN_model_BCancer.compile(loss='binary_crossentropy',optimizer=sgd, metrics=['acc'])
NN_model_BCancer.fit(x_train, y_train, epochs=no_epochs, batch_size=len(X),  verbose=2)

Epoch 1/100
1/1 - 1s - loss: 0.7447 - acc: 0.3806 - 514ms/epoch - 514ms/step
Epoch 2/100
1/1 - 0s - loss: 0.6935 - acc: 0.5381 - 11ms/epoch - 11ms/step
Epoch 3/100
1/1 - 0s - loss: 0.6486 - acc: 0.6483 - 12ms/epoch - 12ms/step
Epoch 4/100
1/1 - 0s - loss: 0.6091 - acc: 0.7454 - 13ms/epoch - 13ms/step
Epoch 5/100
1/1 - 0s - loss: 0.5744 - acc: 0.8005 - 16ms/epoch - 16ms/step
Epoch 6/100
1/1 - 0s - loss: 0.5436 - acc: 0.8346 - 15ms/epoch - 15ms/step
Epoch 7/100
1/1 - 0s - loss: 0.5164 - acc: 0.8504 - 19ms/epoch - 19ms/step
Epoch 8/100
1/1 - 0s - loss: 0.4921 - acc: 0.8583 - 13ms/epoch - 13ms/step
Epoch 9/100
1/1 - 0s - loss: 0.4704 - acc: 0.8688 - 13ms/epoch - 13ms/step
Epoch 10/100
1/1 - 0s - loss: 0.4509 - acc: 0.8845 - 14ms/epoch - 14ms/step
Epoch 11/100
1/1 - 0s - loss: 0.4333 - acc: 0.8819 - 17ms/epoch - 17ms/step
Epoch 12/100
1/1 - 0s - loss: 0.4174 - acc: 0.8898 - 15ms/epoch - 15ms/step
Epoch 13/100
1/1 - 0s - loss: 0.4029 - acc: 0.8950 - 12ms/epoch - 12ms/step
Epoch 14/100
1/1 - 

<keras.callbacks.History at 0x1bca3d67ac0>

In [123]:
Robustness(x_test, NN_model_BCancer)



Robustness = 0.96 %


In [124]:
Edge_case_analysis(x_test, NN_model_BCancer)

edge_case = 0.99468 %


## Decision Tree

In [125]:
dt.fit(x_train, y_train)
print('score = ',round(dt.score(x_test, y_test),3),'%')
Robustness(x_test, dt)
Edge_case_analysis(x_test, dt)

score =  0.931 %
Robustness = 0.94 %
edge_case = 0.99929 %
