In [1]:
#importing the required modules

import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from __future__ import print_function
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
import keras.backend as K
from itertools import product

In [2]:
# Extracting the data from the csv file and removing the outliers and then splitting into train and test sets

Z1 = pd.read_csv('clr-data.csv')
Z1 = Z1[Z1['FNC']<1e6]
Z = Z1[Z1['FNC']>0]
X = Z.drop(['Status'],axis=1)
Y = Z['Status']
X_temp_train, X_temp_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=1)
FNC_train = X_temp_train['FNC']
FNC_test = X_temp_test['FNC']
X_train = X_temp_train.drop(['FNC'],axis=1)
X_test = X_temp_test.drop(['FNC'],axis=1)

In [3]:
# setting the costs

cost_TN = 0
cost_TP = 150
cost_FP = 150

In [4]:
#function for standardizing the data using mean and standard deviation

def standardize(train, test):
    mean = np.mean(train, axis=0)
    std = np.std(train, axis=0)+0.000001
    X_train = (train - mean) / std
    X_test = (test - mean) /std
    return X_train, X_test

In [5]:
X_train, X_test = standardize(X_train,X_test)

In [6]:
# passing the false negative cost for each row will require merging the cost into the output 

def create_y_input(y_train, c_FN):
    y_str = pd.Series(y_train).reset_index(drop=True).apply(lambda x: str(int(x)))
    c_FN_str = pd.Series(c_FN).reset_index(drop=True).apply(lambda x: '0'*(6-len(str(int(x)))) + str(int(x)))
    return y_str + '.' + c_FN_str

In [7]:
# creating the customised loss function

def custom_loss():
    def loss_function(y_input, y_pred):
        y_true = K.round(y_input)
        c_FN = (y_input - y_true) * 1e6
        cost = (y_true * (1-y_pred) * c_FN) + (y_true * (y_pred) * cost_TP) + ((1 - y_true) * ( y_pred) * cost_FP) +  ( (1 - y_true) * (1-y_pred) * cost_TN)
        return K.mean(cost, axis=-1)
    return loss_function

In [8]:
#Creating the deep neural network model

def create_model(indput_dim, dropout=0.2):
    model = Sequential([
    Dense(units=40, input_dim=indput_dim, activation='relu'),
    Dropout(dropout),
    Dense(units=20, activation='relu'),
    Dropout(dropout),
    Dense(10, activation='relu'),
    Dense(1, activation='sigmoid')])
    return model

In [9]:
#Creating and compiling the model 

model = create_model(11,0.2)
model.compile(optimizer='adam', loss=custom_loss())

In [10]:
y_train = create_y_input(Y_train,FNC_train)
y_train = np.float32(y_train)
model.fit(X_train, y_train, batch_size=128, epochs=2, verbose=1)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7feea4341af0>

In [11]:
#Obtaining the result
y_pred = model.predict(X_test)

In [12]:
#Analysis of the result
print("accuracy =",accuracy_score(Y_test,y_pred.round()))
print("precision =",average_precision_score(Y_test, y_pred.round()))
print("recall =",recall_score(Y_test, y_pred.round()))
print("f1_score =",f1_score(Y_test, y_pred.round()))

accuracy = 0.7869160263853546
precision = 0.34491766200299895
recall = 0.277323958225623
f1_score = 0.38344413467724636


In [13]:
#computing the savings cost
cost_max=0
costs=0
for i in range(len(Y_test)):
    if(Y_test.iloc[i]==1):
        cost_max = cost_max + FNC_test.iloc[i]
for i in range(len(Y_test)):
    if(Y_test.iloc[i]==1):
        if(y_pred[i].round()==1):
            costs=costs+cost_TP
        else:
            costs=costs+FNC_test.iloc[i]
    else:
        if(y_pred[i].round()==1):
            costs=costs+cost_FP
        else:
            costs=costs+cost_TN
            
print("cost savings =",1-(costs/cost_max))

cost savings = 0.13994579685939013
