## Classification using ANN

In [None]:
import numpy as np
import tensorflow as tf
import math
import logging
logging.basicConfig(level = logging.DEBUG)
import matplotlib.pyplot as plt
from scipy.stats import zscore
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

###### ANN Model Creation Function

In [None]:
def create_ann_model(n_in, n_out, n_h1 = 10, n_h2 = 10):
    """Build the forward pass of a perceptron with two hidden layers.

    The network is wired to the module-level placeholder ``X``. Both hidden
    layers use a sigmoid activation; the output layer applies no activation,
    so the returned tensor holds the raw output scores.

    Args:
        n_in: number of input features.
        n_out: number of output units.
        n_h1: number of units in the first hidden layer.
        n_h2: number of units in the second hidden layer.

    Returns:
        The output tensor of the network evaluated on ``X``.
    """
    #biases of the two hidden layers and of the output layer
    bias_hidden1 = tf.Variable(tf.random_normal([n_h1]))
    bias_hidden2 = tf.Variable(tf.random_normal([n_h2]))
    bias_output = tf.Variable(tf.random_normal([n_out]))

    #weights: input -> hidden 1 -> hidden 2 -> output
    weight_in_hidden1 = tf.Variable(tf.random_normal([n_in, n_h1]))
    weight_hidden1_hidden2 = tf.Variable(tf.random_normal([n_h1, n_h2]))
    weight_hidden2_out = tf.Variable(tf.random_normal([n_h2, n_out]))

    #forward pass through the two sigmoid hidden layers
    hidden1 = tf.nn.sigmoid(tf.add(tf.matmul(X, weight_in_hidden1), bias_hidden1))
    hidden2 = tf.nn.sigmoid(tf.add(tf.matmul(hidden1, weight_hidden1_hidden2), bias_hidden2))

    #linear output layer
    return tf.add(tf.matmul(hidden2, weight_hidden2_out), bias_output)

###### ANN Classifier Training Function

In [None]:
def train_test_ann_classifier(neural_network, optimizer, train_x, train_y, test_x, test_y_label, n_epoch = 1000, batch_size = 1000):
    """Train the network with mini-batches and classify the test samples.

    The module-level placeholders ``X`` and ``Y`` are fed with the batches.
    All global variables of the default graph are (re-)initialised before
    training starts.

    Args:
        neural_network: output tensor of the model (one column per class).
        optimizer: training operation that is run once per batch.
        train_x: 2-D array of training features.
        train_y: 2-D array of training targets, row-aligned with ``train_x``.
        test_x: 2-D array of test features.
        test_y_label: true class index of every test sample.
        n_epoch: number of passes over the training data.
        batch_size: maximum number of samples per batch (positive integer).

    Returns:
        2-D array with one row per test sample and the columns
        [true label, estimated label, 1 if they are equal else 0].

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    n_samples = len(train_y)
    #ceiling division: the trailing partial batch is trained on as well, and a
    #batch size larger than the training set still yields one batch per epoch
    #(flooring gave zero batches in that case, i.e. no training at all)
    total_batch = (n_samples + batch_size - 1) // batch_size

    #Initializing the variables
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        #Training epoch
        for epoch in range(n_epoch):

            for i in range(total_batch):

                start = i * batch_size
                batch_x = train_x[start:min(start + batch_size, len(train_x)), :]
                batch_y = train_y[start:min(start + batch_size, len(train_y)), :]
                sess.run(optimizer, feed_dict={X: batch_x, Y: batch_y})

        #Class estimation using argmax over the output columns
        estimated_class = tf.argmax(neural_network, 1).eval({X: test_x})
        correct_prediction = np.equal(estimated_class, test_y_label)

        return np.column_stack((test_y_label, estimated_class, correct_prediction))

###### Performance Evaluation

In [None]:
def result_evaluation(result):
    """Compute precision, recall, F-measure and accuracy of binary predictions.

    Args:
        result: 2-D array with one row per sample and the columns
            [true label, predicted label, correct flag]; the predicted label
            is 0 or 1 and the correct flag is 1 when the prediction matches
            the true label and 0 otherwise.

    Returns:
        The list ``[precision, recall, fmeasure, accuracy]``. A ratio whose
        denominator is zero (no predicted positives, no actual positives,
        precision + recall equal to zero, or an empty ``result``) is reported
        as 0.0 instead of raising ``ZeroDivisionError`` or producing NaN.
    """
    def _ratio(numerator, denominator):
        #guard against empty classes / empty input
        return numerator / denominator if denominator else 0.0

    result = np.asarray(result)

    #rows grouped by predicted label and by correctness of the prediction
    predicted_positive = result[:, 1] == 1
    predicted_negative = result[:, 1] == 0
    is_correct = result[:, 2] == 1
    is_wrong = result[:, 2] == 0

    #confusion matrix entries
    t_positive = np.count_nonzero(predicted_positive & is_correct)
    f_positive = np.count_nonzero(predicted_positive & is_wrong)
    t_negative = np.count_nonzero(predicted_negative & is_correct)
    f_negative = np.count_nonzero(predicted_negative & is_wrong)

    precision = _ratio(t_positive, t_positive + f_positive)
    recall = _ratio(t_positive, t_positive + f_negative)
    #harmonic mean of precision and recall
    fmeasure = 2 * _ratio(precision * recall, precision + recall)
    accuracy = _ratio(t_positive + t_negative, result.shape[0])

    return [precision, recall, fmeasure, accuracy]

###### Data Pre-processing

In [None]:
def one_hot_encoding(data):
    """One-hot encode a single categorical feature.

    Args:
        data: 1-D array holding one categorical value per sample.

    Returns:
        Dense 2-D array produced by ``OneHotEncoder.fit_transform`` with one
        row per sample.
    """
    try:
        #newer scikit-learn releases renamed the dense-output switch
        onehot_encoder = OneHotEncoder(sparse_output=False)
    except TypeError:
        #older releases only know the original keyword
        onehot_encoder = OneHotEncoder(sparse=False)

    #the encoder expects a 2-D input: one column holding the feature
    feature = data.reshape(len(data), 1)
    return onehot_encoder.fit_transform(feature)

def data_cleaning(data, column, invalid_value):
    """Return a copy of ``data`` without the rows holding an invalid value.

    Args:
        data: 2-D array with one row per sample.
        column: index of the feature column to inspect.
        invalid_value: a single value or a collection of values; every row
            whose entry in ``column`` matches one of them is dropped.

    Returns:
        New array containing only the remaining rows, in their original order.
    """
    #flag the rows whose feature value is one of the invalid values
    has_invalid_value = np.isin(data[:, column], invalid_value)
    #keep everything that is not flagged
    return data[~has_invalid_value]

#load dataset
data=np.genfromtxt('datasets/default of credit card clients.csv', delimiter=',')

#remove first row of dataset (feature name)
data = np.delete(data, 0, 0)
#remove first column of dataset (data index)
data = np.delete(data, 0, 1)

#drop the samples whose value in column 2 is 0, 5 or 6, or whose value in column 3 is 0
data = data_cleaning(data, 2, [0,5,6])
data = data_cleaning(data, 3, 0)

#normalise continuous data using z-score function
normalised_continous_data = zscore(data[:, [0,4]+list(range(11,23))])

#perform one hot encoding on categorical features
gender_one_hot = one_hot_encoding(data[:,1])
education_one_hot = one_hot_encoding(data[:,2])
marital_one_hot = one_hot_encoding(data[:,3])
label_one_hot = one_hot_encoding(data[:,23])

#merge dataset after preprocessing; the one-hot label is stacked last
#(the following cells expect it in columns 29 and 30)
classification_data = np.column_stack((normalised_continous_data, gender_one_hot,education_one_hot,marital_one_hot, data[:,5:11], label_one_hot))

#sort dataset according to label: column 30 is the one-hot flag of label '1',
#so all rows with label '0' come first
sorted_classification_data = classification_data[np.argsort(classification_data[:, 30])]

#number of samples with label '0' (column 29 is the one-hot flag of label '0')
numof0 = int(np.count_nonzero(sorted_classification_data[:, 29] == 1))
#number of samples with label '1'
numof1 = sorted_classification_data.shape[0]-numof0

#shuffle all samples with label '0' in place, so the subset kept below is a
#random undersample and not always the same leading rows
np.random.shuffle(sorted_classification_data[:numof0])
#shuffle all samples with label '1' in place (including the very last row)
np.random.shuffle(sorted_classification_data[numof0:])

#balance the dataset: keep the same number of samples from both classes
balanced_class_size = min(numof0, numof1)
dataset_label_0 = sorted_classification_data[:balanced_class_size]
dataset_label_1 = sorted_classification_data[numof0:numof0 + balanced_class_size]

###### Optimizer Creation Function

In [None]:
def create_optimizer(neural_network, learning_c = 0.2):
    """Create the gradient-descent training operation for a network.

    The loss is the mean softmax cross entropy between the network output
    and the module-level placeholder ``Y``.

    Args:
        neural_network: output tensor of the model, used as logits.
        learning_c: learning rate of the gradient-descent optimizer.

    Returns:
        The operation that minimises the loss when it is run.
    """
    #per-sample cross entropy, averaged over the batch
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=neural_network,labels=Y)
    mean_loss = tf.reduce_mean(cross_entropy)

    gradient_descent = tf.train.GradientDescentOptimizer(learning_c)
    return gradient_descent.minimize(mean_loss)

###### Functions used for Hyperparameter tuning

In [None]:
# generate initial offspring for 1st generation
def generate_initial_offspring(decimal_flag, lower_limit, upper_limit):
    """Draw the ten candidates of the first generation.

    The candidates are sampled uniformly between ``lower_limit`` and
    ``upper_limit``.

    Args:
        decimal_flag: when true the candidates are floats rounded to three
            decimals, otherwise they are rounded to whole numbers.
        lower_limit: lower bound of the sampling interval.
        upper_limit: upper bound of the sampling interval.

    Returns:
        A NumPy array of ten floats when ``decimal_flag`` is true, otherwise
        a list of ten Python ints.
    """
    samples = np.random.uniform(lower_limit, upper_limit, 10)

    if decimal_flag:
        return np.round(samples, decimals = 3)

    return [int(value) for value in np.round(samples)]

# generate offspring from previous generation
def generate_offspring_np(best_parent, decimal_flag, lower_limit,upper_limit):
    """Draw ten candidates around the best candidate found so far.

    The candidates are sampled from a normal distribution centred on
    ``best_parent`` whose standard deviation is a tenth of the search range.
    A candidate that falls outside the range is replaced by ``best_parent``.

    Args:
        best_parent: centre of the distribution.
        decimal_flag: when true the candidates are rounded to three decimals,
            otherwise they are rounded to whole numbers and converted to int.
        lower_limit: smallest accepted value.
        upper_limit: largest accepted value.

    Returns:
        A list with ten candidates.
    """
    #spread of the new generation: 10% of the width of the search range
    spread = (upper_limit - lower_limit) * 0.1
    draws = np.random.normal(best_parent, spread, size=(1, 10))

    if decimal_flag:
        candidates = np.round(draws, decimals = 3)[0]
    else:
        candidates = [int(value) for value in np.round(draws)[0]]

    offspring = []
    for candidate in candidates:
        out_of_range = candidate < lower_limit or candidate > upper_limit
        offspring.append(best_parent if out_of_range else candidate)

    return offspring

###### Hyperparameter Tuning on the loss function of the model

In [None]:
#stack the balanced subsets with label '0' and label '1'
balanced_classification_data = np.vstack((dataset_label_0, dataset_label_1))
#shuffle the balanced dataset
np.random.shuffle(balanced_classification_data)

#sizes of the test set (a tenth of the data) and of each of the two tuning sets
test_dataset_size = math.floor(balanced_classification_data.shape[0] / 10)
tuning_dataset_size = math.floor((balanced_classification_data.shape[0] - test_dataset_size) / 2)

#row at which the first tuning set ends and the second one starts
tuning_split = test_dataset_size + tuning_dataset_size

#splitting feature and label for test set (columns 0-28: features, 29-30: one-hot label)
test_x = balanced_classification_data[0 : test_dataset_size, :29]
test_y = balanced_classification_data[0 : test_dataset_size, 29:]
#second one-hot column is 1 for label '1', i.e. it equals the class index
test_y_label = test_y[:, 1]

#splitting feature and label for first tuning set
tuning1_x = balanced_classification_data[test_dataset_size : tuning_split, :29]
tuning1_y = balanced_classification_data[test_dataset_size : tuning_split, 29:]
tuning1_y_label = tuning1_y[:, 1]

#splitting feature and label for second tuning set; the slice runs to the
#end of the array so that the last sample is not silently dropped
tuning2_x = balanced_classification_data[tuning_split :, :29]
tuning2_y = balanced_classification_data[tuning_split :, 29:]
tuning2_y_label = tuning2_y[:, 1]

#stack tuning dataset
train_x = np.vstack((tuning1_x, tuning2_x))
train_y = np.vstack((tuning1_y, tuning2_y))

#Network parameters
n_input = 29
n_output = 2
n_hidden1 = 10
n_hidden2 = 10

#Learning parameters
learning_constant = 0.2
number_epoch = 1000
batch_size = 1000

#Defining the input and the output
X = tf.compat.v1.placeholder("float", [None, n_input])
Y = tf.compat.v1.placeholder("float", [None, n_output])

#Candidate loss functions: (label printed with the result, builder of the loss tensor).
#Keras loss objects are called as loss(y_true, y_pred), so the labels are
#passed first and the network output second. The network output is a raw
#score (no activation on the output layer), hence from_logits=True for the
#binary cross entropy.
loss_candidates = [
    ("Mean Absolute Error: ",
     lambda labels, logits: tf.keras.losses.MeanAbsoluteError()(labels, logits)),
    ("Mean Absolute Percentage Error: ",
     lambda labels, logits: tf.keras.losses.MeanAbsolutePercentageError()(labels, logits)),
    ("Mean Squared Logarithmic Error: ",
     lambda labels, logits: tf.keras.losses.MeanSquaredLogarithmicError()(labels, logits)),
    ("Softmax Cross Entropy: ",
     lambda labels, logits: tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels))),
    ("Binary Cross Entropy: ",
     lambda labels, logits: tf.keras.losses.BinaryCrossentropy(from_logits=True)(labels, logits)),
]

for loss_name, build_loss in loss_candidates:

    #a fresh model per loss function so that every candidate starts untrained
    neural_network = create_ann_model(n_input, n_output, n_hidden1, n_hidden2)

    loss_op = build_loss(Y, neural_network)
    optimizer = tf.train.GradientDescentOptimizer(learning_constant).minimize(loss_op)
    result = train_test_ann_classifier(neural_network, optimizer, train_x, train_y, test_x, test_y_label, number_epoch, batch_size)
    [precision, recall, fmeasure, accuracy] = result_evaluation(result)

    #display the performance (f-measure) of the model trained with this loss
    print(loss_name, fmeasure)

###### Hyperparameter Tuning on the optimizer function of the model

In [None]:
#Candidate optimizers: (label printed with the result, optimizer class)
optimizer_candidates = (
    ("Gradient Descent Optimizer: ", tf.train.GradientDescentOptimizer),
    ("ADAM Optimizer: ", tf.compat.v1.train.AdamOptimizer),
)

for optimizer_label, optimizer_class in optimizer_candidates:

    #a fresh model per optimizer, trained on the softmax cross entropy loss
    neural_network = create_ann_model(n_input, n_output, n_hidden1, n_hidden2)
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=neural_network,labels=Y))
    optimizer = optimizer_class(learning_constant).minimize(loss_op)

    result = train_test_ann_classifier(neural_network, optimizer, train_x, train_y, test_x, test_y_label, number_epoch)
    [precision, recall, fmeasure, accuracy] = result_evaluation(result)

    #display the f-measure reached with this optimizer
    print(optimizer_label, fmeasure)

###### Hyperparameter Tuning on network topology of the model

In [None]:
#Network parameters
n_input = 29
n_output = 2

#Learning parameters
learning_constant = 0.05
number_epoch = 1000
batch_size = 50

#Defining the input and the output
X = tf.compat.v1.placeholder("float", [None, n_input])
Y = tf.compat.v1.placeholder("float", [None, n_output])

#output of the tuning, one entry per evaluated candidate
#(x1: number of nodes in first hidden layer; y1: number of nodes in second hidden layer; z1: f-measure of the result)
x1 = np.empty((0, 1))
y1 = np.empty((0, 1))
z1 = np.empty((0, 1))

#[nodes in first hidden layer, nodes in second hidden layer, f-measure]
best_n_hidden = [0, 0, 0]

#two-fold evaluation: every tuning set is used once for training and once for validation
#(training features, training targets, validation features, validation labels)
tuning_folds = (
    (tuning1_x, tuning1_y, tuning2_x, tuning2_y_label),
    (tuning2_x, tuning2_y, tuning1_x, tuning1_y_label),
)

for generation in range(10):

    print("generation: ", generation + 1)
    print("Best so far:", best_n_hidden)

    #first generation is drawn uniformly, later ones around the best candidate so far
    if generation == 0:
        first_layer_sizes = generate_initial_offspring(False, n_output, n_input)
        second_layer_sizes = generate_initial_offspring(False, n_output, n_input)
    else:
        first_layer_sizes = generate_offspring_np(best_n_hidden[0], False, n_output, n_input)
        second_layer_sizes = generate_offspring_np(best_n_hidden[1], False, n_output, n_input)

    #one row per candidate: [nodes in first hidden layer, nodes in second hidden layer]
    n_hidden_offspring = np.vstack((first_layer_sizes, second_layer_sizes)).T

    #evaluate the performance of the model for every candidate
    for size_hidden1, size_hidden2 in n_hidden_offspring:

        print("n_hidden1: ", size_hidden1, "n_hidden2:", size_hidden2)

        fold_results = []
        for fit_x, fit_y, validation_x, validation_labels in tuning_folds:
            neural_network = create_ann_model(n_input, n_output, size_hidden1, size_hidden2)
            optimizer = create_optimizer(neural_network , learning_constant)
            fold_results.append(train_test_ann_classifier(neural_network, optimizer, fit_x, fit_y, validation_x, validation_labels, number_epoch, batch_size))

        #pool the predictions of both folds before scoring
        result = np.vstack(fold_results)

        [precision, recall, fmeasure, accuracy] = result_evaluation(result)

        x1 = np.vstack((x1, size_hidden1))
        y1 = np.vstack((y1, size_hidden2))
        z1 = np.vstack((z1, fmeasure))

        #a candidate that is at least as good as the best one so far becomes the new parent
        if fmeasure >= best_n_hidden[2]:
            best_n_hidden = [size_hidden1, size_hidden2, fmeasure]

n_hidden1 = best_n_hidden[0]
n_hidden2 = best_n_hidden[1]

###### scatter plot on the hyperparameter on the network topology

In [None]:
#disable the logger of matplotlib's font manager
logging.getLogger('matplotlib.font_manager').disabled = True

#3-D scatter of the evaluated topologies
#(x1, y1: nodes in the first/second hidden layer; z1: f-measure)
ax = plt.figure().add_subplot(projection='3d')
ax.scatter(xs=x1, ys=y1, zs=z1)
ax.view_init(elev=10, azim=10)
plt.show()

#best topology found: [nodes in first hidden layer, nodes in second hidden layer, f-measure]
print(best_n_hidden)

###### hyperparameter tuning on the learning rate, batch size and epoch size

In [None]:
#output of the tuning, one entry per evaluated candidate
#(x2: generation number; y2: f-measure; anotations: [learning rate, number of epochs, batch size])
x2 = np.empty((0, 1))
y2 = np.empty((0, 1))
anotations = np.empty((0, 3))

#[learning_constant, number_epoch, batch_size, fmeasure]
best_learning_params = [0, 0, 0, 0]

#two-fold evaluation: every tuning set is used once for training and once for validation
#(training features, training targets, validation features, validation labels)
learning_folds = (
    (tuning1_x, tuning1_y, tuning2_x, tuning2_y_label),
    (tuning2_x, tuning2_y, tuning1_x, tuning1_y_label),
)

for generation in range(10):

    print("generation: ", generation + 1)
    print("Best so far:", best_learning_params)

    #first generation is drawn uniformly, later ones around the best candidate so far
    if generation > 0:
        learning_rate_offspring = generate_offspring_np(best_learning_params[0], True, 0, 1)
        number_epoch_offspring = generate_offspring_np(best_learning_params[1], False, 1, 5000)
        batch_size_offspring = generate_offspring_np(best_learning_params[2], False, 1, 5000)
    else:
        learning_rate_offspring = generate_initial_offspring(True, 0, 1)
        number_epoch_offspring = generate_initial_offspring(False, 1, 5000)
        batch_size_offspring = generate_initial_offspring(False, 1, 5000)

    #one row per candidate: [learning rate, number of epochs, batch size]
    learning_params_offspring = np.vstack((learning_rate_offspring, number_epoch_offspring, batch_size_offspring)).T

    #evaluate the performance of the model for every candidate
    for learning_params in learning_params_offspring:

        candidate_rate, candidate_epochs, candidate_batch = learning_params

        print("Learning Constant: ", candidate_rate, "Number Epoch:", candidate_epochs, "Batch Size", candidate_batch)

        fold_results = []
        for fit_x, fit_y, validation_x, validation_labels in learning_folds:
            neural_network = create_ann_model(n_input, n_output, n_hidden1, n_hidden2)
            optimizer = create_optimizer(neural_network , candidate_rate)
            #the stacked row is a float array, so the counts are converted back to int
            fold_results.append(train_test_ann_classifier(neural_network, optimizer, fit_x, fit_y, validation_x, validation_labels, int(candidate_epochs), int(candidate_batch)))

        #pool the predictions of both folds before scoring
        result = np.vstack(fold_results)

        [precision, recall, fmeasure, accuracy] = result_evaluation(result)

        x2 = np.vstack((x2, generation + 1))
        y2 = np.vstack((y2, fmeasure))
        anotations = np.vstack((anotations, learning_params))

        #a candidate that is at least as good as the best one so far becomes the new parent
        if fmeasure >= best_learning_params[3]:
            best_learning_params = [candidate_rate, candidate_epochs, candidate_batch, fmeasure]

learning_constant = best_learning_params[0]
number_epoch = int(best_learning_params[1])
batch_size = int(best_learning_params[2])

###### scatter plot on the hyperparameter on the learning rate, batch size and epoch size

In [None]:
#f-measure (y2) of every evaluated candidate against its generation number (x2)
plt.scatter(x=x2, y=y2)

#best parameters found: [learning rate, number of epochs, batch size, f-measure]
print(best_learning_params)

###### evaluate the performance of the tuned model using test set

In [None]:
#build the model with the tuned topology and learning rate
neural_network = create_ann_model(n_input, n_output, n_hidden1, n_hidden2)
optimizer = create_optimizer(neural_network , learning_constant)

#train on both tuning sets and classify the held-out test set; the tuned
#batch size is passed explicitly, otherwise the default of 1000 would be used
result = train_test_ann_classifier(neural_network, optimizer, train_x, train_y, test_x, test_y_label, number_epoch, batch_size)

[precision, recall, fmeasure, accuracy] = result_evaluation(result)

print([precision, recall, fmeasure, accuracy])

###### 10-fold cross validation using the fine tuned model

In [None]:
#define the size of dataset in each partition (per class)
partition_size = math.floor(dataset_label_0.shape[0] / 10)

#define output result: rows of [true label, estimated label, correct flag]
result = np.empty((0, 3), int)

#Network parameters
n_input = 29
n_output = 2

#n_hidden1, n_hidden2, learning_constant, number_epoch and batch_size are
#taken from the tuning cells above; to run this cell on its own set them
#manually first, e.g. n_hidden1 = 20, n_hidden2 = 15, learning_constant = 0.01,
#number_epoch = 1000, batch_size = 300

#Defining the input and the output
X = tf.compat.v1.placeholder("float", [None, n_input])
Y = tf.compat.v1.placeholder("float", [None, n_output])

for f in range(10):

    #rows of the current fold; the same rows are taken from both classes to
    #maintain the balance of the dataset in each partition
    fold_rows = np.arange(f * partition_size, (f + 1) * partition_size)

    test_0_set = dataset_label_0[fold_rows]
    test_1_set = dataset_label_1[fold_rows]

    #everything outside the current fold is used for training
    train_0_set = np.delete(dataset_label_0, fold_rows, 0)
    train_1_set = np.delete(dataset_label_1, fold_rows, 0)

    concat_train = np.vstack((train_0_set,train_1_set))
    np.random.shuffle(concat_train)

    #the first n_input columns are features, the following n_output columns the one-hot label
    train_x = concat_train[:, :n_input]
    train_y = concat_train[:, n_input:n_input + n_output]

    test_x = np.vstack((test_0_set[:, :n_input], test_1_set[:, :n_input]))
    test_y = np.vstack((test_0_set[:, n_input:n_input + n_output], test_1_set[:, n_input:n_input + n_output]))
    #second one-hot column is 1 for label '1', i.e. it equals the class index
    test_y_label = test_y[:, 1]

    #define the ANN model
    neural_network = create_ann_model(n_input, n_output, n_hidden1, n_hidden2)
    optimizer = create_optimizer(neural_network , learning_constant)

    #train the model with the tuned number of epochs and the tuned batch size
    #(without the explicit argument the default batch size of 1000 would be used)
    fold_result = train_test_ann_classifier(neural_network, optimizer, train_x, train_y, test_x, test_y_label, number_epoch, batch_size)

    #evaluate the result on the fold
    fold_evaluation_result = result_evaluation(fold_result)

    #print the evaluation result in each fold
    print(fold_evaluation_result)

    #concatenate the result
    result = np.vstack((result, fold_result))

#evaluate the performance of the model on the pooled predictions of all folds
[precision, recall, fmeasure, accuracy] = result_evaluation(result)

#display the pooled performance of the model over all folds
print([precision, recall, fmeasure, accuracy])