# Densely Connected Neural Network


In [98]:
import numpy as np
from numpy import genfromtxt # generate an array fron a text file

import matplotlib.pyplot as plt
%matplotlib inline

# it will split the features and labels into a train set and a test set
# This (train_...) also does randomized shuffling, so we don't have to worry about the labels being sorted by accident.
# This will automatically shuffle them for us.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, classification_report

from keras.models import Sequential
from keras.layers import Dense

# we are passing the delimiter parameter to specify that the features are separated by a comma
# data = genfromtxt('./data/labels_features.csv', delimiter=",")#, names=True, dtype=None)
data = genfromtxt('./data/labels_features.csv', delimiter=",", names=True)#, dtype=None)
NAMES = data.dtype.names
data = genfromtxt('./data/labels_features.csv', delimiter=",")
data = data[1:, :]

In [99]:
# we could make it more universal I guess, but here it does not matter
# label_index = len(data[1, :]) - 1
# labels = data[:, label_index]
# features_no = len(data[1, :]) - 1
# features = data[:, 0:features_no]

IDS = data[:, 0] # data[:, 0]
LABELS = data[:, 1] # only class telling real / fake
FEATURES = data[:, 2:] # only features, no class
TARGET_NAMES = ['Metastasis, class 0', 'No metastasis, class 1']

In [100]:
test_size = 0.33

dim = len(FEATURES[0])
num_labels = len(np.unique(LABELS))

# passing the X features; y labels; test size of 33%; random_state => seed to have the same shuffle every time
# why 42? => https://news.mit.edu/2019/answer-life-universe-and-everything-sum-three-cubes-mathematics-0910
x_train, x_test, y_train, y_test = train_test_split(FEATURES, LABELS, test_size=test_size, random_state=42)

In [101]:
# force all the feature data to fall within a certain range
# this can actually help the neural network perform better
scaler_object = MinMaxScaler()

# fit the scaler object to our training data
# fit() finds the min and max value and then transform() is transforming the given array based on the MinMax we just calculated durring the fit
scaler_object.fit(X_train)
# we only fit to X_train and not X_test BECAUSE we do not want the scaler_object to peek at any test data - it would be cheating. If we would do that it is called data leakage and is essentially cheating. So we fit to the train data but transform both
scaled_X_train = scaler_object.transform(X_train)
scaled_X_test = scaler_object.transform(X_test)

In [102]:
# creates the model
model = Sequential()

# adding the layers
# add the dense layer, expecting 4 features (we have 4 neurons), input dimention; activation function ReLu
model.add(Dense(dim, input_dim = dim, activation = 'relu'))

# here we can play arround with the neurons; too large / too small => bad results; we can do 1x or 2x input dimensions; we do not specify the input dim as it is not the input layer - it is a hidden layer
model.add(Dense(8, activation= 'relu'))

# 1 because we only have 1 neuron which has 1 output and is outputting the result of either 0 or 1; activation type sigmoid => fit between 0 and 1
model.add(Dense(1, activation= 'sigmoid'))

In [103]:
model.compile(loss= 'binary_crossentropy', optimizer= 'adam', metrics= ['accuracy'])

In [104]:
model.fit(scaled_X_train, y_train, epochs= 400, verbose= 2)

Epoch 1/400
 - 0s - loss: 0.6783 - acc: 0.6667
Epoch 2/400
 - 0s - loss: 0.6513 - acc: 0.7126
Epoch 3/400
 - 0s - loss: 0.6309 - acc: 0.7126
Epoch 4/400
 - 0s - loss: 0.6207 - acc: 0.7126
Epoch 5/400
 - 0s - loss: 0.6108 - acc: 0.7126
Epoch 6/400
 - 0s - loss: 0.6067 - acc: 0.7126
Epoch 7/400
 - 0s - loss: 0.6032 - acc: 0.7126
Epoch 8/400
 - 0s - loss: 0.6010 - acc: 0.7126
Epoch 9/400
 - 0s - loss: 0.5982 - acc: 0.7126
Epoch 10/400
 - 0s - loss: 0.5948 - acc: 0.7126
Epoch 11/400
 - 0s - loss: 0.5908 - acc: 0.7126
Epoch 12/400
 - 0s - loss: 0.5876 - acc: 0.7126
Epoch 13/400
 - 0s - loss: 0.5850 - acc: 0.7126
Epoch 14/400
 - 0s - loss: 0.5825 - acc: 0.7126
Epoch 15/400
 - 0s - loss: 0.5788 - acc: 0.7126
Epoch 16/400
 - 0s - loss: 0.5763 - acc: 0.7126
Epoch 17/400
 - 0s - loss: 0.5725 - acc: 0.7126
Epoch 18/400
 - 0s - loss: 0.5693 - acc: 0.7126
Epoch 19/400
 - 0s - loss: 0.5662 - acc: 0.7126
Epoch 20/400
 - 0s - loss: 0.5625 - acc: 0.7126
Epoch 21/400
 - 0s - loss: 0.5594 - acc: 0.7126
E

<keras.callbacks.History at 0x2870d03d470>

In [105]:
# Spits out probabilities by default.
# model.predict(scaled_X_test)

# model.predict_classes(scaled_X_test)

In [106]:
model.metrics_names

['loss', 'acc']

In [117]:
predictions = model.predict_classes(scaled_X_test)

# we have the answers because we have the y_test vector
conf_mat = confusion_matrix(y_test, predictions)
# [[True Negative, False Negative],
# [False Positive, True Positive]]

confusion = {
    "TruePositive": conf_mat[1][1],
    "TrueNegative": conf_mat[0][0],
    "FalsePositive": conf_mat[1][0],
    "FalseNegative": conf_mat[0][1],
}
# confusion["TrueNegative"]
accuracy = (confusion["TrueNegative"] + confusion["TruePositive"]) / sum(confusion.values())

In [118]:
def printDict(obj):
    if isinstance(obj, dict):
        for k, v in obj.items():
            if hasattr(v, '__iter__'):
                print(k)
                printDict(v)
            else:
                print('%s\t:\t%s' % (k, v))
    elif isinstance(obj, list):
        for v in obj:
            if hasattr(v, '__iter__'):
                printDict(v)
            else:
                print(v)
    else:
        print(obj)

separator = "\t---------------------------------\n"
print(f'\nConfusion Matrix:\n{separator}\t|\t{ confusion["TruePositive"] }\t|\t{confusion["FalsePositive"]}\t| \n{separator}\t|\t{confusion["FalseNegative"]}\t|\t{confusion["TrueNegative"]}\t|\n{separator}')
printDict(confusion)


Confusion Matrix:
	---------------------------------
	|	21	|	11	| 
	---------------------------------
	|	7	|	5	|
	---------------------------------

TruePositive	:	21
TrueNegative	:	5
FalsePositive	:	11
FalseNegative	:	7


In [115]:
# displaying the metrics
print(f"\n\tACCURACY = {round(accuracy * 100, 2)}%\n")
print(classification_report(y_test, predictions, target_names=TARGET_NAMES))


	accuracy = 59.09%

                        precision    recall  f1-score   support

   Metastasis, class 0       0.31      0.42      0.36        12
No metastasis, class 1       0.75      0.66      0.70        32

           avg / total       0.63      0.59      0.61        44



In [110]:
# save the model
model.save('./models/denseModel.h5')

In [111]:
# load the model
# from keras.models import load_model
# newModel = load_model('./models/denseModel.h5')

# use the loaded model to predict classes
# x_test is already after scaling!
# newModel.predict_classes(scaled_X_train)