In [None]:
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense

#We can select the first 8 columns from index 0 to index 7 via the slice 0:8. 
#We can then select the output column (the 9th variable) via index 8.

# Read the comma-separated file into a 2-D numeric array.
dataset = loadtxt('diabetes.csv', delimiter=',')
# Columns 0-7 are the input features (X); column 8 is the output label (y).
X, y = dataset[:, :8], dataset[:, 8]


In [None]:
# 1-We create a Sequential model and add layers one at a time until we are happy with our network architecture.
# 2-The first thing to get right is to ensure the input layer has the right number of input features. 
# 3- This can be specified when creating the first layer with the input_dim argument and 
# 4- setting it to 8 for the 8 input variables.

#How do we know the number of layers and their types?

# In this example, we will use a fully-connected network structure with four layers
# (three hidden layers and one output layer).
# Fully connected layers are defined using the Dense class. 
# We can specify the number of neurons or nodes in 
# the layer as the first argument, and specify the activation function using the activation argument.
# We will use the rectified linear unit activation function referred to as ReLU on the three hidden layers 
# and the Sigmoid function in the output layer.
#

# It used to be the case that Sigmoid and Tanh activation functions were preferred for all layers. 
# These days, better performance is achieved using the ReLU activation function. 
# We use a sigmoid on the output layer to ensure our network output is between 0 and 1 
# and easy to map to either a probability of class 1 or snap to a hard classification of either class 
# with a default threshold of 0.5.
#

#We can piece it all together by adding each layer:
    #The model expects rows of data with 8 variables (the input_dim=8 argument)
    #The first hidden layer has 12 nodes and uses the relu activation function.
    #The second hidden layer has 8 nodes and uses the relu activation function.
    #The third hidden layer has 4 nodes and uses the relu activation function.
    #The output layer has one node and uses the sigmoid activation function.

# define the keras model
# Stack of fully connected layers: 8 inputs -> 12 -> 8 -> 4 -> 1.
# The hidden layers use ReLU; the single output unit uses a sigmoid.
model = Sequential([
    Dense(12, input_dim=8, activation='relu'),
    Dense(8, activation='relu'),
    Dense(4, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
#Now that the model is defined, we can compile it.

    # 1-Compiling the model uses the efficient numerical libraries under the covers (the so-called backend) 
    # 2-such as Theano or TensorFlow. 
    # 3-The backend automatically chooses the best way to represent the network for training and making predictions 
    # to run on your hardware, such as CPU or GPU or even distributed.

#When compiling, we must specify some additional properties required when training the network. 
#Remember training a network means finding the best set of weights to map inputs to outputs in our dataset.

#We must specify the loss function to use to evaluate a set of weights, 
#the optimizer is used to search through different weights for the network and any optional 
#metrics we would like to collect and report during training.

    # 1-In this case, we will use cross entropy as the loss argument. 
    # 2-This loss is for binary classification problems and is defined in Keras as "binary_crossentropy". 
    # 3-We will define the optimizer as the efficient stochastic gradient descent algorithm "adam". 
    # 4-This is a popular version of gradient descent because it automatically tunes itself and gives good results 
    # in a wide range of problems. 

#Finally, because it is a classification problem, we will collect and report the classification accuracy, 
#defined via the metrics argument.

# compile the keras model
# Loss: binary cross-entropy, for the single sigmoid output.
# Optimizer: 'adam' (adaptive step sizes), as this cell's description specifies;
# plain 'SGD' with its fixed default learning rate tends to converge poorly on
# the unscaled input columns used here.
# Metric: classification accuracy, reported during training and evaluation.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# fit the keras model on the dataset
# Keep the returned History object (per-epoch metrics live in history.history)
# instead of letting its bare repr become the cell output.
history = model.fit(X, y, epochs=150, batch_size=10)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


<keras.callbacks.callbacks.History at 0x63e743b50>

In [None]:
# evaluate the keras model
# NOTE: X and y are the same rows the model was fitted on, so this is the
# training accuracy, not an estimate of performance on unseen data.
# The loss is bound to a real name rather than `_`, which IPython/Jupyter
# uses for the previous cell's output.
loss, accuracy = model.evaluate(X, y)
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 70.57


In [None]:
#Make Predictions

#Making predictions is as easy as calling the predict() function on the model. 
#We are using a sigmoid activation function on the output layer, so the predictions will be a probability 
#in the range between 0 and 1.
#We can easily convert them into a crisp binary prediction for this classification task by rounding them.
# make class predictions with the model

#Prediction probability
# make probability predictions with the model (one sigmoid output per row)
probabilities = model.predict(X)
# print every input row next to its predicted probability and its true label
for features, probability, expected in zip(X, probabilities, y):
    print ("%s => %f (expected %d)"% (features.tolist(), probability[0], expected))

print ("#Class Prediction: ")
# Sequential.predict_classes() has been removed from current Keras/TensorFlow.
# Thresholding the sigmoid output at 0.5 yields the same hard 0/1 labels and
# works on both old and new versions.
predictions = (probabilities > 0.5).astype("int32")
# summarize the first 5 cases
for i in range(5):
    print('%s => %d (expected %d)' % (X[i].tolist(), predictions[i, 0], y[i]))

[6.0, 148.0, 72.0, 35.0, 0.0, 33.6, 0.627, 50.0] => 0.535304 (expected 1)
[1.0, 85.0, 66.0, 29.0, 0.0, 26.6, 0.351, 31.0] => 0.166631 (expected 0)
[8.0, 183.0, 64.0, 0.0, 0.0, 23.3, 0.672, 32.0] => 0.708059 (expected 1)
[1.0, 89.0, 66.0, 23.0, 94.0, 28.1, 0.167, 21.0] => 0.152811 (expected 0)
[0.0, 137.0, 40.0, 35.0, 168.0, 43.1, 2.288, 33.0] => 0.596454 (expected 1)
[5.0, 116.0, 74.0, 0.0, 0.0, 25.6, 0.201, 30.0] => 0.233911 (expected 0)
[3.0, 78.0, 50.0, 32.0, 88.0, 31.0, 0.248, 26.0] => 0.270659 (expected 1)
[10.0, 115.0, 0.0, 0.0, 0.0, 35.3, 0.134, 29.0] => 0.531202 (expected 0)
[2.0, 197.0, 70.0, 45.0, 543.0, 30.5, 0.158, 53.0] => 0.592424 (expected 1)
[8.0, 125.0, 96.0, 0.0, 0.0, 0.0, 0.232, 54.0] => 0.146449 (expected 1)
[4.0, 110.0, 92.0, 0.0, 0.0, 37.6, 0.191, 30.0] => 0.148611 (expected 0)
[10.0, 168.0, 74.0, 0.0, 0.0, 38.0, 0.537, 34.0] => 0.663970 (expected 1)
[10.0, 139.0, 80.0, 0.0, 0.0, 27.1, 1.441, 57.0] => 0.409984 (expected 0)
[1.0, 189.0, 60.0, 23.0, 846.0, 30.1, 0.3