In [1]:
import numpy as np
import pandas as pd
import sklearn
import sklearn.model_selection
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, cohen_kappa_score
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [None]:
# Download iris dataset
!wget http://www.kasprowski.pl/datasets/iris.data

## Build the network (model)

In [2]:
# Binary classifier: 4 input features -> two hidden layers of 50 sigmoid
# units each -> one sigmoid output unit.
model = Sequential([
    Dense(50, input_dim=4, activation='sigmoid'),
    Dense(50, activation='sigmoid'),
    Dense(1, activation='sigmoid'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                250       
_________________________________________________________________
dense_1 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 51        
Total params: 2,851
Trainable params: 2,851
Non-trainable params: 0
_________________________________________________________________


## Load data (and remove one class)

In [3]:
# Read the full dataset, then discard every row labelled 'Iris-virginica'
# so that only two classes remain.
data = pd.read_csv('iris.data')
print("Rows:",len(data))
is_virginica = data.iris == 'Iris-virginica'
data = data.drop(index=data.loc[is_virginica].index)
print("Rows after removal of 'Iris-virginica':",len(data))

Rows: 150
Rows after removal of 'Iris-virginica': 100


## Prepare samples and labels

In [4]:
# Split the frame into a float feature matrix (first four columns) and a
# numeric label vector (fifth column).
print("Classes:",set(data['iris']))
values = data.to_numpy()
samples = values[:,:4].astype(float)
labels = values[:,4]

# Swap each class name for its numeric code. The astype(float) that follows
# raises if any class name was left unmapped.
for class_name, code in (('Iris-versicolor', 0), ('Iris-setosa', 1)):
    labels[labels == class_name] = code
labels = labels.astype(float)
print("Classes after renaming:",set(labels))

print("Samples:",samples.shape)
print("Labels:",labels.shape)


Classes: {'Iris-setosa', 'Iris-versicolor'}
Classes after renaming: {0.0, 1.0}
Samples: (100, 4)
Labels: (100,)


## Compile and fit model

In [12]:
# Hold out part of the two-class data for testing; random_state=1 fixes the
# shuffle so the same split is produced on every run.
trainSamples, testSamples, trainLabels, testLabels = sklearn.model_selection.train_test_split(
    samples, labels, random_state=1
)

# Binary cross-entropy matches 0/1 labels and a single sigmoid output unit.
model.compile(loss='binary_crossentropy', optimizer="adam", metrics=['accuracy'])

In [7]:
# Train for 10 epochs in mini-batches of 10 samples; the object returned by
# fit() is kept in H.
H = model.fit(
    x=trainSamples,
    y=trainLabels,
    batch_size=10,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Evaluate model

In [8]:
# Raw network outputs for the test samples (sigmoid activations in [0, 1]).
testResults = model.predict(testSamples)
# Threshold at 0.5 to obtain hard 0/1 labels. The former `(p*2).astype(int)`
# trick produced the invalid label 2 whenever the sigmoid saturated to exactly 1.0.
testResults = (testResults >= 0.5).astype(int)
print(testResults.T)
print(confusion_matrix(testLabels, testResults))
print(classification_report(testLabels, testResults))
print("Cohen's Kappa: {}".format(cohen_kappa_score(testLabels, testResults)))
print("Accuracy: ",accuracy_score(testLabels, testResults))


[[0 0 1 0 0 1 1 0 0 0 0 1 0 0 0 1 1 1 0 1 1 0 0 0 1]]
[[15  0]
 [ 0 10]]
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        15
         1.0       1.00      1.00      1.00        10

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25

Cohen's Kappa: 1.0
Accuracy:  1.0


# Dataset with three classes

In [13]:
# Reload the full dataset and keep all three classes this time.
data = pd.read_csv('iris.data')
print("Rows:",len(data))
print("Classes:",set(data['iris']))

# First four columns are the float features, the fifth is the class name.
values = data.to_numpy()
samples = values[:,:4].astype(float)
labels = values[:,4]

# Swap each class name for its numeric code. The astype(float) that follows
# raises if any class name was left unmapped.
class_codes = (('Iris-versicolor', 0), ('Iris-setosa', 1), ('Iris-virginica', 2))
for class_name, code in class_codes:
    labels[labels == class_name] = code
labels = labels.astype(float)

print("Samples:",samples.shape)
print("Labels:",labels.shape)

Rows: 150
Classes: {'Iris-setosa', 'Iris-versicolor', 'Iris-virginica'}
Samples: (150, 4)
Labels: (150,)


In [31]:
# Split the three-class data. random_state=1 (the seed used by the other
# splits in this notebook) makes the split, and so the results that follow,
# repeatable from run to run.
trainSamples, testSamples, trainLabels, testLabels = sklearn.model_selection.train_test_split(
    samples, labels, random_state=1
)

In [14]:
# Re-compile the existing model with categorical cross-entropy.
# Alternative losses left for experimentation in this cell:
# 'binary_crossentropy' and 'sparse_categorical_crossentropy'.
# NOTE(review): no cell in between rebuilds `model`, so this appears to reuse
# the single-output network from the two-class task — presumably on purpose,
# to set up the failure discussed further down; confirm.
model.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [15]:
# Train on the three-class split: 10 epochs, mini-batches of 10 samples.
H = model.fit(
    x=trainSamples,
    y=trainLabels,
    batch_size=10,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Evaluate model

In [16]:
# Raw network outputs for the test samples, printed before thresholding.
testResults = model.predict(testSamples)
print(testResults.T)
# Threshold at 0.5 to obtain hard labels. This can only ever yield 0 or 1, so
# class 2 is never predicted here. The former `(p*2).astype(int)` trick also
# produced a spurious 2 whenever the output saturated to exactly 1.0.
testResults = (testResults >= 0.5).astype(int)
print(testResults.T)
print(confusion_matrix(testLabels, testResults))
print(classification_report(testLabels, testResults))
print("Cohen's Kappa: {}".format(cohen_kappa_score(testLabels, testResults)))
print("Accuracy: ",accuracy_score(testLabels, testResults))

[[0.6360789  0.50631374 0.45345643 0.6251399  0.3924384  0.4409843
  0.41659284 0.5942573  0.6035379  0.39765352 0.45365062 0.6062966
  0.39212823 0.44957143 0.44303638 0.61713296 0.46365035 0.44234675
  0.6093655  0.62401736 0.44505614 0.44128665 0.4259042  0.62093395
  0.39571154 0.45991272 0.6284845  0.6155497  0.43910074 0.4140893
  0.4426362  0.38697803 0.46730435 0.39534396 0.40525287 0.6169968
  0.4558947  0.61761975]]
[[1 1 0 1 0 0 0 1 1 0 0 1 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 1
  0 1]]
[[15  1  0]
 [ 0 13  0]
 [ 9  0  0]]
              precision    recall  f1-score   support

         0.0       0.62      0.94      0.75        16
         1.0       0.93      1.00      0.96        13
         2.0       0.00      0.00      0.00         9

    accuracy                           0.74        38
   macro avg       0.52      0.65      0.57        38
weighted avg       0.58      0.74      0.65        38

Cohen's Kappa: 0.5671981776765376
Accuracy:  0.7368421052631579


  _warn_prf(average, modifier, msg_start, len(result))


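What is the problem?
The network has a single output unit, so its output is just one number. One number cannot represent three distinct classes, so there is no way the ANN can learn the proper output!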

## Change to one-hot encoding

In [17]:
print(labels.shape)
# One-hot encode the numeric class codes (0, 1, 2) into rows of length 3.
# The ndim guard keeps the cell idempotent: running it a second time would
# otherwise re-encode the already one-hot array into a 3-D array.
if labels.ndim == 1:
    labels = tf.keras.utils.to_categorical(labels)
print(labels.shape)

(150,)
(150, 3)


## New model (output: vector of 3 values)

In [18]:
# Same two hidden layers as the first model, but the output layer now has
# three units. softmax normalises the three outputs so that they sum to 1.
# Output layers considered instead: Dense(1, activation='sigmoid') and
# Dense(3, activation='sigmoid') (three values, without normalisation).
model = Sequential([
    Dense(50, input_dim=4, activation='sigmoid'),
    Dense(50, activation='sigmoid'),
    Dense(3, activation='softmax'),
])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 50)                250       
_________________________________________________________________
dense_4 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 153       
Total params: 2,953
Trainable params: 2,953
Non-trainable params: 0
_________________________________________________________________


### Train model

In [20]:
# Split again now that `labels` holds one-hot rows; random_state=1 fixes the shuffle.
split = sklearn.model_selection.train_test_split(samples, labels, random_state=1)
trainSamples, testSamples, trainLabels, testLabels = split

# Categorical cross-entropy compares the softmax output with the one-hot targets.
model.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [21]:
# Train for 10 epochs (mini-batches of 10), passing the test split as
# validation data so its loss/accuracy are reported alongside training.
H = model.fit(
    x=trainSamples,
    y=trainLabels,
    batch_size=10,
    epochs=10,
    validation_data=(testSamples, testLabels),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Evaluate model (the labels are one-hot encoded!)

In [22]:
# Per-class outputs of the softmax layer, one row per test sample.
testResults = model.predict(testSamples)
print(testResults)

# argmax along axis 1 collapses each output row / one-hot row to a class index,
# which is the form the sklearn metrics below are given.
predicted = testResults.argmax(axis=1)
expected = testLabels.argmax(axis=1)
print(predicted)

print(confusion_matrix(expected, predicted))
print(classification_report(expected, predicted))
print("Cohen's Kappa: {}".format(cohen_kappa_score(expected, predicted)))
print("Accuracy: ",accuracy_score(expected, predicted))


[[0.2754268  0.4555409  0.26903233]
 [0.31295207 0.33206806 0.3549799 ]
 [0.31403086 0.28844523 0.39752394]
 [0.27972707 0.44285664 0.27741626]
 [0.3122603  0.2395349  0.44820485]
 [0.31417018 0.27597788 0.40985197]
 [0.31682265 0.25543204 0.42774525]
 [0.28802577 0.41352323 0.29845104]
 [0.28845218 0.4252173  0.28633052]
 [0.31717503 0.2417003  0.44112465]
 [0.317479   0.28852227 0.39399865]
 [0.284422   0.4247806  0.29079744]
 [0.3181494  0.23993573 0.4419149 ]
 [0.31372255 0.2836975  0.40257993]
 [0.31672138 0.27899808 0.4042805 ]
 [0.28546843 0.43807456 0.276457  ]
 [0.31647348 0.29766592 0.38586056]
 [0.3170057  0.27757564 0.4054187 ]
 [0.2857676  0.42858297 0.28564942]
 [0.28267145 0.44495496 0.2723735 ]
 [0.31900325 0.28275433 0.39824244]
 [0.31753427 0.2765241  0.40594164]
 [0.31645054 0.26578194 0.41776752]
 [0.2825546  0.44040966 0.27703574]
 [0.31634772 0.243547   0.44010532]
 [0.31604832 0.29323345 0.39071822]
 [0.27693877 0.44769377 0.27536753]
 [0.2825647  0.4347454  0.28

In [23]:
# Predict the class outputs for one hand-written sample.
sample = [[1,2,3,4]]
pred = model.predict(sample)
print(sample,'->',pred)

# Show the model output next to the one-hot target for test rows 10..13.
first, last = 10, 14
pred = model.predict(testSamples[first:last])
for probs, target in zip(pred, testLabels[first:last]):
    print(probs,"->",target)


[[1, 2, 3, 4]] -> [[0.34382352 0.25904652 0.39712992]]
[0.317479   0.28852227 0.39399865] -> [1. 0. 0.]
[0.284422   0.4247806  0.29079744] -> [0. 1. 0.]
[0.3181494  0.23993573 0.4419149 ] -> [0. 0. 1.]
[0.31372255 0.2836975  0.40257993] -> [1. 0. 0.]


In [12]:
# Write the current model to the file 'iris.h5'.
model.save(filepath='iris.h5')