## Objective: Multiclass Classification with the Iris Flowers Dataset ##

In [1]:
# Necessary imports
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

Using TensorFlow backend.


## Step 1: Load the dataset ##

In [5]:
# load dataset
dataframe = pandas.read_csv("iris.csv", header=None)
dataset = dataframe.values

# Check values
dataset[:5]

array([[5.1, 3.5, 1.4, 0.2, 'Iris-setosa'],
       [4.9, 3.0, 1.4, 0.2, 'Iris-setosa'],
       [4.7, 3.2, 1.3, 0.2, 'Iris-setosa'],
       [4.6, 3.1, 1.5, 0.2, 'Iris-setosa'],
       [5.0, 3.6, 1.4, 0.2, 'Iris-setosa']], dtype=object)

## Step 2: Data preprocessing and splitting ##

In [6]:
'''
Split data into feature and target variables
'''
X = dataset[:, 0:4].astype(float)
Y = dataset[:, 4]

In [7]:
'''
Class values are: Iris-sentosa, Iris-versicolor, Iris-virginica

One hot encoding the class variable values using to_categorical()
'''
encoder = LabelEncoder()

# Fit encoder on labels
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

# Check dummy values
dummy_y[:5]

array([[ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.]])

## Step 3: Define model ##

In [8]:
'''
NN model:

4 inputs -> 4 hidden nodes -> 3 outputs
'''
# define baseline model
def baseline_model():
    # create model
    model = Sequential()
    # Input layer: 4 input features
    model.add(Dense(4, # 4 neurons, can be any number of neurons
                    input_dim=4, # number of features
                    init='normal', 
                    activation='relu'))
    # Output layer: 3 output class predictions
    model.add(Dense(3, 
                    init='normal', 
                    activation='sigmoid')) # to ensure outputs are between 0,1 with probabilistic values
    # Compile model
    model.compile(loss='categorical_crossentropy', 
                  optimizer='adam', 
                  metrics=['accuracy'])
    return model

# Estimator
estimator = KerasClassifier(build_fn=baseline_model, 
                            nb_epoch=200, 
                            batch_size=5, 
                            verbose=0)

# 10 splits
kfold = KFold(n_splits=10, 
              shuffle=True, 
              random_state=seed)

## Step 4: Performance evaluation ##

In [9]:
results = cross_val_score(estimator, 
                          X, 
                          dummy_y, 
                          cv=kfold)

# Print results
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Accuracy: 95.33% (4.27%)
