In [1]:
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

Using TensorFlow backend.


In [2]:
# Fix NumPy's global random seed for reproducibility; `seed` is also reused
# as the KFold random_state further down.
# NOTE(review): only NumPy is seeded here — the Keras/TensorFlow backend
# presumably keeps its own RNG state, so confirm runs are actually repeatable.
seed = 7
numpy.random.seed(seed)

## Dataset

In [4]:
# Load the Iris dataset. header=None tells pandas the CSV has no header row,
# so the columns are labelled 0-4 instead of being named from the first line.
dataframe = read_csv("../datasets/iris.csv", header=None)
# Raw NumPy array of the table, sliced into features and labels further down
dataset = dataframe.values

In [5]:
# Preview the first rows to sanity-check that the file was parsed as expected
dataframe.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [6]:
# Summary statistics for the numeric columns (0-3); the string label
# column (4) does not appear in the output.
dataframe.describe()

Unnamed: 0,0,1,2,3
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [7]:
# Columns 0-3 hold the numeric measurements (features); column 4 holds the
# species name (label).
X = dataset[:, 0:4].astype(float)
Y = dataset[:, 4]
# Function-call form of print: works on Python 3 and prints the same text on
# Python 2 for a single argument.
print(X.shape)
print(numpy.unique(Y))

(150, 4)
['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


## Encode Output

In [11]:
# Encode the class names as integers. fit_transform fits the encoder on Y and
# returns the encoded labels in one step; `encoder` stays fitted for later use.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)
# Convert the integer labels to dummy variables (i.e. one-hot encoded), the
# target format used with the categorical cross-entropy loss.
dummy_y = np_utils.to_categorical(encoded_Y)
# Function-call form of print: works on Python 3 and prints the same text on
# Python 2 for a single argument.
print(numpy.unique(encoded_Y))

[0 1 2]


## Neural network model

To solve the problem, I'll create a simple neural network with 4 inputs (one per column of X), a single hidden layer of 8 units, and 3 outputs (one per class in encoded_Y).

In [12]:
# define baseline model
def baseline_model(input_dim=4, hidden_units=8, n_classes=3):
    """Build and compile a small fully connected classifier.

    The network has a single hidden ``Dense`` layer with ReLU activation
    followed by a softmax output layer. It is compiled with categorical
    cross-entropy loss, the Adam optimizer and accuracy as the metric.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. Defaults to 4.
    hidden_units : int, optional
        Number of units in the single hidden layer. Defaults to 8.
    n_classes : int, optional
        Number of units in the softmax output layer, one per class.
        Defaults to 3.

    Returns
    -------
    model
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # One hidden layer (not several layers): `hidden_units` fully connected
    # neurons fed by `input_dim` inputs.
    model.add(Dense(hidden_units, input_dim=input_dim, activation='relu'))
    # Output layer with one softmax unit per class (multi-class problem).
    model.add(Dense(n_classes, activation='softmax'))
    # Categorical loss because this is not a binary classification; Adam is
    # used as the gradient-descent optimizer.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [13]:
# Wrap the model builder so it can be passed to scikit-learn utilities;
# the training settings (epochs, batch size, verbosity) are fixed here.
estimator = KerasClassifier(
    build_fn=baseline_model,
    epochs=200,
    batch_size=5,
    verbose=0,
)

## Evaluate the model

In [14]:
# 10-fold split with shuffling; random_state reuses the notebook-wide seed
kfold = KFold(
    n_splits=10,
    shuffle=True,
    random_state=seed,
)

In [None]:
# Cross-validate the estimator on the folds defined by `kfold`, then report
# the mean and standard deviation of the scores as percentages.
results = cross_val_score(estimator, X, y=dummy_y, cv=kfold)
print(
    "Accuracy: %.2f%% (%.2f%%)"
    % (100 * results.mean(), 100 * results.std())
)