In [61]:
# Testing out techniques from:
# http://neuralnetworksanddeeplearning.com/chap3.html#how_to_choose_a_neural_network's_hyper-parameters

from keras import backend as k
from keras.models import Sequential
from keras.layers import Dense
from keras.objectives import categorical_crossentropy
from keras.metrics import categorical_accuracy as accuracy
from keras import optimizers

import numpy as np
import pandas as pd
from sklearn.cross_validation import train_test_split

## Data preparation

In [19]:
# Load the iris measurements from the local CSV file, then display summary
# statistics for the numeric columns.
data = pd.read_csv(filepath_or_buffer="iris.csv")
data.describe()

Unnamed: 0,ID,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.057333,3.758,1.199333
std,43.445368,0.828066,0.435866,1.765298,0.762238
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [12]:
# Show the column labels of the loaded frame
data.columns

Index(['ID', 'Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width',
       'Species'],
      dtype='object')

In [23]:
# Encode each species name as an integer class id and store it in a new
# "label_num" column (only the codes are kept, not the list of unique names).
data["label_num"] = data["Species"].factorize()[0]

In [24]:
# Check the dtype of every column, including the new integer label column
data.dtypes

ID                int64
Sepal.Length    float64
Sepal.Width     float64
Petal.Length    float64
Petal.Width     float64
Species          object
label_num         int64
dtype: object

In [25]:
# Peek at ten randomly chosen rows (no seed is given, so the rows shown
# differ from run to run).
data.sample(n=10)

Unnamed: 0,ID,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species,label_num
131,132,7.9,3.8,6.4,2.0,virginica,2
121,122,5.6,2.8,4.9,2.0,virginica,2
96,97,5.7,2.9,4.2,1.3,versicolor,1
65,66,6.7,3.1,4.4,1.4,versicolor,1
26,27,5.0,3.4,1.6,0.4,setosa,0
69,70,5.6,2.5,3.9,1.1,versicolor,1
119,120,6.0,2.2,5.0,1.5,virginica,2
90,91,5.5,2.6,4.4,1.2,versicolor,1
13,14,4.3,3.0,1.1,0.1,setosa,0
33,34,5.5,4.2,1.4,0.2,setosa,0


In [27]:
# Build a numeric-only frame: remove the row ID and the text species name,
# leaving the measurement columns plus the integer label.
data_numeric = data.drop(labels=["ID", "Species"], axis="columns")

In [28]:
# Convert to numpy array.
# `DataFrame.as_matrix()` is deprecated and was removed in pandas 1.0;
# `.values` returns the same ndarray and is available on every pandas version.
data_array = data_numeric.values

In [34]:
# Split into train, validation, and test
# Every column except the last is a feature; the last column is the label.
x = data_array[:, :-1]
y = data_array[:, -1:]  # slicing (not indexing) keeps y two-dimensional

# A fixed random_state makes both splits reproducible, so every model trained
# below sees the same rows after a kernel restart and the hyper-parameter
# comparisons stay meaningful.
# First hold out 10% as the test set, then 10% of the remainder for validation.
x_trainval, x_test, y_trainval, y_test = train_test_split(
    x, y, test_size=0.1, train_size=0.9, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(
    x_trainval, y_trainval, test_size=.10, train_size=0.9, random_state=42)

In [35]:
# First five feature rows of the training split
x_train[:5]

array([[ 5.7,  2.8,  4.1,  1.3],
       [ 5.8,  2.7,  3.9,  1.2],
       [ 5. ,  3.6,  1.4,  0.2],
       [ 6.3,  2.7,  4.9,  1.8],
       [ 4.8,  3.1,  1.6,  0.2]])

In [36]:
# First five labels of the training split
y_train[:5]

array([[ 1.],
       [ 1.],
       [ 0.],
       [ 2.],
       [ 0.]])

In [41]:
# Number of rows in the training features
x_train.shape[0]

121

In [37]:
# Number of rows in the validation features
x_val.shape[0]

14

In [38]:
# Number of rows in the test features
x_test.shape[0]

15

In [42]:
# Number of rows in the training labels
y_train.shape[0]

121

In [39]:
# Number of rows in the validation labels
y_val.shape[0]

14

In [40]:
# Number of rows in the test labels
y_test.shape[0]

15

## Neural Network Tuning

In [73]:
# Baseline: the simplest network likely to do meaningful learning.
# Topology (4,3): the 4 input features feed straight into 3 output units
# (one per class); there is no hidden layer.
# NOTE(review): the output units are sigmoid rather than softmax, so the three
# outputs are not constrained to sum to 1 - confirm this is intended for a
# cross-entropy loss.
model = Sequential([
    Dense(3, input_dim=4, activation='sigmoid'),
])
model.compile(
    optimizer='adagrad',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x_train, y_train, batch_size=20, nb_epoch=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x13e713b00>

In [74]:
# Next experiment: insert a hidden layer.
# Topology (4,4,3): 4 inputs -> 4 hidden sigmoid units -> 3 outputs.
model_2 = Sequential([
    Dense(4, input_dim=4, activation='sigmoid'),
    Dense(3, activation='sigmoid'),
])
model_2.compile(
    optimizer='adagrad',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model_2.fit(x_train, y_train, batch_size=20, nb_epoch=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x13e9f2e80>

In [75]:
# Swap the optimizer for plain stochastic gradient descent.
# Topology is unchanged at (4,4,3): 4 inputs -> 4 hidden units -> 3 outputs.
model_3 = Sequential([
    Dense(4, input_dim=4, activation='sigmoid'),
    Dense(3, activation='sigmoid'),
])

# SGD with a learning rate of 0.01 as the starting point
sgd = optimizers.SGD(lr=0.01)

model_3.compile(
    optimizer=sgd,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model_3.fit(x_train, y_train, batch_size=20, nb_epoch=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x13f97b048>

In [86]:
# Values stayed consistently low in the previous run, so raise the learning
# rate from 0.01 to 0.1.
# Topology is unchanged at (4,4,3): 4 inputs -> 4 hidden units -> 3 outputs.
model_4 = Sequential([
    Dense(4, input_dim=4, activation='sigmoid'),
    Dense(3, activation='sigmoid'),
])
sgd = optimizers.SGD(lr=0.1)
model_4.compile(
    optimizer=sgd,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model_4.fit(x_train, y_train, batch_size=20, nb_epoch=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x13fd85940>

In [96]:
# Much better! We got into the 90s!
# Let's keep the learning rate at 0.1 and see if we can improve more
# Weight decay trials

# Tried lambda = 5, 10, 20, 200 - unable to see improvements
# Trying much smaller values helps a little, but not much improvement over no weight decay
#
# NOTE(review): the value is passed as `decay=` to optimizers.SGD below. In
# Keras that argument is a learning-rate decay applied on each update, not an
# L2 weight-decay (lambda) penalty on the weights, so this cell may not be
# testing what the heading says. An L2 penalty would be configured through a
# regularizer on the Dense layers instead - confirm the intent.

model_5 = Sequential()

# Network is (4,4,3): 4 inputs, 4 hidden neurons, 3 outputs
model_5.add(Dense(4, input_dim=4, activation='sigmoid'))
model_5.add(Dense(3, activation='sigmoid'))
sgd = optimizers.SGD(lr=0.1, decay=1e-6)
model_5.compile(loss='sparse_categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model_5.fit(x_train, y_train, nb_epoch=50, batch_size=20)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x144558b38>

In [97]:
# Momentum trial: same (4,4,3) network and learning rate 0.1, now with
# momentum 0.9 on the SGD optimizer.
model_6 = Sequential([
    Dense(4, input_dim=4, activation='sigmoid'),
    Dense(3, activation='sigmoid'),
])
sgd = optimizers.SGD(lr=0.1, momentum=0.9)
model_6.compile(
    optimizer=sgd,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model_6.fit(x_train, y_train, batch_size=20, nb_epoch=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x146e57dd8>

In [98]:
# Momentum 0.9 seemed to help - does a larger value (0.95) help more?
# Topology is unchanged at (4,4,3): 4 inputs -> 4 hidden units -> 3 outputs.
model_7 = Sequential([
    Dense(4, input_dim=4, activation='sigmoid'),
    Dense(3, activation='sigmoid'),
])
sgd = optimizers.SGD(lr=0.1, momentum=0.95)
model_7.compile(
    optimizer=sgd,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model_7.fit(x_train, y_train, batch_size=20, nb_epoch=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x146fa5f98>

In [99]:
# 0.95 was not as good, so go the other way and lower momentum to 0.85.
# Topology is unchanged at (4,4,3): 4 inputs -> 4 hidden units -> 3 outputs.
model_8 = Sequential([
    Dense(4, input_dim=4, activation='sigmoid'),
    Dense(3, activation='sigmoid'),
])
sgd = optimizers.SGD(lr=0.1, momentum=0.85)
model_8.compile(
    optimizer=sgd,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model_8.fit(x_train, y_train, batch_size=20, nb_epoch=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x144eda278>

In [105]:
# The 0.85 run looks good, and we didn't see the dip back down to ~.6 at epoch 50.
#
# Settings chosen so far:
#   learning rate: 0.1
#   momentum:      0.85
#   topology:      (4,4,3), fully connected sigmoid units
#
# Next experiment: early stopping.
from keras.callbacks import EarlyStopping

# Same (4,4,3) network: 4 inputs -> 4 hidden units -> 3 outputs
model_9 = Sequential([
    Dense(4, input_dim=4, activation='sigmoid'),
    Dense(3, activation='sigmoid'),
])

sgd = optimizers.SGD(lr=0.1, momentum=0.85)
# Watch the validation loss, with a patience of 5 epochs
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

model_9.compile(
    optimizer=sgd,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Allow up to 100 epochs; the held-out validation split drives the callback
model_9.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    callbacks=[early_stopping],
    batch_size=20,
    nb_epoch=100,
)

Train on 121 samples, validate on 14 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100


<keras.callbacks.History at 0x1469c3668>