In [92]:
# Libraries to import
# The 'sklearn' libraries are only to use the Cross Validation method
# The 'keras' libraries are used for the neural network
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold
from keras.wrappers.scikit_learn import BaseWrapper
import sklearn.metrics as sm
import copy

In [93]:
# Load the UCI Iris data. The raw file has no header row, so the column names
# are supplied explicitly; any fully-empty row (e.g. a blank line at the end of
# the file) is dropped.
dataset = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    sep=',',
    header=None,
    names=['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class']
).dropna(how="all")

# Preview the last rows
dataset.tail()


Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica


In [94]:
# Convert the dataframe to a NumPy array: columns 0-3 hold the four measurements
# and column 4 holds the class label.
# NOTE(review): the class column contains strings, so this array presumably has
# object dtype rather than float - confirm before feeding it to numeric code.
X=dataset.values

In [95]:
# Sizes of the three partitions. In this notebook the "train" and "test" rows are
# pooled and used for K-fold cross-validation, while the "valid" rows are kept
# aside for the final held-out evaluation.
n_train = 50
n_test = 50
n_valid = 50

# Reproducible random shuffle of the row indices; `seed` is the single source of
# truth for the random state (it is reused by the KFold splitters further down).
seed = 10
np.random.seed(seed)
inds = np.arange(dataset.shape[0])
np.random.shuffle(inds)

# Row indices of each partition. The bounds are derived from the sizes above
# instead of a hard-coded 150, so the split stays correct if the sizes change.
n_cv = n_train + n_test
cv_rows = inds[:n_cv]
valid_rows = inds[n_cv:n_cv + n_valid]

# X comes from `dataset.values`, which mixes the numeric columns with the string
# class column; the four feature columns are cast to float so the network
# receives a numeric array. The label column (index 4) is left as strings.
X_train_test = X[cv_rows, 0:4].astype(float)
Y_train_test = X[cv_rows, 4]
X_valid = X[valid_rows, 0:4].astype(float)
Y_valid = X[valid_rows, 4]

In [96]:
# Inspect the shuffled row order produced by np.random.shuffle above
inds

array([ 87, 111,  10,  91,  49,  60,  72,  67,  39,  55,  66, 142,  53,
         1,  19, 112,  85,  38,  21,  35, 102, 132, 126,  24,  61,   2,
        95,  90,  76, 117,  58,  97, 129, 114, 146,  47, 124, 120, 118,
       141,  26,  43,  59,  41,  56,  32,  52,  70, 121, 144,  68, 109,
        81,  78,  51,  14,  48,  63,  20, 137,  29,   3, 106,  98, 140,
        37, 139,  45,  82,  83,  42, 143, 131, 116, 105, 133,   6,  79,
       108,  99,  34, 128,  50,  96,   4,   5,  44,  84, 130, 103,  75,
         7,  46,  17, 104, 101,  71,  80, 110, 147, 149, 134,  74,  28,
        11,  94,  23,  22, 127,  93,  18,  27,  36,  57,  31,  65,  12,
        89, 119,  30,  86,  92, 148,  25, 138,  13,  69,  77, 135, 136,
        33,  62, 122, 107,  88,  54, 100,  16, 115,  40,   0,  73,   8,
       145, 123, 113,  64,  15, 125,   9])

In [97]:
# Map the string class labels of the train/test rows to integer codes, then
# expand the codes into one-hot rows for the categorical cross-entropy loss.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y_train_test)
dummy_y_train_test = np_utils.to_categorical(encoded_Y)

In [98]:
# Encode the validation labels with the encoder that was already fitted on the
# train/test labels. Fitting a second, independent encoder on the validation
# labels would silently shift the integer codes if a class happened to be absent
# from the validation rows, making them incomparable with the network's output
# indices. Reusing the fitted encoder guarantees the same label -> index mapping
# and fails loudly on a label never seen during training.
encoded_Y_valid = encoder.transform(Y_valid)
encoded_Y_valid

array([2, 2, 1, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 1, 0, 1, 0, 1, 2, 0, 1, 1, 2,
       0, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 1, 1, 2, 0, 2, 0, 0, 1, 0, 2, 2, 2,
       1, 0, 2, 0], dtype=int64)

In [99]:
# Number of input features (columns of the feature matrix); this is the value
# passed as `input_dim` when the network is built.
X_train_test.shape[1]

4

In [100]:
def baseline_model(input_dim=None, output_dim=None, activation=None):
    """Build and compile a one-hidden-layer softmax classifier for 3 classes.

    The parameters default to None because the function is meant to be called
    through `KerasClassifier`, which forwards the values it was constructed with.

    Parameters
    ----------
    input_dim : int
        Dimension of the feature vector fed to the hidden layer.
    output_dim : int
        Number of neurons in the hidden layer.
    activation : str or None
        Activation of the hidden layer (function "h_{2}" in the PDF). When None,
        'relu' is used, which is the value this function used unconditionally
        before the parameter was honoured.

    Returns
    -------
    A compiled `Sequential` model.
    """
    # Fall back to 'relu' so existing callers that never pass `activation`
    # keep getting exactly the same network as before.
    hidden_activation = 'relu' if activation is None else activation

    # 1) Create model
    model = Sequential()

    # 2) Hidden layer: weights are always initialised with 'normal'
    model.add(Dense(output_dim=output_dim, input_dim=input_dim, init='normal', activation=hidden_activation))

    # 3) Output layer: 3 units because there are 3 categories; softmax turns
    #    the scores into class probabilities
    model.add(Dense(output_dim=3, init='normal', activation='softmax'))

    # 4) Compile: 'categorical_crossentropy' is the loss for one-hot encoded
    #    multi-class targets
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [101]:
# Baseline run: wrap `baseline_model` in a scikit-learn compatible classifier
# with 15 hidden neurons and 100 epochs.
esti = KerasClassifier(
    build_fn=baseline_model,
    input_dim=X_train_test.shape[1],
    output_dim=15,
    nb_epoch=100,
    batch_size=32,
    verbose=0)

# 10-fold cross-validation with shuffled, seeded folds
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

# One accuracy score per fold
results = cross_val_score(esti, X_train_test, dummy_y_train_test, cv=kfold)

# Report mean and standard deviation of the fold accuracies, in percent
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Accuracy: 87.00% (12.69%)


In [67]:
# Re-display the mean and standard deviation (in %) of the fold accuracies in `results`.
# NOTE(review): this cell's execution count (67) is out of sequence with the
# surrounding cells, so its recorded output may belong to a different run than
# the cross-validation cell above - re-run the notebook top to bottom to confirm.
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Accuracy: 65.00% (10.25%)


In [70]:
# Hyperparameter selection by exhaustive grid search - steps 2 to 5 in the PDF
import warnings  # So that it doesn't show a warning message
warnings.filterwarnings("ignore", category=DeprecationWarning)

d_out = np.arange(start=10, stop=30, step=2)       # Grid of hidden neurons
nb_epoch = np.arange(start=30, stop=130, step=10)  # Grid of the number of epochs
batch_size = 32                                    # Batch size for the gradient descent : see description link in PDF
input_dim = X_train_test.shape[1]                  # Number of input features (columns, not the number of rows)

# Best configuration found so far. The *_opt values stay None if no
# configuration ever beats 0% accuracy, instead of being left undefined.
results_opt = 0
d_out_opt = None
nb_epoch_opt = None

# K-Fold cross validation with K = 10. The splitter is built once: with a fixed
# random_state it yields the same folds for every candidate, so all
# configurations are compared on identical splits.
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

# Loop for the first hyperparameter : nbs of hidden neurons
for n_hidden in d_out:

    # Loop for the second hyperparameter : nbs of epochs
    for n_epochs in nb_epoch:

        # Wrap "baseline_model" with the current candidate configuration
        esti = KerasClassifier(build_fn=baseline_model, nb_epoch=n_epochs, batch_size=batch_size,
                               verbose=0, input_dim=input_dim, output_dim=n_hidden)

        # % of classification success for each of the K folds
        results = cross_val_score(esti, X_train_test, dummy_y_train_test, cv=kfold)
        mean_accuracy = results.mean()*100

        # Keep this configuration if it beats the best one seen so far
        if mean_accuracy > results_opt:
            d_out_opt = n_hidden
            nb_epoch_opt = n_epochs
            results_opt = mean_accuracy

In [71]:
# Outcome of the grid search: best mean K-fold accuracy (in %), followed by the
# number of hidden neurons and the number of epochs that achieved it
print(results_opt)
print(d_out_opt)
print(nb_epoch_opt)

95.9999996424
20
110


In [84]:
# Step 6 : Retrain a new Neural Network on the whole Train AND Test set using
# the hyperparameters selected by the grid search:
#   d_out_opt    : optimal number of hidden neurons
#   nb_epoch_opt : optimal number of epochs
# The network is built with `baseline_model`, i.e. exactly the architecture that
# was cross-validated, and the selected values are read from the variables
# rather than copied in by hand, so the final model always matches the search.
input_dim = X_train_test.shape[1]
model = baseline_model(input_dim=input_dim, output_dim=d_out_opt)

# We don't use the cross-validation here, we only train the model on the
# Train&Test set with "model.fit", using the same batch size as the search.
logs = model.fit(X_train_test, dummy_y_train_test, nb_epoch=nb_epoch_opt, batch_size=batch_size)

Epoch 1/110
Epoch 2/110
Epoch 3/110
Epoch 4/110
Epoch 5/110
Epoch 6/110
Epoch 7/110
Epoch 8/110
Epoch 9/110
Epoch 10/110
Epoch 11/110
Epoch 12/110
Epoch 13/110
Epoch 14/110
Epoch 15/110
Epoch 16/110
Epoch 17/110
Epoch 18/110
Epoch 19/110
Epoch 20/110
Epoch 21/110
Epoch 22/110
Epoch 23/110
Epoch 24/110
Epoch 25/110
Epoch 26/110
Epoch 27/110
Epoch 28/110
Epoch 29/110
Epoch 30/110
Epoch 31/110
Epoch 32/110
Epoch 33/110
Epoch 34/110
Epoch 35/110
Epoch 36/110
Epoch 37/110
Epoch 38/110
Epoch 39/110
Epoch 40/110
Epoch 41/110
Epoch 42/110
Epoch 43/110
Epoch 44/110
Epoch 45/110
Epoch 46/110
Epoch 47/110
Epoch 48/110
Epoch 49/110
Epoch 50/110
Epoch 51/110
Epoch 52/110
Epoch 53/110
Epoch 54/110
Epoch 55/110
Epoch 56/110
Epoch 57/110
Epoch 58/110
Epoch 59/110
Epoch 60/110
Epoch 61/110
Epoch 62/110
Epoch 63/110
Epoch 64/110
Epoch 65/110
Epoch 66/110
Epoch 67/110
Epoch 68/110
Epoch 69/110
Epoch 70/110
Epoch 71/110
Epoch 72/110
Epoch 73/110
Epoch 74/110
Epoch 75/110
Epoch 76/110
Epoch 77/110
Epoch 78

Epoch 88/110
Epoch 89/110
Epoch 90/110
Epoch 91/110
Epoch 92/110
Epoch 93/110
Epoch 94/110
Epoch 95/110
Epoch 96/110
Epoch 97/110
Epoch 98/110
Epoch 99/110
Epoch 100/110
Epoch 101/110
Epoch 102/110
Epoch 103/110
Epoch 104/110
Epoch 105/110
Epoch 106/110
Epoch 107/110
Epoch 108/110
Epoch 109/110
Epoch 110/110


In [85]:
# Integer-encoded true labels of the validation set, shown as the reference
# against which the model's predicted class indices are compared
encoded_Y_valid

array([2, 2, 1, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 1, 0, 1, 0, 1, 2, 0, 1, 1, 2,
       0, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 1, 1, 2, 0, 2, 0, 0, 1, 0, 2, 2, 2,
       1, 0, 2, 0], dtype=int64)

In [86]:
# Rate of classification success on the Valid set.
# model.predict(X_valid) returns, for each row, the softmax output: a vector of
# class probabilities that sums to 1. The predicted class is the index of the
# largest entry (np.argmax along axis 1).
pred_valid_set = np.argmax(model.predict(X_valid), axis=1)

# accuracy_score expects (y_true, y_pred). Accuracy itself is symmetric, but the
# correct order matters as soon as this line is adapted to an asymmetric metric
# (precision, recall, confusion matrix).
sm.accuracy_score(encoded_Y_valid, pred_valid_set)

0.97999999999999998

In [None]:
# Model predictions for the Validation set: one predicted class index per row
# (the argmax of the softmax output)
pred_valid_set

In [None]:
# Real classification of the Validation set, as the original class labels
# (before integer encoding)
Y_valid