In [2]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.cross_validation import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

Using Theano backend.


In [3]:
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

In [4]:
# load dataset
dataframe = pd.read_csv("data/sonar.csv", header=None)
dataset = dataframe.values
# split into input X and output Y variables
X = dataset[:,0:60].astype(float)
Y = dataset[:,60]

In [5]:
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

In [6]:
# baseline model
def create_baseline():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=60, init='normal', activation='relu'))
    model.add(Dense(1, init='normal', activation='sigmoid'))
    
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [8]:
# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=create_baseline, nb_epoch=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(y=encoded_Y, n_folds=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Baseline: 81.68% (5.67%)


## 11.3 Improve Performance With Data Preparation

In [9]:
# evaluate baseline model with standardized dataset
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, nb_epoch=100, batch_size=5, verbose=0)))

In [10]:
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(y=encoded_Y, n_folds=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()))

Standardized: 84.07% (0.06%)


## 11.4 Tuning Layers and Neurons in the Model

### 11.4.1. Evaluate a Smaller Network

In [11]:
# smaller model
def create_baseline():
    # create model
    model = Sequential()
    model.add(Dense(30, input_dim=60, init='normal', activation='relu'))
    model.add(Dense(1, init='normal', activation='sigmoid'))
    
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [12]:
# evaluate baseline model with standardized dataset
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, nb_epoch=100, batch_size=5, verbose=0)))

In [19]:
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(y=encoded_Y, n_folds=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 85.04% (5.53%)


### 11.4.2. Evaluate a Larger Network

In [20]:
# larger model
# addition layer with 30 neurons
def create_larger():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=60, init='normal', activation='relu'))
    model.add(Dense(30, init='normal', activation='relu'))
    model.add(Dense(1, init='normal', activation='sigmoid'))
    
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [21]:
# evaluate baseline model with standardized dataset
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_larger, nb_epoch=100, batch_size=5, verbose=0)))
# assemble pipeline
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(y=encoded_Y, n_folds=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 86.47% (3.82%)


## What is the best score that you can achieve on this dataset?

In [26]:
# larger model
# addition layer with 30 neurons
def create_larger(n=30):
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=60, init='normal', activation='relu'))
    model.add(Dense(n, init='normal', activation='relu'))
    model.add(Dense(1, init='normal', activation='sigmoid'))
    
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [28]:
# evaluate baseline model with standardized dataset
n = [5,10,15,20,25,30,35,40,45,50]
for i in n:
    print i
    def create_larger():
        # create model
        model = Sequential()
        model.add(Dense(60, input_dim=60, init='normal', activation='relu'))
        model.add(Dense(i, init='normal', activation='relu'))
        model.add(Dense(1, init='normal', activation='sigmoid'))

        # Compile model
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

        return model
    
    ####
    
    np.random.seed(seed)
    estimators = []
    estimators.append(('standardize', StandardScaler()))
    estimators.append(('mlp', KerasClassifier(build_fn=create_larger, nb_epoch=100, batch_size=5, verbose=0)))
    # assemble pipeline
    pipeline = Pipeline(estimators)
    kfold = StratifiedKFold(y=encoded_Y, n_folds=10, shuffle=True, random_state=seed)
    results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
    print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

5
Standardized: 85.06% (5.89%)
10
Standardized: 84.59% (4.75%)
15
Standardized: 82.66% (4.52%)
20
Standardized: 82.68% (5.31%)
25
Standardized: 84.11% (3.15%)
30
Standardized: 86.47% (3.82%)
35
Standardized: 84.11% (4.36%)
40
Standardized: 84.11% (3.80%)
45
Standardized: 83.66% (4.34%)
50
Standardized: 84.59% (3.67%)


### GridSearchCV

In [29]:
# create model
# larger model
# addition layer with 30 neurons
def create_larger(optimizer='rmsprop', init='glorot_uniform'):
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=60, init='normal', activation='relu'))
    model.add(Dense(30, init=init, activation='relu'))
    model.add(Dense(1, init=init, activation='sigmoid'))
    
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model

In [30]:
# grid search epochs, batch size and optimizer
optimizers = ['rmsprop','adam']
init = ['glorot_uniform', 'normal', 'uniform']
epochs = np.array([50,100,150])
batches = np.array([5,10,20])
param_grid = dict(optimizer=optimizers, nb_epoch=epochs, batch_size=batches, init=init)

In [31]:
# evaluate baseline model with standardized dataset
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_larger, verbose=0)))
# assemble pipeline
pipeline = Pipeline(estimators)
#kfold = StratifiedKFold(y=encoded_Y, n_folds=10, shuffle=True, random_state=seed)
#results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
#print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

In [34]:
np.random.seed(seed)
model = KerasClassifier(build_fn=create_larger, verbose=0)

In [35]:
# continue grid search // Andy Karpathy recommends random search
from sklearn.grid_search import GridSearchCV
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X, encoded_Y)

In [36]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for params, mean_score, scores in grid_result.grid_scores_:
    print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))

Best: 0.461538 using {'init': 'normal', 'optimizer': 'rmsprop', 'nb_epoch': 100, 'batch_size': 10}
0.442236 (0.071778) with: {'init': 'glorot_uniform', 'optimizer': 'rmsprop', 'nb_epoch': 50, 'batch_size': 5}
0.398758 (0.077073) with: {'init': 'glorot_uniform', 'optimizer': 'adam', 'nb_epoch': 50, 'batch_size': 5}
0.442029 (0.058272) with: {'init': 'glorot_uniform', 'optimizer': 'rmsprop', 'nb_epoch': 100, 'batch_size': 5}
0.399034 (0.017763) with: {'init': 'glorot_uniform', 'optimizer': 'adam', 'nb_epoch': 100, 'batch_size': 5}
0.336370 (0.086463) with: {'init': 'glorot_uniform', 'optimizer': 'rmsprop', 'nb_epoch': 150, 'batch_size': 5}
0.403796 (0.030499) with: {'init': 'glorot_uniform', 'optimizer': 'adam', 'nb_epoch': 150, 'batch_size': 5}
0.418150 (0.096265) with: {'init': 'normal', 'optimizer': 'rmsprop', 'nb_epoch': 50, 'batch_size': 5}
0.437474 (0.059289) with: {'init': 'normal', 'optimizer': 'adam', 'nb_epoch': 50, 'batch_size': 5}
0.456729 (0.017752) with: {'init': 'normal', 

## Above results are not trust worthy

In [38]:
# work around
import itertools
param_grid_alt = list(itertools.product(optimizers, init, epochs,batches))
final_score = []

for i in param_grid_alt:
    print i
    
    opt = i[0]
    ini = i[1]
    epo = i[2]
    bat = i[3]


    def create_larger(optimizer=opt, init=ini):
        # create model
        model = Sequential()
        model.add(Dense(60, input_dim=60, init='normal', activation='relu'))
        model.add(Dense(30, init=init, activation='relu'))
        model.add(Dense(1, init=init, activation='sigmoid'))

        # Compile model
        model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

        return model

    
    # evaluate baseline model with standardized dataset
    np.random.seed(seed)
    estimators = []
    estimators.append(('standardize', StandardScaler()))
    estimators.append(('mlp', KerasClassifier(build_fn=create_larger, nb_epoch=epo, batch_size=bat, verbose=0)))
    # assemble pipeline
    pipeline = Pipeline(estimators)
    kfold = StratifiedKFold(y=encoded_Y, n_folds=10, shuffle=True, random_state=seed)
    results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
    
    print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
    final_score.append((results.mean()*100, results.std()*100))



('rmsprop', 'glorot_uniform', 50, 5)
Standardized: 85.06% (3.47%)




('rmsprop', 'glorot_uniform', 50, 10)
Standardized: 85.09% (2.57%)




('rmsprop', 'glorot_uniform', 50, 20)
Standardized: 85.54% (3.84%)




('rmsprop', 'glorot_uniform', 100, 5)
Standardized: 85.06% (5.89%)




('rmsprop', 'glorot_uniform', 100, 10)
Standardized: 85.04% (6.38%)




('rmsprop', 'glorot_uniform', 100, 20)
Standardized: 86.52% (5.64%)




('rmsprop', 'glorot_uniform', 150, 5)
Standardized: 86.49% (4.32%)




('rmsprop', 'glorot_uniform', 150, 10)
Standardized: 86.49% (5.27%)




('rmsprop', 'glorot_uniform', 150, 20)
Standardized: 86.49% (5.27%)




('rmsprop', 'normal', 50, 5)
Standardized: 84.59% (4.69%)




('rmsprop', 'normal', 50, 10)
Standardized: 84.14% (5.63%)




('rmsprop', 'normal', 50, 20)
Standardized: 84.59% (5.97%)




('rmsprop', 'normal', 100, 5)
Standardized: 85.52% (4.92%)




('rmsprop', 'normal', 100, 10)
Standardized: 84.59% (4.75%)




('rmsprop', 'normal', 100, 20)
Standardized: 84.54% (6.15%)




('rmsprop', 'normal', 150, 5)
Standardized: 84.54% (5.36%)




('rmsprop', 'normal', 150, 10)
Standardized: 85.04% (5.62%)




('rmsprop', 'normal', 150, 20)
Standardized: 84.07% (5.85%)




('rmsprop', 'uniform', 50, 5)
Standardized: 85.54% (5.68%)




('rmsprop', 'uniform', 50, 10)
Standardized: 86.04% (4.00%)




('rmsprop', 'uniform', 50, 20)
Standardized: 86.50% (4.76%)




('rmsprop', 'uniform', 100, 5)
Standardized: 85.54% (5.27%)




('rmsprop', 'uniform', 100, 10)
Standardized: 85.06% (6.27%)




('rmsprop', 'uniform', 100, 20)
Standardized: 84.59% (6.38%)




('rmsprop', 'uniform', 150, 5)
Standardized: 87.42% (4.61%)




('rmsprop', 'uniform', 150, 10)
Standardized: 85.54% (4.39%)




('rmsprop', 'uniform', 150, 20)
Standardized: 85.56% (2.17%)




('adam', 'glorot_uniform', 50, 5)
Standardized: 85.09% (4.97%)




('adam', 'glorot_uniform', 50, 10)
Standardized: 84.16% (2.88%)




('adam', 'glorot_uniform', 50, 20)
Standardized: 83.68% (3.66%)




('adam', 'glorot_uniform', 100, 5)
Standardized: 85.56% (4.28%)




('adam', 'glorot_uniform', 100, 10)
Standardized: 85.56% (5.66%)




('adam', 'glorot_uniform', 100, 20)
Standardized: 85.56% (5.66%)




('adam', 'glorot_uniform', 150, 5)
Standardized: 84.09% (6.20%)




('adam', 'glorot_uniform', 150, 10)
Standardized: 85.09% (4.99%)




('adam', 'glorot_uniform', 150, 20)
Standardized: 84.16% (5.14%)




('adam', 'normal', 50, 5)
Standardized: 84.59% (5.97%)




('adam', 'normal', 50, 10)
Standardized: 84.61% (5.12%)




('adam', 'normal', 50, 20)
Standardized: 84.64% (4.61%)




('adam', 'normal', 100, 5)
Standardized: 86.47% (3.82%)




('adam', 'normal', 100, 10)
Standardized: 85.97% (5.69%)




('adam', 'normal', 100, 20)
Standardized: 85.06% (3.47%)




('adam', 'normal', 150, 5)
Standardized: 83.59% (5.89%)




('adam', 'normal', 150, 10)
Standardized: 84.54% (4.92%)




('adam', 'normal', 150, 20)
Standardized: 84.09% (5.42%)




('adam', 'uniform', 50, 5)
Standardized: 85.09% (3.34%)




('adam', 'uniform', 50, 10)
Standardized: 86.54% (4.14%)




('adam', 'uniform', 50, 20)
Standardized: 86.54% (4.14%)




('adam', 'uniform', 100, 5)
Standardized: 85.56% (6.44%)




('adam', 'uniform', 100, 10)
Standardized: 85.09% (5.86%)




('adam', 'uniform', 100, 20)
Standardized: 83.61% (6.59%)




('adam', 'uniform', 150, 5)
Standardized: 84.09% (4.51%)




('adam', 'uniform', 150, 10)
Standardized: 84.09% (4.51%)




('adam', 'uniform', 150, 20)
Standardized: 85.06% (4.07%)


