# 26 Sklearn Multilayer Perceptron

In [1]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import GridSearchCV

In [2]:
# stores data in ~/scikit_learn_data by default
mnist = fetch_mldata('MNIST original')

In [3]:
mnist.data.shape

(70000, 784)

In [4]:
split = 60000

X, y = mnist.data / 255., mnist.target

X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

mlp = MLPClassifier(verbose=10,
                    hidden_layer_sizes=(10, 10),
                    max_iter=10,
                    solver='sgd',
                    tol=1e-4, # tolerance
                    random_state=0,
                    learning_rate_init=.1) # learning_rate : {‘constant’, ‘invscaling’, ‘adaptive’}, default ‘constant’

mlp.fit(X_train, y_train)

print("training set score: %0.6f" % mlp.score(X_train, y_train))
print("test set score: %0.6f" % mlp.score(X_test, y_test))

Iteration 1, loss = 0.55338392
Iteration 2, loss = 0.32591660
Iteration 3, loss = 0.28802145
Iteration 4, loss = 0.26967585
Iteration 5, loss = 0.25730239
Iteration 6, loss = 0.24995988
Iteration 7, loss = 0.24193940
Iteration 8, loss = 0.23507409
Iteration 9, loss = 0.22909462
Iteration 10, loss = 0.22472000




training set score: 0.940417
test set score: 0.932600


### Using GridSearchCV for architecture.

In [5]:
parameters = {
    'hidden_layer_sizes': ((5,), (10,)),
    'activation': ('relu',),
}

mlp = MLPClassifier()
clf = GridSearchCV(mlp, parameters, verbose=10, n_jobs=1, cv=3)

# run parameter search
clf.fit(X_train, y_train)

Fitting 3 folds for each of 2 candidates, totalling 6 fits
[CV] activation=relu, hidden_layer_sizes=(5,) ........................
[CV]  activation=relu, hidden_layer_sizes=(5,), score=0.8899220155968807, total= 1.7min
[CV] activation=relu, hidden_layer_sizes=(5,) ........................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.7min remaining:    0.0s


[CV]  activation=relu, hidden_layer_sizes=(5,), score=0.8983449172458623, total=  33.5s
[CV] activation=relu, hidden_layer_sizes=(5,) ........................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  2.3min remaining:    0.0s


[CV]  activation=relu, hidden_layer_sizes=(5,), score=0.8979346902035306, total=  24.5s
[CV] activation=relu, hidden_layer_sizes=(10,) .......................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  2.7min remaining:    0.0s


[CV]  activation=relu, hidden_layer_sizes=(10,), score=0.9313637272545491, total= 1.0min
[CV] activation=relu, hidden_layer_sizes=(10,) .......................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:  3.7min remaining:    0.0s


[CV]  activation=relu, hidden_layer_sizes=(10,), score=0.928296414820741, total=  45.2s
[CV] activation=relu, hidden_layer_sizes=(10,) .......................


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  4.5min remaining:    0.0s


[CV]  activation=relu, hidden_layer_sizes=(10,), score=0.9270390558583788, total= 1.1min


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:  5.6min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:  5.6min finished


GridSearchCV(cv=3, error_score='raise',
       estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'hidden_layer_sizes': ((5,), (10,)), 'activation': ('relu',)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=10)