## Import packages and load data

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [3]:
# Load the breast-cancer dataset bundled with scikit-learn and display the
# dimensions of its feature matrix.
cancer = load_breast_cancer()
cancer['data'].shape

(569, 30)

In [4]:
# Feature matrix and target vector, pulled from the dataset container.
X, y = cancer['data'], cancer['target']

## Train Test Split

In [5]:
# Hold out 30% of the samples as a test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=101,
    test_size=0.3,
)

## Let's compute a baseline model with default values (apart from `max_iter` and `random_state`) on the raw, unscaled data

In [9]:
# Baseline network: library defaults, plus a fixed seed and an explicit
# iteration cap of 1000.
mlp = MLPClassifier(
    max_iter=1000,
    random_state=0,
)

In [10]:
# Train the baseline on the raw (unscaled) training split.
mlp.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=1000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=0, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [11]:
# Scores of the raw-data baseline: not bad, but not yet as good as other
# classifiers.
train_acc = mlp.score(X_train, y_train)
test_acc = mlp.score(X_test, y_test)
print('Training Score: {:.2f}'.format(train_acc))
print('Test Score: {:.2f}'.format(test_acc))

Training Score: 0.97
Test Score: 0.92


In [14]:
# Predictions of the baseline model for the held-out test split (raw features).
pred = mlp.predict(X_test)

In [15]:
# 13 misclassifications (off-diagonal entries: 8 + 5)
cm = confusion_matrix(y_test, pred)
print(cm)

[[ 58   8]
 [  5 100]]


## Let's apply the StandardScaler to our features to improve Performance

In [16]:
# Create the feature scaler; it is fitted on the training split in the next cell.
scaler = StandardScaler()

In [17]:
# Fit the scaler on the training split only, so the test split does not
# influence the preprocessing.
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [18]:
# Apply the training-fitted scaler to both splits.
X_train_scaled, X_test_scaled = (
    scaler.transform(X_train),
    scaler.transform(X_test),
)

In [19]:
# Same configuration as the baseline; only the input data changes (it is
# trained on the scaled features below).
mlp = MLPClassifier(
    max_iter=1000,
    random_state=0,
)

In [20]:
# Train on the scaled training split.
mlp.fit(X_train_scaled, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=1000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=0, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [21]:
# Better performance now that the features are scaled
train_acc = mlp.score(X_train_scaled, y_train)
test_acc = mlp.score(X_test_scaled, y_test)
print('Training Score: {:.2f}'.format(train_acc))
print('Test Score: {:.2f}'.format(test_acc))

Training Score: 0.99
Test Score: 0.96


In [22]:
# Predictions for the scaled test split.
pred = mlp.predict(X_test_scaled)

In [23]:
# 6 misclassifications (off-diagonal entries: 3 + 3)
cm = confusion_matrix(y_test, pred)
print(cm)

[[ 63   3]
 [  3 102]]


## Let's increase the L2 Regularization. Can we improve the Test Set Performance?

In [24]:
# Same network, but with the L2 penalty raised to alpha=1.
mlp = MLPClassifier(
    alpha=1,
    max_iter=1000,
    random_state=0,
)

In [25]:
# Train the more strongly regularized model on the scaled training split.
mlp.fit(X_train_scaled, y_train)

MLPClassifier(activation='relu', alpha=1, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=1000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=0, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [26]:
# Even better generalization: the test score rises while the training score
# is unchanged at two decimals.
train_acc = mlp.score(X_train_scaled, y_train)
test_acc = mlp.score(X_test_scaled, y_test)
print('Training Score: {:.2f}'.format(train_acc))
print('Test Score: {:.2f}'.format(test_acc))

Training Score: 0.99
Test Score: 0.98


In [27]:
# Predictions of the alpha=1 model for the scaled test split.
pred = mlp.predict(X_test_scaled)

In [28]:
# 4 misclassifications (off-diagonal entries: 2 + 2)
cm = confusion_matrix(y_test, pred)
print(cm)

[[ 64   2]
 [  2 103]]


## Finally let's vary the number of hidden layers and number of nodes per hidden layer

1) Reduce number of nodes to number of features

In [38]:
# Shrink the single hidden layer to 30 nodes, i.e. one per input feature.
mlp = MLPClassifier(
    alpha=1,
    hidden_layer_sizes=(30,),
    max_iter=1000,
    random_state=0,
)

In [39]:
# Train the 30-node network on the scaled training split.
mlp.fit(X_train_scaled, y_train)

MLPClassifier(activation='relu', alpha=1, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(30,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=1000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=0, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [40]:
# No further increase of the test score; it even decreases slightly
train_acc = mlp.score(X_train_scaled, y_train)
test_acc = mlp.score(X_test_scaled, y_test)
print('Training Score: {:.2f}'.format(train_acc))
print('Test Score: {:.2f}'.format(test_acc))

Training Score: 0.99
Test Score: 0.96


2) Add a second hidden layer

In [41]:
# Two hidden layers of 100 nodes each.
mlp = MLPClassifier(
    alpha=1,
    hidden_layer_sizes=(100, 100),
    max_iter=1000,
    random_state=0,
)

In [42]:
# Train the two-hidden-layer network on the scaled training split.
mlp.fit(X_train_scaled, y_train)

MLPClassifier(activation='relu', alpha=1, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100, 100), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=1000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=0, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [56]:
# No further increase of the test score: at two-decimal precision both scores
# match the single-hidden-layer alpha=1 model, so the extra layer brings no
# measurable gain here (and the printed scores show no additional overfitting).
train_acc = mlp.score(X_train_scaled, y_train)
test_acc = mlp.score(X_test_scaled, y_test)
print('Training Score: {:.2f}'.format(train_acc))
print('Test Score: {:.2f}'.format(test_acc))

Training Score: 0.99
Test Score: 0.98


3) Use two smaller hidden layers (14 and 7 nodes) with a combined node count of ~2/3 of the input features

In [47]:
# Two small hidden layers: 14 + 7 = 21 nodes, roughly 2/3 of the 30 input
# features.
mlp = MLPClassifier(
    alpha=1,
    hidden_layer_sizes=(14, 7),
    max_iter=1000,
    random_state=0,
)

In [48]:
# Train the (14, 7) network on the scaled training split.
mlp.fit(X_train_scaled, y_train)

MLPClassifier(activation='relu', alpha=1, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(14, 7), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=1000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=0, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [49]:
# No further increase of the test score
train_acc = mlp.score(X_train_scaled, y_train)
test_acc = mlp.score(X_test_scaled, y_test)
print('Training Score: {:.2f}'.format(train_acc))
print('Test Score: {:.2f}'.format(test_acc))

Training Score: 0.99
Test Score: 0.97


## Test the best model so far with a different solver, then with a different activation function

In [50]:
# Best configuration so far (alpha=1, default hidden layer), but optimized
# with the 'lbfgs' solver.
mlp = MLPClassifier(
    alpha=1,
    max_iter=1000,
    random_state=0,
    solver='lbfgs',
)

In [51]:
# Train the 'lbfgs' variant on the scaled training split.
mlp.fit(X_train_scaled, y_train)

MLPClassifier(activation='relu', alpha=1, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=1000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=0, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [52]:
# No further increase of the test score; 'adam' and 'lbfgs' reach the same
# test score
train_acc = mlp.score(X_train_scaled, y_train)
test_acc = mlp.score(X_test_scaled, y_test)
print('Training Score: {:.2f}'.format(train_acc))
print('Test Score: {:.2f}'.format(test_acc))

Training Score: 0.99
Test Score: 0.98


In [53]:
# Best configuration so far (alpha=1, default hidden layer), but with the
# 'tanh' activation function.
mlp = MLPClassifier(
    activation='tanh',
    alpha=1,
    max_iter=1000,
    random_state=0,
)

In [54]:
# Train the 'tanh' variant on the scaled training split.
mlp.fit(X_train_scaled, y_train)

MLPClassifier(activation='tanh', alpha=1, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=1000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=0, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [55]:
# No further increase of the test score; the tanh and relu activation
# functions reach the same test score
train_acc = mlp.score(X_train_scaled, y_train)
test_acc = mlp.score(X_test_scaled, y_test)
print('Training Score: {:.2f}'.format(train_acc))
print('Test Score: {:.2f}'.format(test_acc))

Training Score: 0.99
Test Score: 0.98
