What do you think will happen to the accuracy of training a neural network as the number of layers increases? Try training a neural network for the MNIST data using more layers to see what happens. Speculate as to why you are seeing what you are seeing.

In [1]:
# Read in the mnist digit dataset

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.utils import check_random_state
import random
from sklearn import tree
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neural_network import MLPClassifier

# as_frame=False asks for plain NumPy arrays. The next cell indexes X and y
# positionally with an integer permutation and calls X.reshape, both of which
# need ndarrays; recent scikit-learn releases otherwise default to returning a
# pandas DataFrame/Series for this dataset.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

Next, we will divide the data into a training set and a test set, randomly selecting 5,000 examples for training and 10,000 for testing.

In [2]:
# Shuffle the full dataset, then carve out fixed-size train and test subsets.
# NOTE: this cell overwrites X and y with their shuffled versions, so running
# it a second time without reloading the data shuffles already-shuffled data.
train_samples = 5000

# A single seeded RandomState drives both the shuffle and the split so that a
# fresh "Restart & Run All" always produces the same train/test partition.
random_state = check_random_state(0)
permutation = random_state.permutation(X.shape[0])
X = X[permutation]
y = y[permutation]
# Flatten every example to a 1-D feature vector (one row per example).
X = X.reshape((X.shape[0], -1))

# Pass the seeded generator to the split as well; without it train_test_split
# draws a different random partition on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_samples, test_size=10000,
    random_state=random_state)

Now, we will try networks with 1 to 10 hidden layers of 100 units each, keeping track of the min/mean/max test accuracy at each depth over ten training runs.



In [5]:
# Depth sweep: for each network depth from 1 to 10 hidden layers, train `reps`
# independent MLPs and print "depth min mean max" of their test accuracy.
reps = 10
hidden_units = 100  # width of every hidden layer
for i in range(1, 11):
  nhidden = i * [hidden_units]
  scores = []
  for r in range(reps):
    # Seed each repetition with its own index: runs within a depth still
    # differ from one another (different weight initialisations), but the
    # whole table is reproducible when the notebook is re-executed.
    clf = MLPClassifier(hidden_layer_sizes=nhidden, max_iter=10000,
                        random_state=r)
    clf.fit(X_train, y_train)
    scores.append(clf.score(X_test, y_test))
  accmin, accmax = min(scores), max(scores)
  accsum = sum(scores)
  print(i, accmin, accsum/reps, accmax)

1 0.8696 0.88229 0.8923
2 0.8584 0.8683400000000001 0.878
3 0.8442 0.85788 0.8673
4 0.8638 0.8722900000000001 0.8782
5 0.8817 0.88993 0.9004
6 0.8868 0.8956899999999999 0.9046
7 0.8987 0.9082800000000001 0.9179
8 0.9074 0.9196899999999999 0.9272
9 0.891 0.9213099999999999 0.936
10 0.9117 0.9243399999999999 0.9389
