# Digit Recognizer with Multi-Layer Perceptron Classifier

In [1]:
import csv

import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

from common import load_data, score, write_output

In [2]:
# Load the labelled data via the project helper and record its dimensions.
X, y = load_data()
row_count, feature_count = X.shape

# Hold out a validation split; the fixed random_state keeps it reproducible.
train_X, val_X, train_y, val_y = train_test_split(
    X,
    y,
    random_state=0,
)

How do we determine the number of neurons in a hidden layer? See the [answers on Cross Validated (Stack Exchange)](https://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw).

In [3]:
# Baseline network: a single hidden layer of 330 neurons (the best-scoring
# width in the sweep log recorded at the end of this notebook).
# The trailing comma matters: `(330)` is just the int 330 in parentheses,
# whereas `(330,)` is the one-element tuple that `tune_params` also passes.
model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(330,), random_state=0)
# Fit on the training split only; `fit` returns the estimator, so the cell
# displays its parameters.
model.fit(train_X, train_y)

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=330, learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=0, shuffle=True,
       solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

In [4]:
# Predict labels for the held-out validation features with the fitted model.
predictions = model.predict(val_X)

In [5]:
# Evaluate the validation predictions with the project's `score` helper.
# NOTE(review): the recorded value (~0.9636) looks like an accuracy — confirm
# against `common.score`.
score(predictions, val_y)

0.9636190476190476

In [6]:
# Parameter tuning
def tune_params(neuron_count):
    """Train a one-hidden-layer MLP of the given width and score it.

    The classifier is fitted on the notebook-level ``train_X``/``train_y``
    and evaluated on ``val_X``/``val_y``.

    Args:
        neuron_count: Number of neurons in the single hidden layer.

    Returns:
        Whatever ``score`` returns for the validation predictions
        (presumably an accuracy in [0, 1] — confirm against ``common.score``).
    """
    # Start with the simplest topology: exactly one hidden layer.
    candidate = MLPClassifier(
        hidden_layer_sizes=(neuron_count,),
        solver='lbfgs',
        alpha=1e-5,
        random_state=0,
    )
    candidate.fit(train_X, train_y)
    return score(candidate.predict(val_X), val_y)


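# Disabled sweep over hidden-layer widths.
# NOTE: if this is re-enabled, `model` below is the notebook-level model
# fitted earlier, not the one trained inside tune_params (which only returns
# the score), so every output file would contain the same predictions.
# test_X = pd.read_csv('test.csv')
# for c in range(790, 1000, 10):
#     mae = tune_params(c)
#     print(c, mae)
#     predictions = model.predict(test_X)
#     write_output(predictions, f'output_nn_{c}.csv')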

In [7]:
# Produce the submission file from the unlabeled test set.
# NOTE(review): assumes test.csv has the same feature columns as X — confirm.
test_X = pd.read_csv('test.csv')
# Uses the notebook-level `model`, which was fitted on train_X only.
# This rebinds `predictions`, replacing the validation predictions above.
predictions = model.predict(test_X)
write_output(predictions, 'output_nn.csv')

In [8]:
# model = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(56,), random_state=0)
# mae = 1.1010476190476191

# model = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(533,), random_state=0)
# mae = 0.20038095238095238


# model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(56,), random_state=0)
# mae = 0.35295238095238096

# model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(533,), random_state=0)
# mae = 0.15266666666666667


# model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(533, 75), random_state=0)
# mae = 0.15161904761904763

# model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(533, 100), random_state=0)
# mae = 0.14066666666666666

# model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(533, 150), random_state=0)
# mae = 0.14361904761904762


"""
20 0.2
30 0.689238095238
40 0.861619047619
50 0.882666666667
60 0.885523809524
70 0.929238095238
80 0.942857142857
90 0.935523809524
100 0.94180952381
110 0.949904761905
120 0.944952380952
130 0.952
140 0.946
150 0.948952380952
160 0.952095238095
170 0.954952380952
180 0.957714285714
190 0.955142857143
200 0.96
210 0.958952380952
220 0.958571428571
230 0.958571428571
240 0.956952380952
241 0.959333333333
242 0.962666666667
243 0.959714285714
244 0.958095238095
245 0.958095238095
246 0.958095238095
247 0.959047619048
248 0.958095238095
249 0.958380952381
250 0.961619047619
251 0.959904761905
252 0.957142857143
253 0.960285714286
254 0.95619047619
255 0.958285714286
256 0.96219047619
257 0.95980952381
258 0.959619047619
259 0.960380952381
260 0.958761904762
270 0.959238095238
280 0.961333333333
290 0.95980952381
300 0.959047619048
310 0.960571428571
320 0.958952380952
321 0.958
322 0.958095238095
323 0.957238095238
324 0.958285714286
325 0.955428571429
326 0.957619047619
327 0.958095238095
328 0.957523809524
329 0.95819047619
330 0.963619047619
331 0.958
332 0.95980952381
333 0.958952380952
334 0.956380952381
335 0.959142857143
336 0.956952380952
337 0.960952380952
338 0.959142857143
339 0.958
340 0.956666666667
350 0.957904761905
360 0.960285714286
370 0.95980952381
380 0.958285714286
390 0.958095238095
400 0.959523809524
410 0.956666666667
420 0.95619047619
430 0.96
440 0.960285714286
450 0.957142857143
460 0.958095238095
480 0.96
490 0.958380952381
500 0.957428571429
510 0.95780952381
520 0.955333333333
530 0.957619047619
540 0.958761904762
550 0.958095238095
560 0.956857142857
570 0.959047619048
580 0.958571428571
590 0.958285714286
600 0.95819047619
610 0.955047619048
620 0.95619047619
630 0.957047619048
640 0.958571428571
650 0.956380952381
660 0.955619047619
670 0.959142857143
680 0.953428571429
690 0.955428571429
700 0.958952380952
710 0.955047619048
720 0.958761904762
730 0.955714285714
740 0.956380952381
750 0.956095238095
760 0.95580952381
770 0.959238095238
780 0.955619047619
"""

'\n20 0.2\n30 0.689238095238\n40 0.861619047619\n50 0.882666666667\n60 0.885523809524\n70 0.929238095238\n80 0.942857142857\n90 0.935523809524\n100 0.94180952381\n110 0.949904761905\n120 0.944952380952\n130 0.952\n140 0.946\n150 0.948952380952\n160 0.952095238095\n170 0.954952380952\n180 0.957714285714\n190 0.955142857143\n200 0.96\n210 0.958952380952\n220 0.958571428571\n230 0.958571428571\n240 0.956952380952\n241 0.959333333333\n242 0.962666666667\n243 0.959714285714\n244 0.958095238095\n245 0.958095238095\n246 0.958095238095\n247 0.959047619048\n248 0.958095238095\n249 0.958380952381\n250 0.961619047619\n251 0.959904761905\n252 0.957142857143\n253 0.960285714286\n254 0.95619047619\n255 0.958285714286\n256 0.96219047619\n257 0.95980952381\n258 0.959619047619\n259 0.960380952381\n260 0.958761904762\n270 0.959238095238\n280 0.961333333333\n290 0.95980952381\n300 0.959047619048\n310 0.960571428571\n320 0.958952380952\n321 0.958\n322 0.958095238095\n323 0.957238095238\n324 0.958285714286