# MNIST

In [5]:
from __future__ import print_function
from __future__ import division
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import time

## Get the data

In [6]:
train = pd.read_csv("mnist_train.csv")
test = pd.read_csv("mnist_test.csv")
print(train.shape)
print(test.shape)

(42000, 785)
(28000, 784)


In [7]:
X = train[train.columns[1:]]
y = train[train.columns[0]]

## k-fold CV

pip install -U git+https://github.com/scikit-learn/scikit-learn

In [8]:
#from sklearn.model_selection import RepeatedStratifiedKFold
#from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=(10, 10), random_state=42)
kf = KFold(n_splits=10, random_state=42) 
scores = []
times = []
for train_ixs, test_ixs in kf.split(X):
    start_time = time.time()
    clf.fit(X.loc[train_ixs], y.loc[train_ixs])
    end_time = time.time()
    time_taken = end_time - start_time
    times.append(time_taken)
    score = clf.score(X.loc[test_ixs], y.loc[test_ixs])
    print(score)
    scores.append(score)

print("mean score:", np.mean(scores))


0.829523809524




0.882857142857
0.831904761905
0.807857142857
0.861666666667
0.848333333333
0.862857142857
0.793095238095
0.866428571429
0.822619047619
mean score: 0.840714285714


## Grid Search

In [100]:
from sklearn.grid_search import GridSearchCV

parameters = {
    'solver' : ['adam', 'lbfgs', 'sgd'],
    'max_iter' : [500, 1000],
    'alpha' : [0.01, 0.001],
    'hidden_layer_sizes' : [50, 100, 200]
}

#clf_grid = GridSearchCV(MLPClassifier(), parameters, cv=5)
clf_grid = GridSearchCV(MLPClassifier(), parameters)
clf_grid.fit(X, y)
print(clf_grid)
print(clf_grid.best_score_)

GridSearchCV(cv=None, error_score='raise',
       estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'alpha': [0.01, 0.001], 'hidden_layer_sizes': [50, 100]},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)
0.941642857143


## Learning Curve

In [97]:
from sklearn.model_selection import learning_curve
from sklearn.model_selection import ShuffleSplit

In [101]:
import time
print(time.time())

1533187846.37


In [111]:
length = 100
ixs = range(length)
print(ixs)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


In [127]:
import random
for split in range(10, 100, 10):
    random.shuffle(ixs)
    train_ixs = ixs[:split]
    test_ixs = ixs[split:]