In [1]:
# Load the MNIST handwritten-digit dataset as a dict-like Bunch with "data" and "target".
# fetch_mldata() was deprecated in scikit-learn 0.20 and removed in 0.22 (the mldata.org
# service it downloaded from is offline), so the dataset is fetched from OpenML instead.
from sklearn.datasets import fetch_openml

# as_frame=False (scikit-learn >= 0.22) keeps "data"/"target" as NumPy arrays rather than
# pandas objects, which the array-style indexing in the following cells relies on.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

# OpenML serves the digit labels as strings ('0'..'9'); convert them to numbers so the
# downstream metrics and the label binarizer see numeric class labels.
mnist["target"] = mnist["target"].astype(float)

In [2]:
# Follow the y = f(X) convention: X is the feature matrix, y the label vector.
X = mnist["data"]
y = mnist["target"]
# Report rows x columns of the features and the number of labels.
size_report = 'Data size: {0} x {1} and label size {2}'.format(X.shape[0], X.shape[1], y.shape[0])
print(size_report)

Data size: 70000 x 784 and label size 70000


In [3]:
import numpy as np

# Shuffle the samples once and keep the first 4/5 for training, the rest for testing.
# The permutation comes from a dedicated, seeded generator so the split (and therefore
# every score reported below) is reproducible across kernel restarts, without touching
# NumPy's global random state.
SPLIT_SEED = 42
N = len(X)
M = (N * 4) // 5  # number of training samples (80% of the data)
print("train set size={0}, test set size={1}".format(M, N-M))
shuffle_index = np.random.RandomState(SPLIT_SEED).permutation(N)
# reshuffle the data and use M samples as training and N-M as test
train_index, test_index = shuffle_index[:M], shuffle_index[M:]
X_train, X_test = X[train_index, :], X[test_index, :]
y_train, y_test = y[train_index], y[test_index]
# The two parts must partition the data set exactly.
assert len(X_train) + len(X_test) == N

train set size=56000, test set size=14000


# LogisticRegression with softmax

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import accuracy_score
print('LogisticRegression')
# Multinomial (softmax) logistic regression fitted with the L-BFGS solver.
lr_clf = LogisticRegression(
    multi_class='multinomial',
    solver='lbfgs',
    max_iter=50,
    random_state=42,
)
lr_clf.fit(X_train, y_train)

# 4-fold cross_val_predict: the predictions scored below come from this call,
# not from calling lr_clf.predict on the data it was fitted on.
y_train_pred = cross_val_predict(lr_clf, X_train, y_train, cv=4)

# Confusion matrix and accuracy of those predictions against the training labels
conf_mx = confusion_matrix(y_train, y_train_pred)
print('LogisticRegression Confusion matrix: \n{0}'.format(conf_mx))
lr_train_accuracy = accuracy_score(y_train, y_train_pred)
print('LogisticRegression Classifier accuracy on the training set is {0}'.format(lr_train_accuracy))

# Accuracy on the held-out test set, using the classifier fitted above
y_pred_test = lr_clf.predict(X_test)
lr_test_accuracy = accuracy_score(y_test, y_pred_test)
print('LogisticRegression Classifier accuracy on the test set is {0}'.format(lr_test_accuracy))

LogisticRegression
LogisticRegression Confusion matrix: 
[[5394    1   18   12    9   47   36    9   34    8]
 [   1 6144   22   18    5   23    6   18   64   10]
 [  32   63 4948   97   63   29   70   72  140   24]
 [  20   18  134 5072    5  185   20   59  106   57]
 [  12   24   28   10 5084    6   55   19   52  192]
 [  73   21   40  198   56 4324  105   20  164   56]
 [  37   14   41    3   40   65 5262    6   30    2]
 [  20   28   73   24   58    7    3 5449   21  198]
 [  28   99   51  140   29  163   45   26 4781   70]
 [  29   36   17   68  149   33    2  158   51 5012]]
LogisticRegression Classifier accuracy on the training set is 0.9191071428571429
LogisticRegression Classifier accuracy on the test set is 0.9188571428571428


# MLP

In [6]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import accuracy_score

print('MLPClassifier')
# Multi-layer perceptron with two hidden layers of 80 units each, trained with SGD.
mlp_settings = dict(
    hidden_layer_sizes=(80, 80),
    activation='relu',
    solver='sgd',  # optimize the loss function
    learning_rate_init=.0001,
    alpha=1e-3,
    max_iter=1000,
    random_state=42,  # fix the random state, so each run has the same output
    verbose=1,  # 0: do not print out log, >= 1: print out log
)
mlp = MLPClassifier(**mlp_settings)
mlp.fit(X_train, y_train)

# 4-fold cross_val_predict: the predictions scored below come from this call.
y_train_pred = cross_val_predict(mlp, X_train, y_train, cv=4)

# Confusion matrix and accuracy of those predictions against the training labels
conf_mx = confusion_matrix(y_train, y_train_pred)
print('MLPClassifier Confusion matrix: \n{0}'.format(conf_mx))
mlp_train_accuracy = accuracy_score(y_train, y_train_pred)
print('MLPClassifier Classifier accuracy on the training set is {0}'.format(mlp_train_accuracy))

# Accuracy on the held-out test set, using the network fitted above
y_pred_test = mlp.predict(X_test)
mlp_test_accuracy = accuracy_score(y_test, y_pred_test)
print('MLPClassifier Classifier accuracy on the test set is {0}'.format(mlp_test_accuracy))

MLPClassifier
Iteration 1, loss = 2.59616466
Iteration 2, loss = 0.90413396
Iteration 3, loss = 0.64608320
Iteration 4, loss = 0.51734009
Iteration 5, loss = 0.43955791
Iteration 6, loss = 0.38813506
Iteration 7, loss = 0.34934155
Iteration 8, loss = 0.31744697
Iteration 9, loss = 0.29395749
Iteration 10, loss = 0.27409681
Iteration 11, loss = 0.25711145
Iteration 12, loss = 0.24248883
Iteration 13, loss = 0.23044228
Iteration 14, loss = 0.21976158
Iteration 15, loss = 0.21064804
Iteration 16, loss = 0.20212872
Iteration 17, loss = 0.19440145
Iteration 18, loss = 0.18840743
Iteration 19, loss = 0.18266774
Iteration 20, loss = 0.17633444
Iteration 21, loss = 0.17145999
Iteration 22, loss = 0.16596337
Iteration 23, loss = 0.16138843
Iteration 24, loss = 0.15717610
Iteration 25, loss = 0.15358380
Iteration 26, loss = 0.15007566
Iteration 27, loss = 0.14655255
Iteration 28, loss = 0.14307415
Iteration 29, loss = 0.14001496
Iteration 30, loss = 0.13679172
Iteration 31, loss = 0.13350067
Ite

Iteration 27, loss = 0.13577298
Iteration 28, loss = 0.13231190
Iteration 29, loss = 0.12884851
Iteration 30, loss = 0.12509071
Iteration 31, loss = 0.12205101
Iteration 32, loss = 0.11863080
Iteration 33, loss = 0.11575994
Iteration 34, loss = 0.11318374
Iteration 35, loss = 0.11090415
Iteration 36, loss = 0.10797565
Iteration 37, loss = 0.10566418
Iteration 38, loss = 0.10356656
Iteration 39, loss = 0.10137630
Iteration 40, loss = 0.09910441
Iteration 41, loss = 0.09661711
Iteration 42, loss = 0.09501602
Iteration 43, loss = 0.09332739
Iteration 44, loss = 0.09152188
Iteration 45, loss = 0.09005016
Iteration 46, loss = 0.08764279
Iteration 47, loss = 0.08630291
Iteration 48, loss = 0.08461030
Iteration 49, loss = 0.08309527
Iteration 50, loss = 0.08193763
Iteration 51, loss = 0.08044364
Iteration 52, loss = 0.07891886
Iteration 53, loss = 0.07766530
Iteration 54, loss = 0.07631685
Iteration 55, loss = 0.07506704
Iteration 56, loss = 0.07388748
Iteration 57, loss = 0.07250211
Iteratio

Iteration 43, loss = 0.10851841
Iteration 44, loss = 0.10702622
Iteration 45, loss = 0.10488961
Iteration 46, loss = 0.10312905
Iteration 47, loss = 0.10148857
Iteration 48, loss = 0.09992305
Iteration 49, loss = 0.09872262
Iteration 50, loss = 0.09659534
Iteration 51, loss = 0.09529181
Iteration 52, loss = 0.09384764
Iteration 53, loss = 0.09238268
Iteration 54, loss = 0.09122932
Iteration 55, loss = 0.08986690
Iteration 56, loss = 0.08870875
Iteration 57, loss = 0.08774492
Iteration 58, loss = 0.08661297
Iteration 59, loss = 0.08515060
Iteration 60, loss = 0.08368635
Iteration 61, loss = 0.08259912
Iteration 62, loss = 0.08179667
Iteration 63, loss = 0.08049553
Iteration 64, loss = 0.07956951
Iteration 65, loss = 0.07878609
Iteration 66, loss = 0.07782891
Iteration 67, loss = 0.07658835
Iteration 68, loss = 0.07566631
Iteration 69, loss = 0.07476103
Iteration 70, loss = 0.07395310
Iteration 71, loss = 0.07279248
Iteration 72, loss = 0.07186698
Iteration 73, loss = 0.07115945
Iteratio

Iteration 14, loss = 0.24074164
Iteration 15, loss = 0.22958794
Iteration 16, loss = 0.21917135
Iteration 17, loss = 0.20987096
Iteration 18, loss = 0.28216631
Iteration 19, loss = 0.20655991
Iteration 20, loss = 0.19299118
Iteration 21, loss = 0.18369977
Iteration 22, loss = 0.17615817
Iteration 23, loss = 0.16966567
Iteration 24, loss = 0.16719259
Iteration 25, loss = 0.16752135
Iteration 26, loss = 0.15515464
Iteration 27, loss = 0.15069076
Iteration 28, loss = 0.14603168
Iteration 29, loss = 0.14258836
Iteration 30, loss = 0.13853756
Iteration 31, loss = 0.13519240
Iteration 32, loss = 0.13201828
Iteration 33, loss = 0.12863299
Iteration 34, loss = 0.12585709
Iteration 35, loss = 0.12318761
Iteration 36, loss = 0.12038689
Iteration 37, loss = 0.11788955
Iteration 38, loss = 0.11519656
Iteration 39, loss = 0.11293015
Iteration 40, loss = 0.11044900
Iteration 41, loss = 0.10846813
Iteration 42, loss = 0.10668202
Iteration 43, loss = 0.10424486
Iteration 44, loss = 0.10251915
Iteratio

Iteration 23, loss = 0.16403257
Iteration 24, loss = 0.15466448
Iteration 25, loss = 0.15010026
Iteration 26, loss = 0.14519534
Iteration 27, loss = 0.14930232
Iteration 28, loss = 0.13809286
Iteration 29, loss = 0.13355221
Iteration 30, loss = 0.12976738
Iteration 31, loss = 0.14380323
Iteration 32, loss = 0.12472342
Iteration 33, loss = 0.12455208
Iteration 34, loss = 0.11816142
Iteration 35, loss = 0.11532507
Iteration 36, loss = 0.11240374
Iteration 37, loss = 0.11003402
Iteration 38, loss = 0.10821929
Iteration 39, loss = 0.11556795
Iteration 40, loss = 0.10364254
Iteration 41, loss = 0.10125543
Iteration 42, loss = 0.09896737
Iteration 43, loss = 0.09696274
Iteration 44, loss = 0.09480080
Iteration 45, loss = 0.09303915
Iteration 46, loss = 0.09134598
Iteration 47, loss = 0.08963861
Iteration 48, loss = 0.08784485
Iteration 49, loss = 0.08624453
Iteration 50, loss = 0.09215761
Iteration 51, loss = 0.08418022
Iteration 52, loss = 0.08221780
Iteration 53, loss = 0.08075865
Iteratio

# CNN

In [7]:
import numpy as np
from sklearn import preprocessing
import tensorflow as tf

# The model built in this cell is a convolutional network, so label the output as CNN
# (the banner previously said "SVM").
print('CNN 3 convolutional layers')
print("Reshape data")
# Images: one row of 784 values per sample -> 28 x 28 x 1 tensors (last axis is the channel).
train_x_data = X_train.reshape(X_train.shape[0], 28, 28, 1).astype(np.float32) # M*28*28*1, 1 is channel
test_x_data = X_test.reshape(X_test.shape[0], 28, 28, 1).astype(np.float32) # (N-M)*28*28*1, 1 is channel
# Labels: column vectors with one label per row.
train_y_data = y_train.reshape(y_train.shape[0], 1).astype(np.float32) # M*1 column vector
test_y_data = y_test.reshape(y_test.shape[0], 1).astype(np.float32) # (N-M)*1 column vector

# Scale pixels by 1/255 (assumes raw pixel values lie in 0..255, giving values in [0, 1]).
train_x_minmax = train_x_data / 255.0
test_x_minmax = test_x_data / 255.0

# Reformat y into one-hot encoding style
# each label become a 10-element vector, such as [1 0 0 0 0 0 0 0 0 0]
# At the output of CNN, we can use softmax to do multiclass classification.
lb = preprocessing.LabelBinarizer()
lb.fit(train_y_data)
train_y_data_trans = lb.transform(train_y_data)
test_y_data_trans = lb.transform(test_y_data)
# The label placeholder below is 10 wide; fail early with a clear message if the
# training labels did not produce exactly 10 one-hot columns.
assert train_y_data_trans.shape[1] == 10, "expected 10 one-hot columns, got {0}".format(train_y_data_trans.shape[1])

print('test_y_data_trans={0}'.format(test_y_data_trans))

print("Start evaluating CNN model by tensorflow...")

# Model input
x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1]) # ? * 28*28*1, ? is batch size
y_ = tf.placeholder(tf.float32, [None, 10]) # ? * 10

# Weight initialization
def init_weight(weight):
    """Create a TensorFlow variable initialised from a truncated normal (stddev 0.1).

    `weight` is the shape of the variable to create. The shape and the
    resulting variable are printed before the variable is returned.
    """
    variable = tf.Variable(tf.truncated_normal(weight, stddev=0.1))
    print("{0} -> {1}".format(weight, variable))
    return variable

def init_bias(bias):
    """Create a TensorFlow variable filled with the constant 0.1.

    `bias` is the shape of the variable to create. The shape and the
    resulting variable are printed before the variable is returned.
    """
    variable = tf.Variable(tf.constant(0.1, shape=bias))
    print("{0} -> {1}".format(bias, variable))
    return variable

# Building blocks shared by the three convolutional stages below.
def _conv_relu(inputs, kernel, bias):
    """Stride-1, 'SAME'-padded 2-D convolution, followed by a bias add and ReLU."""
    # `tf.nn.conv2d()` computes a 2-D convolution given 4-D `input` and `filter` tensors
    # input tensor shape `[batch, in_height, in_width, in_channels]`, batch is number of observation
    # filter tensor shape `[filter_height, filter_width, in_channels, out_channels]`
    # strides: the stride of the sliding window for each dimension of input.
    # padding: 'SAME' or 'VALID', determine the type of padding algorithm to use
    convolved = tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(convolved + bias)


def _max_pool_2x2(inputs):
    """2x2 max pooling with stride 2 and 'SAME' padding (downsamples height and width)."""
    # ksize: the size of the window for each dimension of the input tensor.
    return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


# First convolutional layer
# Convolution: compute 32 features for each 5x5 patch
# Max pooling: reduce image size to 14x14, because 2*2 -> 1. this is downsample
W_conv1 = init_weight([5, 5, 1, 32])
b_conv1 = init_bias([32])
h_conv1 = _conv_relu(x, W_conv1, b_conv1)
h_pool1 = _max_pool_2x2(h_conv1)

# Second conv layer: 32 -> 64 features
# Max pooling: downsample image size to 7x7
W_conv2 = init_weight([5, 5, 32, 64])
b_conv2 = init_bias([64])
h_conv2 = _conv_relu(h_pool1, W_conv2, b_conv2)
h_pool2 = _max_pool_2x2(h_conv2)

# Third conv layer: 64 -> 128 features
# Max pooling: downsample image size to 4x4
W_conv3 = init_weight([5, 5, 64, 128])
b_conv3 = init_bias([128])
h_conv3 = _conv_relu(h_pool2, W_conv3, b_conv3)
h_pool3 = _max_pool_2x2(h_conv3)

# Densely connected layer
# Fully-connected layer with 1024 neurons
W_fc1 = init_weight([4 * 4 * 128, 1024])
b_fc1 = init_bias([1024])

# Flatten the output of the third pooling stage (despite the "pool1" in the name)
# into one row of 4*4*128 values per sample.
h_pool1_flat = tf.reshape(h_pool3, [-1, 4*4*128])
h_fc1 = tf.nn.relu(tf.matmul(h_pool1_flat, W_fc1) + b_fc1)

# Dropout
# To reduce overfitting, we apply dropout before the readout layer.
# keep_prob is fed at run time, so training and evaluation can use different values.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout layer: one raw score (logit) per digit class
W_fc2 = init_weight([1024, 10])
b_fc2 = init_bias([10])

y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

# Train and evaluate
TRAIN_STEPS = 6000  # number of mini-batch updates
BATCH_SIZE = 50     # samples drawn per update
LOG_EVERY = 100     # report the batch accuracy every LOG_EVERY steps

# Mean softmax cross-entropy between the logits and the one-hot labels
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
optimizer = tf.train.AdamOptimizer(1e-4)
# Other solver, optimizer = tf.train.GradientDescentOptimizer(1e-4)
train = optimizer.minimize(loss)

# A prediction is correct when the largest logit is at the index of the one-hot label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# tf.initialize_all_variables() is deprecated; tf.global_variables_initializer()
# is its documented replacement.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

for step in range(TRAIN_STEPS):
    # select some records randomly as a batch
    sample_index = np.random.choice(train_x_minmax.shape[0], BATCH_SIZE)
    batch_xs = train_x_minmax[sample_index, :]
    batch_ys = train_y_data_trans[sample_index, :]
    # print some information during processing (dropout disabled: keep_prob = 1.0)
    if step % LOG_EVERY == 0:
        train_accuracy = sess.run(accuracy, feed_dict={
            x: batch_xs, y_: batch_ys, keep_prob: 1.0})
        print("step {0}, CNN 3 convolutional layers training accuracy {1}".format(step, train_accuracy))
    # Run one training step on the batch, keeping each unit with probability 0.5
    sess.run(train, feed_dict={x: batch_xs, y_: batch_ys, keep_prob: 0.5})

# Accuracy on the whole held-out test set, with dropout disabled
print("CNN 3 convolutional layers test accuracy {0}".format(sess.run(accuracy, feed_dict={
    x: test_x_minmax, y_: test_y_data_trans, keep_prob: 1.0})))

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


SVM 3 convolutional layers
Reshape data
test_y_data_trans=[[1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]]
Start evaluating CNN model by tensorflow...
[5, 5, 1, 32] -> <tf.Variable 'Variable:0' shape=(5, 5, 1, 32) dtype=float32_ref>
[32] -> <tf.Variable 'Variable_1:0' shape=(32,) dtype=float32_ref>
[5, 5, 32, 64] -> <tf.Variable 'Variable_2:0' shape=(5, 5, 32, 64) dtype=float32_ref>
[64] -> <tf.Variable 'Variable_3:0' shape=(64,) dtype=float32_ref>
[5, 5, 64, 128] -> <tf.Variable 'Variable_4:0' shape=(5, 5, 64, 128) dtype=float32_ref>
[128] -> <tf.Variable 'Variable_5:0' shape=(128,) dtype=float32_ref>
[2048, 1024] -> <tf.Variable 'Variable_6:0' shape=(2048, 1024) dtype=float32_ref>
[1024] -> <tf.Variable 'Variable_7:0' shape=(1024,) dtype=float32_ref>
[1024, 10] -> <tf.Variable 'Variable_8:0' shape=(1024, 10) dtype=float32_ref>
[10] -> <tf.Variable 'Variable_9:0' shape=(10,) dtype=float32_ref>
Instructions for upda