In [84]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import numpy.random
import tensorflow as tf
import datetime
import struct
import time
from sklearn import linear_model
from sklearn import svm
import sklearn.model_selection
import sklearn.metrics
import sklearn.preprocessing

# Workspace root; the log and dataset folders are derived from it by plain
# string concatenation, so the trailing slash matters.
root_dir = "D:/Jupyter/"
logs_dir = "".join((root_dir, "Logs/"))
data_dir = "".join((root_dir, 'Datasets/'))

def mnist_read_imgs(fname):
    """Load the image stack stored in an IDX3-style binary file.

    The file starts with four big-endian unsigned 32-bit integers. The first
    one is ignored; the remaining three are the image count and the two image
    dimensions. Every byte after the header is read as one uint8 pixel.

    Parameters
    ----------
    fname : path of the file to read.

    Returns
    -------
    numpy.ndarray of dtype uint8, shaped (count, dim1, dim2).
    """
    header_format = '>IIII'
    with open(fname, mode='rb') as stream:
        header = struct.unpack(header_format, stream.read(struct.calcsize(header_format)))
        # np.fromfile continues from the current position, i.e. just past the header.
        pixels = np.fromfile(stream, dtype=np.uint8)
        images = pixels.reshape(header[1], header[2], header[3])
    return images

def mnist_read_lbls(fname):
    """Load the label vector stored in an IDX1-style binary file.

    The file starts with two big-endian unsigned 32-bit integers (the first is
    ignored, the second is the declared label count). Every byte after the
    header is read as one uint8 label.

    Parameters
    ----------
    fname : path of the file to read. The previous implementation ignored this
        argument and always opened the training-labels file under ``data_dir``,
        so no other label file could be loaded.

    Returns
    -------
    1-D numpy.ndarray of dtype uint8 containing the labels.
    """
    with open(fname, mode='rb') as f:
        # Consume the 8-byte header so np.fromfile starts at the first label.
        (_, lab_num) = struct.unpack('>II', f.read(4 * 2))
        data_lab = np.fromfile(f, dtype=np.uint8)
    return data_lab

def minibatch(X, y, num=1000):
    """Draw a random mini-batch of matching rows from X and y.

    Row indices come from np.random.choice with its default settings, so
    they are drawn with replacement and a batch may contain duplicates.

    Parameters
    ----------
    X, y : arrays indexed along their first axis; the same indices are applied to both.
    num  : number of rows to draw.

    Returns
    -------
    (X_batch, y_batch) tuple built from the same sampled indices.
    """
    candidates = np.arange(X.shape[0])
    picked = np.random.choice(candidates, size=num)
    return X[picked], y[picked]

In [85]:
# Read the raw MNIST training images and labels from the dataset folder.
src_X = mnist_read_imgs(data_dir + 'MNIST/train-images.idx3-ubyte')
src_y = mnist_read_lbls(data_dir + 'MNIST/train-labels.idx1-ubyte')

random_seed = 42

# First split: hold out 20% of all samples as the test set.
dev_X, test_X, dev_y, test_y = sklearn.model_selection.train_test_split(
    src_X, src_y, test_size=0.2, random_state=random_seed)

# Second split: hold out 20% of the remaining development set for validation.
train_X, valid_X, train_y, valid_y = sklearn.model_selection.train_test_split(
    dev_X, dev_y, test_size=0.2, random_state=random_seed)

## 1D Neural Networks
### Prepare data

In [83]:
def mnist1d_transform_imgs(x):
    """Flatten each image into a single row and divide the pixel values by 255.

    Parameters
    ----------
    x : array whose first axis indexes images and whose next two axes are the
        image dimensions.

    Returns
    -------
    Array of shape (x.shape[0], x.shape[1] * x.shape[2]) holding x / 255.
    """
    sample_count = x.shape[0]
    pixels_per_image = x.shape[1] * x.shape[2]
    flattened = x.reshape(sample_count, pixels_per_image)
    return flattened / 255

def mnist1d_transform_lbls(y):
    """One-hot encode labels over the ten classes 0..9.

    Parameters
    ----------
    y : array of integer class labels.

    Returns
    -------
    Float array with one indicator per class; for a 1-D ``y`` of length n the
    result has shape (n, 10), with 1.0 in column ``y[k]`` of row k.
    """
    indicator_rows = []
    for digit in range(10):
        # Boolean mask for this class, promoted to float by the multiplication.
        indicator_rows.append(1.0 * (y == digit))
    return np.transpose(np.array(indicator_rows))

In [6]:
# Flattened, rescaled images for each split.
train1d_X = mnist1d_transform_imgs(train_X)
valid1d_X = mnist1d_transform_imgs(valid_X)
test1d_X = mnist1d_transform_imgs(test_X)

# One-hot encoded labels for each split.
train1d_y = mnist1d_transform_lbls(train_y)
valid1d_y = mnist1d_transform_lbls(valid_y)
test1d_y = mnist1d_transform_lbls(test_y)

### 0-hidden layer network
Current accuracy on validation is __92.5%__

In [13]:
# Build the graph for a network without hidden layers: one dense layer that
# maps the 784 inputs straight to 10 class logits.
tf.reset_default_graph()
dt_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
log_dir = root_dir + 'Logs/' + dt_now + '-LR-S-B5k'

# Inputs: scalar learning rate, a batch of flattened images, one-hot labels.
tf_LearningRate = tf.placeholder(shape=(), name='LearningRate', dtype=tf.float32)
tf_Input = tf.placeholder(shape=(None, 784), name='Input', dtype=tf.float32)
tf_Labels = tf.placeholder(shape=(None, 10), name='Labels', dtype=tf.float32)

tf_Output = tf.layers.dense(tf_Input, 10, use_bias=True, name='LogisticRegression')

# Class probabilities, used only for prediction/evaluation.
tf_OutProb = tf.nn.softmax(tf_Output)

# Mean cross-entropy computed directly from the logits. Taking tf.log of the
# softmax output instead produces -inf/NaN as soon as a probability underflows
# to 0, so the fused op is used here (as in the 2-hidden-layer cell).
tf_Error = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_Labels, logits=tf_Output))
tf_Optimizer = tf.train.GradientDescentOptimizer(tf_LearningRate)
tf_TrainStep = tf_Optimizer.minimize(tf_Error)

tf_Initialize = tf.global_variables_initializer()

# Scalar summary of the loss plus a writer that also records the graph.
tf_ErrorSummary = tf.summary.scalar('Error', tf_Error)
tf_FW = tf.summary.FileWriter(log_dir, tf.get_default_graph())

In [14]:
# Training hyper-parameters for the 0-hidden-layer network.
num_epochs = 100
num_steps = 100
batch_size = 5000
learning_rate = 0.5

# Feed dictionaries for the evaluation passes over the complete splits.
fulltrain_batch = {tf_Input: train1d_X, tf_Labels: train1d_y}
validation_batch = {tf_Input: valid1d_X, tf_Labels: valid1d_y}
test_batch = {tf_Input: test1d_X, tf_Labels: test1d_y}

with tf.Session() as sess:
    sess.run(tf_Initialize)
    for i in range(num_epochs):
        # Step-wise decay: 0.5 up to epoch 10, 0.2 up to epoch 50, 0.1 afterwards.
        if i > 50:
            learning_rate = 0.1
        elif i > 10:
            learning_rate = 0.2

        # One mini-batch is drawn per epoch and reused for all num_steps updates.
        tX, ty = minibatch(train1d_X, train1d_y, num=batch_size)
        batch = {tf_Input: tX, tf_Labels: ty, tf_LearningRate: learning_rate}
        for j in range(num_steps):
            sess.run(tf_TrainStep, feed_dict=batch)

        # Log the validation loss once per epoch.
        sumstr = sess.run(tf_ErrorSummary, feed_dict=validation_batch)
        tf_FW.add_summary(sumstr, i)

    # Class probabilities of the trained model on every split.
    train1d_nn0_prob = sess.run(tf_OutProb, feed_dict=fulltrain_batch)
    valid1d_nn0_prob = sess.run(tf_OutProb, feed_dict=validation_batch)
    test1d_nn0_prob = sess.run(tf_OutProb, feed_dict=test_batch)

In [99]:
# Predicted class = index of the largest probability in each row.
valid1d_nn0_pred = np.argmax(valid1d_nn0_prob, axis=1)
display(sklearn.metrics.confusion_matrix(valid_y, valid1d_nn0_pred))
sklearn.metrics.accuracy_score(valid_y, valid1d_nn0_pred)

array([[ 933,    0,    2,    1,    5,   11,    2,    0,    8,    1],
       [   0, 1076,    2,    0,    1,    6,    1,    1,    9,    3],
       [   8,   12,  818,   18,   10,    4,   12,   13,   20,    8],
       [   2,    2,   23,  928,    2,   25,    2,    9,   26,    3],
       [   1,    4,    7,    1,  887,    0,    8,    3,   11,   39],
       [   5,    6,    7,   29,    5,  753,   15,    1,   19,    4],
       [   4,    5,    5,    0,   11,   16,  903,    0,    4,    0],
       [   1,    7,    7,    3,    5,    3,    1,  901,    0,   50],
       [   4,   16,    7,   21,    1,   23,    5,    2,  816,   27],
       [   5,    4,    0,    8,   26,    6,    0,   25,    3,  863]], dtype=int64)

0.92479166666666668

### 1-hidden layer network
Current accuracy on validation is __97.3%__

In [15]:
# Build the graph for a network with one hidden layer of 200 ELU units
# followed by a dense layer producing 10 class logits.
tf.reset_default_graph()
dt_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
log_dir = root_dir + 'Logs/' + dt_now + '-H200-LR-S-B5k'

# Inputs: scalar learning rate, a batch of flattened images, one-hot labels.
tf_LearningRate = tf.placeholder(shape=(), name='LearningRate', dtype=tf.float32)
tf_Input = tf.placeholder(shape=(None, 784), name='Input', dtype=tf.float32)
tf_Labels = tf.placeholder(shape=(None, 10), name='Labels', dtype=tf.float32)

tf_Hidden = tf.layers.dense(tf_Input, 200, use_bias=True, activation=tf.nn.elu, name='Hidden-1')
tf_Output = tf.layers.dense(tf_Hidden, 10, use_bias=True, name='SoftMax')

# Class probabilities, used only for prediction/evaluation.
tf_OutProb = tf.nn.softmax(tf_Output)

# Mean cross-entropy computed directly from the logits. Taking tf.log of the
# softmax output instead produces -inf/NaN as soon as a probability underflows
# to 0, so the fused op is used here (as in the 2-hidden-layer cell).
tf_Error = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_Labels, logits=tf_Output))
tf_Optimizer = tf.train.GradientDescentOptimizer(tf_LearningRate)
tf_TrainStep = tf_Optimizer.minimize(tf_Error)

tf_Initialize = tf.global_variables_initializer()

# Scalar summary of the loss plus a writer that also records the graph.
tf_ErrorSummary = tf.summary.scalar('Error', tf_Error)
tf_FW = tf.summary.FileWriter(log_dir, tf.get_default_graph())

In [16]:
# Training hyper-parameters for the 1-hidden-layer network.
num_epochs = 100
num_steps = 100
batch_size = 5000
learning_rate = 0.5

# Feed dictionaries for the evaluation passes over the complete splits.
fulltrain_batch = {tf_Input: train1d_X, tf_Labels: train1d_y}
validation_batch = {tf_Input: valid1d_X, tf_Labels: valid1d_y}
test_batch = {tf_Input: test1d_X, tf_Labels: test1d_y}

with tf.Session() as sess:
    sess.run(tf_Initialize)
    for i in range(num_epochs):
        # Step-wise decay: 0.5 up to epoch 10, 0.2 up to epoch 50, 0.1 afterwards.
        if i > 50:
            learning_rate = 0.1
        elif i > 10:
            learning_rate = 0.2

        # One mini-batch is drawn per epoch and reused for all num_steps updates.
        tX, ty = minibatch(train1d_X, train1d_y, num=batch_size)
        batch = {tf_Input: tX, tf_Labels: ty, tf_LearningRate: learning_rate}
        for j in range(num_steps):
            sess.run(tf_TrainStep, feed_dict=batch)

        # Log the validation loss once per epoch.
        sumstr = sess.run(tf_ErrorSummary, feed_dict=validation_batch)
        tf_FW.add_summary(sumstr, i)

    # Class probabilities of the trained model on every split.
    train1d_nn1_prob = sess.run(tf_OutProb, feed_dict=fulltrain_batch)
    valid1d_nn1_prob = sess.run(tf_OutProb, feed_dict=validation_batch)
    test1d_nn1_prob = sess.run(tf_OutProb, feed_dict=test_batch)

In [96]:
# Validation accuracy of the 1-hidden-layer network (argmax over class probabilities).
sklearn.metrics.accuracy_score(y_true=valid_y, y_pred=np.argmax(valid1d_nn1_prob, axis=1))

0.97281249999999997

### 2-hidden layer network
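With a 300-300 hidden-layer combination, dropout, and a ResNet-style skip connection (Hidden-1 concatenated with Hidden-2 before the output layer), validation accuracy reaches __98.1%__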

In [17]:
# Graph for the 2-hidden-layer network: two 300-unit ReLU layers with dropout,
# where the output layer sees both hidden layers concatenated.
tf.reset_default_graph()
dt_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
log_dir = root_dir + 'Logs/' + dt_now + '-H300-H300-DROP-RES-LR-S-B5k-GD'

# tf_Training switches dropout on (training) or off (evaluation).
tf_Training = tf.placeholder(shape=(), name='Training', dtype=tf.bool)
tf_LearningRate = tf.placeholder(shape=(), name='LearningRate', dtype=tf.float32)
tf_Input = tf.placeholder(shape=(None, 784), name='Input', dtype=tf.float32)
tf_Labels = tf.placeholder(shape=(None, 10), name='Labels', dtype=tf.float32)

tf_Hidden1 = tf.layers.dense(tf_Input, 300, use_bias=True, activation=tf.nn.relu, name='Hidden-1')
tf_Hidden1Drop = tf.layers.dropout(tf_Hidden1, training=tf_Training)
tf_Hidden2 = tf.layers.dense(tf_Hidden1Drop, 300, use_bias=True, activation=tf.nn.relu, name='Hidden-2')

# Skip connection: the un-dropped Hidden-1 activations are concatenated with
# Hidden-2, and dropout is applied to the combined vector.
tf_Skip = tf.concat([tf_Hidden1, tf_Hidden2], axis=1)
tf_SkipDrop = tf.layers.dropout(tf_Skip, training=tf_Training)
tf_Output = tf.layers.dense(tf_SkipDrop, 10, use_bias=True, name='SoftMax')

tf_OutProb = tf.nn.softmax(tf_Output)

# Loss is the mean cross-entropy computed from the logits rather than from
# tf.log(tf_OutProb).
tf_PerSampleLoss = tf.nn.softmax_cross_entropy_with_logits(labels=tf_Labels, logits=tf_Output)
tf_Error = tf.reduce_mean(tf_PerSampleLoss)
tf_TrainStep = tf.train.GradientDescentOptimizer(tf_LearningRate).minimize(tf_Error)

tf_Initialize = tf.global_variables_initializer()

tf_ErrorSummary = tf.summary.scalar('Error', tf_Error)
tf_FW = tf.summary.FileWriter(log_dir, tf.get_default_graph())

In [19]:
# Training hyper-parameters for the 2-hidden-layer network.
num_epochs = 100
num_steps = 100
batch_size = 5000
learning_rate = 0.5

# Evaluation feeds run with dropout disabled (tf_Training: False).
fulltrain_batch = {tf_Training: False, tf_Input: train1d_X, tf_Labels: train1d_y}
validation_batch = {tf_Training: False, tf_Input: valid1d_X, tf_Labels: valid1d_y}
test_batch = {tf_Training: False, tf_Input: test1d_X, tf_Labels: test1d_y}

with tf.Session() as sess:
    sess.run(tf_Initialize)
    for i in range(num_epochs):
        # Step-wise decay: 0.5 up to epoch 10, 0.2 up to epoch 25, 0.1 afterwards.
        if i > 25:
            learning_rate = 0.1
        elif i > 10:
            learning_rate = 0.2

        # One mini-batch is drawn per epoch and reused for all num_steps updates.
        tX, ty = minibatch(train1d_X, train1d_y, num=batch_size)
        batch = {tf_Training: True, tf_Input: tX, tf_Labels: ty, tf_LearningRate: learning_rate}
        for j in range(num_steps):
            sess.run(tf_TrainStep, feed_dict=batch)

        # Log the validation loss once per epoch.
        sumstr = sess.run(tf_ErrorSummary, feed_dict=validation_batch)
        tf_FW.add_summary(sumstr, i)

    # Class probabilities of the trained model on every split.
    train1d_nn2_prob = sess.run(tf_OutProb, feed_dict=fulltrain_batch)
    valid1d_nn2_prob = sess.run(tf_OutProb, feed_dict=validation_batch)
    test1d_nn2_prob = sess.run(tf_OutProb, feed_dict=test_batch)

In [144]:
# Validation accuracy of the 2-hidden-layer network (argmax over class probabilities).
sklearn.metrics.accuracy_score(y_true=valid_y, y_pred=np.argmax(valid1d_nn2_prob, axis=1))

0.98052083333333329

In [20]:
# Accuracy of the 2-hidden-layer network on the training, validation and test
# splits, displayed in that order.
display(*(
    sklearn.metrics.accuracy_score(labels, np.argmax(probs, axis=1))
    for labels, probs in (
        (train_y, train1d_nn2_prob),
        (valid_y, valid1d_nn2_prob),
        (test_y, test1d_nn2_prob),
    )
))

0.99843749999999998

0.98062499999999997

0.98024999999999995

In [21]:
# Accuracy of the 1-hidden-layer network on the training, validation and test
# splits, displayed in that order.
display(*(
    sklearn.metrics.accuracy_score(labels, np.argmax(probs, axis=1))
    for labels, probs in (
        (train_y, train1d_nn1_prob),
        (valid_y, valid1d_nn1_prob),
        (test_y, test1d_nn1_prob),
    )
))

0.99382812499999995

0.97437499999999999

0.97233333333333338

In [23]:
# Accuracy of the 0-hidden-layer network on the training, validation and test
# splits, displayed in that order.
display(*(
    sklearn.metrics.accuracy_score(labels, np.argmax(probs, axis=1))
    for labels, probs in (
        (train_y, train1d_nn0_prob),
        (valid_y, valid1d_nn0_prob),
        (test_y, test1d_nn0_prob),
    )
))

0.92942708333333335

0.92343750000000002

0.92133333333333334

## 2D Neural Networks
### Prepare data

In [86]:
def mnist2d_transform_imgs(x):
    """Append a trailing channel axis of size 1 and divide the pixel values by 255.

    Parameters
    ----------
    x : array whose first axis indexes images and whose next two axes are the
        image dimensions.

    Returns
    -------
    Array of shape (x.shape[0], x.shape[1], x.shape[2], 1) holding x / 255.
    """
    target_shape = (x.shape[0], x.shape[1], x.shape[2], 1)
    with_channel = x.reshape(target_shape)
    return with_channel / 255

def mnist2d_transform_lbls(y):
    """One-hot encode labels over the ten classes 0..9.

    Parameters
    ----------
    y : array of integer class labels.

    Returns
    -------
    Float array with one indicator per class; for a 1-D ``y`` of length n the
    result has shape (n, 10), with 1.0 in column ``y[k]`` of row k.
    """
    per_class = []
    for digit in range(10):
        # Boolean mask for this class, promoted to float by the multiplication.
        per_class.append(1.0 * (y == digit))
    return np.transpose(np.array(per_class))

In [87]:
# Images with an explicit channel axis, rescaled, for each split.
train2d_X = mnist2d_transform_imgs(train_X)
valid2d_X = mnist2d_transform_imgs(valid_X)
test2d_X = mnist2d_transform_imgs(test_X)

# One-hot encoded labels for each split.
train2d_y = mnist2d_transform_lbls(train_y)
valid2d_y = mnist2d_transform_lbls(valid_y)
test2d_y = mnist2d_transform_lbls(test_y)

### Full Manual
#### Desired architecture
1) Convolution layer with __K1__ 3x3 filters

2) Max pooling layer 2x2 with same padding and stride 2 => 28x28 -> 14x14

3) Convolution layer with __K2__ 3x3 filters

4) Max pooling layer 2x2 with same padding and stride 2 => 14x14 -> 7x7

5) One fully connected output layer

In [136]:
def convolution_layer3x3(name, x_input, num_out, activation=tf.nn.relu):
    """Build a 3x3 convolution layer with stride 1 and SAME padding.

    Parameters
    ----------
    name       : name scope under which the layer's ops are created.
    x_input    : input tensor with axes (minibatch, row, col, channel).
    num_out    : number of output channels.
    activation : callable applied to the biased convolution result.

    Returns
    -------
    Tensor with axes (minibatch, row, col, channel), where the channel axis
    has size ``num_out``.
    """
    in_channels = int(x_input.shape[3])
    with tf.name_scope(name=name):
        # One intercept per output channel, broadcast over batch, rows and cols.
        bias = tf.Variable(tf.truncated_normal(stddev=0.1, shape=[1, 1, 1, num_out]),
                           dtype=tf.float32, name='Intercept')
        kernel = tf.Variable(tf.truncated_normal(stddev=0.1, shape=[3, 3, in_channels, int(num_out)]),
                             dtype=tf.float32, name='Weights')
        convolved = tf.nn.conv2d(x_input, kernel, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = bias + convolved
        return activation(pre_activation)

In [174]:
# Graph for the hand-built CNN: two (3x3 conv, 15 channels -> 2x2 max-pool)
# stages followed by a dense layer producing 10 class logits.
tf.reset_default_graph()
dt_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
log_dir = root_dir + 'Logs/' + dt_now + '-15C3-MP2-15C3-MP2-F10'

# tfTraining is fed by the training cell but no op in this graph reads it.
tfTraining = tf.placeholder(shape=(),dtype=tf.bool)
tfLR = tf.placeholder(shape=(),dtype=tf.float32)
tfInput = tf.placeholder(shape=(None,28,28,1),dtype=tf.float32)
tfLabels = tf.placeholder(shape=(None,10),dtype=tf.float32)
# Each stride-2 pooling halves the spatial size: 28x28 -> 14x14 -> 7x7.
tfL1 = tf.nn.max_pool(convolution_layer3x3('L1-15C3', tfInput, 15), ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
tfL2 = tf.nn.max_pool(convolution_layer3x3('L2-15C3', tfL1, 15), ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
# Flatten the 7x7x15 feature maps into one row per sample.
tfLF = tf.reshape(tfL2, shape=(-1, 49*15), name='FLAT')

tfOut = tf.layers.dense(tfLF, 10, use_bias=True, name='Output')
tfLoss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tfLabels, logits=tfOut, name='Loss'))
tfOutProb = tf.nn.softmax(tfOut, name='OutputProbs')

# Fraction of samples whose predicted class matches the label. The earlier
# `1.0 - mean(correct)` form logged the error rate under the name 'Accuracy'.
tfCorrect = tf.equal(tf.argmax(tfOutProb, axis=1), tf.argmax(tfLabels, axis=1))
tfAccuracy = tf.reduce_mean(tf.cast(tfCorrect, dtype=tf.float32))
tfAccuracySummary = tf.summary.scalar('Accuracy', tfAccuracy)

tfTrain = tf.train.GradientDescentOptimizer(tfLR).minimize(tfLoss)
tfInit = tf.global_variables_initializer()

tffw = tf.summary.FileWriter(log_dir, tf.get_default_graph())

In [187]:
# Training hyper-parameters for the CNN.
learning_rate = 0.01
num_steps = 30
num_epochs = 500
batch_size = 200
# Upper bound on the extra passes over one mini-batch. Without it the
# "repeating run" loop below can spin indefinitely when the batch loss never
# drops under the threshold.
max_repeats = 10

valid_batch = {tfTraining: False, tfLR: 0.0, tfLabels: valid2d_y, tfInput: valid2d_X}
prev_loss = 1e20

with tf.Session() as sess:
    tfInit.run()
    for i in range(num_epochs):
        # One mini-batch is drawn per epoch and reused for all updates of that epoch.
        tX, ty = minibatch(train2d_X, train2d_y, num=batch_size)
        train_batch = {tfTraining: True, tfLR: learning_rate, tfLabels: ty, tfInput: tX}
        for j in range(num_steps):
            tfTrain.run(feed_dict=train_batch)

        # Keep training on this batch while its loss is more than 10% above the
        # previous epoch's final batch loss, but give up after max_repeats passes.
        batch_loss = tfLoss.eval(feed_dict=train_batch)
        repeats = 0
        while batch_loss > 1.10 * prev_loss and repeats < max_repeats:
            print('repeating run')
            for j in range(num_steps):
                tfTrain.run(feed_dict=train_batch)
            batch_loss = tfLoss.eval(feed_dict=train_batch)
            repeats += 1
        prev_loss = batch_loss
        accstr = tfAccuracySummary.eval(feed_dict=valid_batch)

        print('Epoch {0} training loss: '.format(i), prev_loss)
        tffw.add_summary(accstr, i)
    # Push any buffered summaries to disk once training is finished.
    tffw.flush()

Epoch 0 training loss:  2.24203
Epoch 1 training loss:  2.20602
Epoch 2 training loss:  2.07541
Epoch 3 training loss:  1.95152
Epoch 4 training loss:  1.72314
Epoch 5 training loss:  1.3309
Epoch 6 training loss:  0.980136
Epoch 7 training loss:  0.707363
Epoch 8 training loss:  0.541347
Epoch 9 training loss:  0.41554
Epoch 10 training loss:  0.40434
Epoch 11 training loss:  0.311344
repeating run
Epoch 12 training loss:  0.308851
Epoch 13 training loss:  0.247814
Epoch 14 training loss:  0.204581
Epoch 15 training loss:  0.182909
Epoch 16 training loss:  0.170959
repeating run
repeating run
Epoch 17 training loss:  0.186893
Epoch 18 training loss:  0.107808
repeating run
repeating run
Epoch 19 training loss:  0.109062
repeating run
repeating run
Epoch 20 training loss:  0.0886431
repeating run
Epoch 21 training loss:  0.0879674
Epoch 22 training loss:  0.0809701
repeating run
Epoch 23 training loss:  0.0613584
repeating run
repeating run
Epoch 24 training loss:  0.0523004
repeating 

KeyboardInterrupt: 

In [186]:
# Graph for the tf.layers-based CNN: two (3x3 conv, 15 channels -> 2x2
# max-pool) stages, two 100-unit ReLU dense layers and a 10-logit output layer.
tf.reset_default_graph()
dt_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
log_dir = root_dir + 'Logs/' + dt_now + '-15C3-MP2-15C3-MP2-H100-H100-F10'

# tfTraining is declared but no op in this graph reads it.
tfTraining = tf.placeholder(shape=(),dtype=tf.bool)
tfLR = tf.placeholder(shape=(),dtype=tf.float32)
tfInput = tf.placeholder(shape=(None,28,28,1),dtype=tf.float32)
tfLabels = tf.placeholder(shape=(None,10),dtype=tf.float32)

# NOTE(review): these conv2d calls pass no `activation`, whereas the hand-built
# convolution_layer3x3 applies ReLU by default — confirm that is intended.
tfL1 = tf.nn.max_pool(tf.layers.conv2d(tfInput, 15, [3,3], strides=[1,1], padding='SAME'), ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
tfL2 = tf.nn.max_pool(tf.layers.conv2d(tfL1, 15, [3,3], strides=[1,1], padding='SAME'), ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
# Flatten the 7x7x15 feature maps into one row per sample.
tfLF = tf.reshape(tfL2, shape=(-1, 49*15), name='FLAT')

tfH1 = tf.layers.dense(tfLF, 100, use_bias=True, name='Hidden1', activation=tf.nn.relu)
tfH2 = tf.layers.dense(tfH1, 100, use_bias=True, name='Hidden2', activation=tf.nn.relu)
tfOut = tf.layers.dense(tfH2, 10, use_bias=True, name='Output')
tfLoss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tfLabels, logits=tfOut, name='Loss'))
tfOutProb = tf.nn.softmax(tfOut, name='OutputProbs')

# Fraction of samples whose predicted class matches the label. The earlier
# `1.0 - mean(correct)` form logged the error rate under the name 'Accuracy'.
tfCorrect = tf.equal(tf.argmax(tfOutProb, axis=1), tf.argmax(tfLabels, axis=1))
tfAccuracy = tf.reduce_mean(tf.cast(tfCorrect, dtype=tf.float32))
tfAccuracySummary = tf.summary.scalar('Accuracy', tfAccuracy)

tfTrain = tf.train.GradientDescentOptimizer(tfLR).minimize(tfLoss)
tfInit = tf.global_variables_initializer()

tffw = tf.summary.FileWriter(log_dir, tf.get_default_graph())

In [26]:
# Shape check: weight a 3x3 patch of the first ten images with a diagonal mask
# and sum over both patch axes, leaving one value per image.
(train_X[:10, 10:13, 10:13] * np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).sum(axis=(1, 2)).shape

(10,)

In [44]:
# Scratch check that tf.tensordot over the two patch axes reproduces the
# "multiply by a 3x3 mask and sum" reduction from the NumPy cell.
tf.reset_default_graph()
dt_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
log_dir = root_dir + 'Logs/' + dt_now + '-TEST'

tfA = tf.Variable(train_X[:10,10:13,10:13],dtype=tf.float32)
tfW = tf.Variable(np.array([[1,0,0],[0,1,0],[0,0,1]],dtype=np.float32).reshape(3,3))
#tfR = tf.reduce_sum(tfA * tfW, axis=[1,2])
# Contract axes 1 and 2 of tfA with axes 0 and 1 of tfW: one scalar per sample.
tfR2 = tf.tensordot(tfA, tfW, axes=[[1,2],[0,1]])

tfI = tf.global_variables_initializer()
# The writer is only needed to dump the graph, so close it right away instead
# of leaving an unreferenced open event file behind.
tf.summary.FileWriter(log_dir, tf.get_default_graph()).close()

with tf.Session() as ses:
    tfI.run()
    print('R: ', tfR2.eval())


R:  [ 1.0196079   1.10588241  0.          0.          0.95686275  1.03137255
  1.24705887  2.22745085  0.          1.36078429]


In [59]:
# 2x4 matrix of uniform random samples drawn between -1.0 and 1.0.
tmp0 = np.random.uniform(low=-1.0, high=1.0, size=(2, 4))
tmp0

array([[ 0.98703956,  0.53436008,  0.59091353, -0.7829164 ],
       [-0.21208172,  0.38659534, -0.95584803,  0.13732337]])

In [60]:
# Single-row (1x4) integer matrix with a 1 in the third column.
tmp1 = np.array([[0, 0, 1, 0]])
tmp1

array([[0, 0, 1, 0]])

In [61]:
# Broadcasting demo: the single row of tmp1 is added to every row of tmp0.
np.add(tmp0, tmp1)

array([[ 0.98703956,  0.53436008,  1.59091353, -0.7829164 ],
       [-0.21208172,  0.38659534,  0.04415197,  0.13732337]])