In [28]:
import tensorflow as tf
import numpy as np
import pandas as pd
tf.reset_default_graph()
import random
from sklearn.neighbors import NearestNeighbors
tf.set_random_seed(777)
np.random.seed(444)
import seaborn as sns
import pickle, gzip
from mpl_toolkits.axes_grid1 import ImageGrid
import matplotlib.pyplot as plt
import sys

In [29]:
def get_data_python2():
    with gzip.open('mnist.pkl.gz', 'rb') as fd:
        #u = pickle._Unpickler(fd)
        #u.encoding = 'latin1'
        train_set, valid_set, test_set = pickle.load(fd)
        #u.load()
        return train_set, valid_set, test_set

In [30]:
def get_data_python3():
    with gzip.open('mnist.pkl.gz', 'rb') as fd:
        u = pickle._Unpickler(fd)
        u.encoding = 'latin1'
        train_set, valid_set, test_set = u.load()
        return train_set, valid_set, test_set

In [31]:
if(sys.version[0]=='3'):
    train_set, valid_set, test_set = get_data_python3()
elif(sys.version[0]=='2'):
    train_set, valid_set, test_set = get_data_python2()

In [32]:
# combine train and val
train_set = (np.concatenate([train_set[0], valid_set[0]], axis=0), 
                np.concatenate([train_set[1], valid_set[1]], axis=0))

In [33]:
print("Size of training set:", len(train_set[0]))

('Size of training set:', 60000)


In [34]:
print("Size of each training set item (28x28 image 1-d array): ", len(train_set[0][0]))

('Size of each training set item (28x28 image 1-d array): ', 784)


In [35]:
#Reshaping each 784 1-d array into 28x28 2-d array
Train_data = train_set[0].reshape(60000,28,28)

In [36]:
Train_data.shape

(60000, 28, 28)

In [37]:
##### Initialization ##########################################################
num_epochs = 500
LR = 0.0001
nunit = 32
batch_size = 10000

In [38]:
weights = {
    # Convolution Layers
    'c1': tf.get_variable('W1', shape=(5,5,1,20), \
            initializer=tf.contrib.layers.xavier_initializer()), 
    'c2': tf.get_variable('W2', shape=(5,5,20,50), \
            initializer=tf.contrib.layers.xavier_initializer()),
    'c3': tf.get_variable('W3', shape=(4,4,50,500), \
            initializer=tf.contrib.layers.xavier_initializer()),
    'c4': tf.get_variable('W4', shape=(1,1,500,10), \
            initializer=tf.contrib.layers.xavier_initializer()),
    
    # Dense Layers
    #'d1': tf.get_variable('W5', shape=(7*7*32,28*28), 
     #       initializer=tf.contrib.layers.xavier_initializer()),
    #'out': tf.get_variable('W6', shape=(128,n_classes), 
     #       initializer=tf.contrib.layers.xavier_initializer()),
}
biases = {
    # Convolution Layers
    'c1': tf.get_variable('B1', shape=(20), initializer=tf.zeros_initializer()),
    'c2': tf.get_variable('B2', shape=(50), initializer=tf.zeros_initializer()),
    'c3': tf.get_variable('B3', shape=(500), initializer=tf.zeros_initializer()),
    'c4': tf.get_variable('B4', shape=(10), initializer=tf.zeros_initializer()),
    
    # Dense Layers
    #'d1': tf.get_variable('B5', shape=(128), initializer=tf.zeros_initializer()),
    #'out': tf.get_variable('B6', shape=(n_classes), initializer=tf.zeros_initializer()),
}

In [39]:
def conv2d_1(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='VALID')
    x = tf.nn.bias_add(x, b)
    return x
def conv2d_2(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='VALID')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)
def conv2d_3(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='VALID')
    x = tf.nn.bias_add(x, b)
    return tf.nn.softmax(x)

In [40]:
def conv_net_dropout(data, weights, biases, training=False):
    # Convolution layers
    conv1 = conv2d_1(data, weights['c1'], biases['c1']) 
    dropout1 = tf.nn.dropout(conv1,0.8)
    pool1 = tf.nn.max_pool(dropout1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID')     
    conv2 = conv2d_1(pool1, weights['c2'], biases['c2'])
    dropout2 = tf.nn.dropout(conv2,0.8)
    pool2 = tf.nn.max_pool(dropout2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID') 
    conv3 = conv2d_2(pool2, weights['c3'], biases['c3']) 
    conv4 = conv2d_3(conv3, weights['c4'], biases['c4']) 
    
    out = conv4 # [10]
    return out

In [41]:
def conv_net(data, weights, biases, training=False):
    # Convolution layers
    conv1 = conv2d_1(data, weights['c1'], biases['c1']) 
    pool1 = tf.nn.max_pool(conv1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID')     
    conv2 = conv2d_1(pool1, weights['c2'], biases['c2'])
    pool2 = tf.nn.max_pool(conv2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID') 
    conv3 = conv2d_2(pool2, weights['c3'], biases['c3']) 
    conv4 = conv2d_3(conv3, weights['c4'], biases['c4']) 
    
    out = conv4 # [10]
    return out

In [42]:
Xtrain = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
#logits = conv_net(Xtrain, weights, biases)
logits = conv_net_dropout(Xtrain, weights, biases)

In [43]:
n_classes=10
ytrain = tf.placeholder(tf.float32, shape=(None, n_classes))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, 
                                                                 labels=ytrain))

In [44]:
#optimizer = tf.train.GradientDescentOptimizer(0.1)
optimizer = tf.train.MomentumOptimizer(learning_rate=0.1,momentum=0.9)
#optimizer = tf.train.AdamOptimizer(1e-3)
train_op = optimizer.minimize(loss)

#GradientDescentOptimizer
#MomentumOptimizer

In [45]:
test_images = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
tests_labels=tf.placeholder(tf.float32, shape=(None, n_classes))
test_predictions = tf.nn.softmax(conv_net(test_images, weights, biases))
#acc,acc_op = tf.metrics.accuracy(predictions=tf.argmax(test_predictions,1), 
 #                                labels=tests_labels )

In [46]:
import sklearn.preprocessing as skp
enc = skp.OneHotEncoder(handle_unknown='ignore')
enc.fit(train_set[1].reshape(-1,1))
Ytrain_data=enc.transform(train_set[1].reshape(-1,1)).toarray()

In [47]:
num_epochs = 50
batch_size=5000
sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
for epochs in range(num_epochs):
    res_avg = 0.
    for i in range(0, 60000, batch_size):
        feed_dict={Xtrain: Train_data[i:i+batch_size].reshape(-1,28,28,1), ytrain:Ytrain_data[i:i+batch_size].reshape(-1,10) }
        _,res = sess.run([train_op,loss], feed_dict=feed_dict)
        res_avg += np.sum(res)
    if epochs%5==0.:
        print(int(60000/batch_size)*epochs, res_avg/60000)
        print(np.mean(res))

(0, 0.00045949558814366656)
2.2889304
(60, 0.00034555988907814027)
1.7037094
(120, 0.0003372207303841909)
1.6760511
(180, 0.0003279685060183207)
1.5946137
(240, 0.0003162566622098287)
1.5696284
(300, 0.0002986410240332286)
1.4846694
(360, 0.0002971501628557841)
1.479535
(420, 0.0002964046279589335)
1.4765501
(480, 0.0002958662807941437)
1.4748328
(540, 0.0002956817309061686)
1.4748602


In [48]:

Test_data = test_set[0].reshape(10000,28,28)
pred=sess.run(test_predictions,feed_dict={test_images:Test_data.reshape(-1,28,28,1)})

In [49]:
pred_label=[]
for i in range(10000):
    pred_label.append(np.argmax(pred[i]))

In [50]:
pred_label=np.array(pred_label)

In [51]:
import sklearn.metrics
accuracy = sklearn.metrics.accuracy_score(test_set[1], pred_label)

In [52]:
accuracy

0.9823