In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
tf.reset_default_graph()
import random
from sklearn.neighbors import NearestNeighbors
tf.set_random_seed(777)
np.random.seed(444)
import seaborn as sns
import pickle, gzip
from mpl_toolkits.axes_grid1 import ImageGrid
import matplotlib.pyplot as plt
import sys

In [2]:
def get_data_python2():
    with gzip.open('mnist.pkl.gz', 'rb') as fd:
        #u = pickle._Unpickler(fd)
        #u.encoding = 'latin1'
        train_set, valid_set, test_set = pickle.load(fd)
        #u.load()
        return train_set, valid_set, test_set

In [3]:
def get_data_python3():
    with gzip.open('mnist.pkl.gz', 'rb') as fd:
        u = pickle._Unpickler(fd)
        u.encoding = 'latin1'
        train_set, valid_set, test_set = u.load()
        return train_set, valid_set, test_set

In [4]:
if(sys.version[0]=='3'):
    train_set, valid_set, test_set = get_data_python3()
elif(sys.version[0]=='2'):
    train_set, valid_set, test_set = get_data_python2()

In [5]:
# combine train and val
train_set = (np.concatenate([train_set[0], valid_set[0]], axis=0), 
                np.concatenate([train_set[1], valid_set[1]], axis=0))

In [6]:
print("Size of training set:", len(train_set[0]))

('Size of training set:', 60000)


In [7]:
print("Size of each training set item (28x28 image 1-d array): ", len(train_set[0][0]))

('Size of each training set item (28x28 image 1-d array): ', 784)


In [8]:
#Reshaping each 784 1-d array into 28x28 2-d array
Train_data = train_set[0].reshape(60000,28,28)

In [9]:
Train_data.shape

(60000, 28, 28)

In [11]:
##### Initialization ##########################################################
num_epochs = 50
LR = 0.0001
nunit = 32
batch_size = 10000

In [12]:
weights = {
    # Convolution Layers
    'c1': tf.get_variable('W1', shape=(5,5,1,20), \
            initializer=tf.contrib.layers.xavier_initializer()), 
    'c2': tf.get_variable('W2', shape=(5,5,20,50), \
            initializer=tf.contrib.layers.xavier_initializer()),
    'c3': tf.get_variable('W3', shape=(4,4,50,500), \
            initializer=tf.contrib.layers.xavier_initializer()),
    'c4': tf.get_variable('W4', shape=(1,1,500,10), \
            initializer=tf.contrib.layers.xavier_initializer()),
    
    # Dense Layers
    #'d1': tf.get_variable('W5', shape=(7*7*32,28*28), 
     #       initializer=tf.contrib.layers.xavier_initializer()),
    #'out': tf.get_variable('W6', shape=(128,n_classes), 
     #       initializer=tf.contrib.layers.xavier_initializer()),
}
biases = {
    # Convolution Layers
    'c1': tf.get_variable('B1', shape=(20), initializer=tf.zeros_initializer()),
    'c2': tf.get_variable('B2', shape=(50), initializer=tf.zeros_initializer()),
    'c3': tf.get_variable('B3', shape=(500), initializer=tf.zeros_initializer()),
    'c4': tf.get_variable('B4', shape=(10), initializer=tf.zeros_initializer()),
    
    # Dense Layers
    #'d1': tf.get_variable('B5', shape=(128), initializer=tf.zeros_initializer()),
    #'out': tf.get_variable('B6', shape=(n_classes), initializer=tf.zeros_initializer()),
}

In [13]:
def conv2d_1(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='VALID')
    x = tf.nn.bias_add(x, b)
    return x
def conv2d_2(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='VALID')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)
def conv2d_3(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='VALID')
    x = tf.nn.bias_add(x, b)
    return tf.nn.softmax(x)

In [14]:
def conv_net_dropout(data, weights, biases, training=False):
    # Convolution layers
    conv1 = conv2d_1(data, weights['c1'], biases['c1']) 
    dropout1 = tf.nn.dropout(conv1,0.8)
    pool1 = tf.nn.max_pool(dropout1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID')     
    conv2 = conv2d_1(pool1, weights['c2'], biases['c2'])
    dropout2 = tf.nn.dropout(conv2,0.8)
    pool2 = tf.nn.max_pool(dropout2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID') 
    conv3 = conv2d_2(pool2, weights['c3'], biases['c3']) 
    conv4 = conv2d_3(conv3, weights['c4'], biases['c4']) 
    
    out = conv4 # [10]
    return out

In [24]:
epsilon = 1e-3
def conv_net(data, weights, biases, training=False):
    # Convolution layers
    conv1 = conv2d_1(data, weights['c1'], biases['c1']) 
    batch_mean1, batch_var1 = tf.nn.moments(conv1,[0])
    scale1 = tf.Variable(tf.ones([20]))
    beta1 = tf.Variable(tf.zeros([20]))
    normalized_layer_1 = tf.nn.batch_normalization(conv1, batch_mean1,batch_var1,beta1,scale1,epsilon)
    pool1 = tf.nn.max_pool(normalized_layer_1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID')   
    conv2 = conv2d_1(pool1, weights['c2'], biases['c2'])
    batch_mean2, batch_var2 = tf.nn.moments(conv2,[0])
    scale2 = tf.Variable(tf.ones([50]))
    beta2 = tf.Variable(tf.zeros([50]))
    normalized_layer_2 = tf.nn.batch_normalization(conv2, batch_mean2,batch_var2,beta2,scale2,epsilon)
    pool2 = tf.nn.max_pool(normalized_layer_2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID') 
    conv3 = conv2d_2(pool2, weights['c3'], biases['c3']) 
    batch_mean3, batch_var3 = tf.nn.moments(conv3,[0])
    scale3 = tf.Variable(tf.ones([500]))
    beta3 = tf.Variable(tf.zeros([500]))
    normalized_layer_3 = tf.nn.batch_normalization(conv3, batch_mean3,batch_var3,beta3,scale3,epsilon)
    conv4 = conv2d_3(normalized_layer_3, weights['c4'], biases['c4']) 
    out = conv4 # [10]
    return out

In [25]:
Xtrain = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
logits = conv_net(Xtrain, weights, biases)
#logits = conv_net_dropout(Xtrain, weights, biases)

In [26]:
n_classes=10
ytrain = tf.placeholder(tf.float32, shape=(None, n_classes))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, 
                                                                 labels=ytrain))

In [27]:
#optimizer = tf.train.GradientDescentOptimizer(0.1)
optimizer = tf.train.MomentumOptimizer(learning_rate=0.1,momentum=0.9)
#optimizer = tf.train.AdamOptimizer(1e-3)
train_op = optimizer.minimize(loss)

#GradientDescentOptimizer
#MomentumOptimizer

In [28]:
test_images = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
tests_labels=tf.placeholder(tf.float32, shape=(None, n_classes))
test_predictions = tf.nn.softmax(conv_net(test_images, weights, biases))
#acc,acc_op = tf.metrics.accuracy(predictions=tf.argmax(test_predictions,1), 
 #                                labels=tests_labels )

In [29]:
import sklearn.preprocessing as skp
enc = skp.OneHotEncoder(handle_unknown='ignore')
enc.fit(train_set[1].reshape(-1,1))
Ytrain_data=enc.transform(train_set[1].reshape(-1,1)).toarray()

In [None]:
num_epochs = 50
batch_size=5000
sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
for epochs in range(num_epochs):
    res_avg = 0.
    for i in range(0, 60000, batch_size):
        feed_dict={Xtrain: Train_data[i:i+batch_size].reshape(-1,28,28,1), ytrain:Ytrain_data[i:i+batch_size].reshape(-1,10) }
        _,res = sess.run([train_op,loss], feed_dict=feed_dict)
        res_avg += np.sum(res)
    if epochs%5==0.:
        print(int(60000/batch_size)*epochs, res_avg/60000)
        print(np.mean(res))

(0, 0.0003394070645173391)
1.5276014
(60, 0.0002961809694766998)
1.474844
(120, 0.0002943173468112946)
1.4689826
(180, 0.0002936561604340871)
1.4660041
(240, 0.0002933264056841532)
1.4647366
(300, 0.0002930129547913869)
1.4639701
(360, 0.0002928788681825002)
1.4636068


In [48]:

Test_data = test_set[0].reshape(10000,28,28)
pred=sess.run(test_predictions,feed_dict={test_images:Test_data.reshape(-1,28,28,1)})

In [49]:
pred_label=[]
for i in range(10000):
    pred_label.append(np.argmax(pred[i]))

In [50]:
pred_label=np.array(pred_label)

In [51]:
import sklearn.metrics
accuracy = sklearn.metrics.accuracy_score(test_set[1], pred_label)

In [52]:
accuracy

0.9823