In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
#from sklearn import svm
import tensorflow as tf

import os 

# Show which TensorFlow build is active, then pin the graph-level random seed.
tf_version_banner = "Tensorflow version " + tf.__version__
print(tf_version_banner)
tf.set_random_seed(seed=0)

In [None]:
# tensorboard: event log location.
# The machine-specific path is kept as the default so existing runs are unaffected;
# set the TENSORBOARD_LOGDIR environment variable to write the event logs somewhere
# else without editing the notebook.
LOGDIR = os.environ.get(
    'TENSORBOARD_LOGDIR',
    r'E:\myroot\work\data_science\kaggle\digit-recognizer\tensorboard_log\28',
)

In [None]:
# read the raw labelled training set (path is relative to the working directory)
train_csv_path = '../input/train.csv'
labeled_images = pd.read_csv(train_csv_path)

In [None]:
# sanity check: (rows, columns) of the raw training frame
print(labeled_images.shape)

In [None]:
# data preparation: turn the raw frame into scaled image tensors and one-hot labels
from sklearn.preprocessing import LabelBinarizer  # for one hot encoding

width = height = 28  # image resolution 28x28
encoder = LabelBinarizer()

# -1 lets numpy infer the number of images; the trailing 1 is the single (monochrome) channel
image_shape = (-1, width, height, 1)
# every column after the first is pixel data (the label column is excluded); dividing by 255 rescales it
images = np.array(labeled_images.iloc[:, 1:] / 255).reshape(image_shape)
# the first column is the label; fit_transform one-hot encodes it
labels = encoder.fit_transform(labeled_images.iloc[:, :1])


In [None]:
# shapes of the prepared tensors: images first, then one-hot labels
for prepared in (images, labels):
    print(prepared.shape)

In [None]:
# split the labelled data further into a training part and a held-out test part;
# train_size=0.8 keeps 80% for training, random_state=0 fixes the split
split_parts = train_test_split(images, labels, train_size=0.8, random_state=0)
train_images, test_images, train_labels, test_labels = split_parts


In [None]:
# shapes after the split, in the order: train images, train labels, test images, test labels
for split_part in (train_images, train_labels, test_images, test_labels):
    print(split_part.shape)


In [None]:
# placeholders: graph inputs that are supplied through the feed dict of each sess.run call
# X: 28x28 grayscale images; the first dimension (None) indexes the images in the mini-batch
X = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1], name="X")
# Y_: the correct answers, 10 values per image
Y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10], name="Y_")
# lr: learning rate, fed at run time so it can be varied
lr = tf.placeholder(dtype=tf.float32, name="lr")
# pkeep: dropout keep probability
pkeep = tf.placeholder(dtype=tf.float32, name="pkeep")

In [None]:
# layer sizes
# Three convolutional layers with their output channel counts, and a fully
# connected layer (the last layer has 10 softmax neurons).
# Alternative values noted for experimentation: (24, 48, 64, 200).
K, L, M = 6, 24, 48  # output depth of the first, second and third convolutional layer
N = 600  # width of the fully connected layer

In [None]:
# The model: three convolutional layers, one fully connected layer with dropout,
# and a 10-way softmax output. Every layer is built inside its own name scope and
# registers histogram summaries for its weights and biases.
# make sure weights and biases are NOT initialized with zeros:
# weights are drawn from a truncated normal (stddev=0.1), biases start at the constant 0.1
# NOTE(review): a graph-level seed is set earlier in the notebook; reordering the op
# creation below may change the initial weights - confirm before restructuring.
# convolution layers
with tf.name_scope('conv_layer1'): # tensorboard: using namespace
    W1 = tf.Variable(tf.truncated_normal([6, 6, 1, K], stddev=0.1), name="W1")  # 6x6 patch, 1 input channel, K output channels
    B1 = tf.Variable(tf.constant(0.1, tf.float32, [K]), name="B1")  # one bias per output channel
    stride = 1  # output is 28x28
    Y1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, stride, stride, 1], padding='SAME') + B1)
    # tensorboard: adding histogram
    tf.summary.histogram("weight1", W1)
    tf.summary.histogram("bias1", B1)

with tf.name_scope('conv_layer2'): # tensorboard: using namespace
    W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1), name="W2")  # 5x5 patch, K input channels, L output channels
    B2 = tf.Variable(tf.constant(0.1, tf.float32, [L]), name="B2")
    stride = 2  # output is 14x14
    Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[1, stride, stride, 1], padding='SAME') + B2)
    # tensorboard: adding histogram
    tf.summary.histogram("weight2", W2)
    tf.summary.histogram("bias2", B2)

with tf.name_scope('conv_layer3'): # tensorboard: using namespace
    W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1), name="W3")  # 4x4 patch, L input channels, M output channels
    B3 = tf.Variable(tf.constant(0.1, tf.float32, [M]), name="B3")
    stride = 2  # output is 7x7
    Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[1, stride, stride, 1], padding='SAME') + B3)
    # tensorboard: adding histogram
    tf.summary.histogram("weight3", W3)
    tf.summary.histogram("bias3", B3)

# reshape the output from the third convolution for the fully connected layer:
# each image's 7x7xM activation volume is flattened into one row
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])

# fully connected
with tf.name_scope('fc_layer2'): # tensorboard: using namespace
    W4 = tf.Variable(tf.truncated_normal([7 * 7 * M, N], stddev=0.1), name="W4")
    B4 = tf.Variable(tf.constant(0.1, tf.float32, [N]), name="B4")
    Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)
    # dropout on the fully connected activations; pkeep is the keep probability fed at run time
    YY4 = tf.nn.dropout(Y4, pkeep)
    # tensorboard: adding histogram
    tf.summary.histogram("weight4", W4)
    tf.summary.histogram("bias4", B4)

# output
with tf.name_scope('output_layer'): # tensorboard: using namespace
    W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1), name="W5")
    B5 = tf.Variable(tf.constant(0.1, tf.float32, [10]), name="B5")
    # Ylogits are the raw scores (used by the loss); Y is their softmax (used for predictions)
    Ylogits = tf.matmul(YY4, W5) + B5
    Y = tf.nn.softmax(Ylogits)
    # tensorboard: adding histogram
    tf.summary.histogram("weight5", W5)
    tf.summary.histogram("bias5", B5)


In [None]:
# loss/error measurement
# Cross-entropy loss (= -sum(Y_i * log(Yi))), averaged over the batch and scaled by 100.
# TensorFlow's softmax_cross_entropy_with_logits is used to avoid the numerical
# stability problems of log(0), which is NaN. It must be given the raw Ylogits,
# not the softmax output Y.
with tf.name_scope("cross_entropy"):
    per_image_loss = tf.nn.softmax_cross_entropy_with_logits(labels=Y_, logits=Ylogits)
    cross_entropy = tf.reduce_mean(per_image_loss) * 100
    tf.summary.scalar("cross_entropy", cross_entropy)
    

In [None]:
# training step: Adam minimizes the cross-entropy, with the learning rate read from the lr placeholder
with tf.name_scope("train_step"):
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    train_step = optimizer.minimize(cross_entropy)

In [None]:
# accuracy of the trained model, between 0 (worst) and 1 (best):
# the fraction of images whose highest-scoring class matches the one-hot label
with tf.name_scope("accuracy"):
    predicted_class = tf.argmax(Y, 1)
    expected_class = tf.argmax(Y_, 1)
    correct_prediction = tf.equal(predicted_class, expected_class)
    hit_flags = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(hit_flags)
    tf.summary.scalar("accuracy", accuracy)

In [None]:
# tensorboard: merge all summaries registered so far (the histograms and scalars above)
# into a single op, so that one sess.run call evaluates them together
summ = tf.summary.merge_all()

In [None]:
def make_hparam_string(learning_rate, hum_fc_layer, num_conv_layer):
    """Build a short label describing one run's hyper-parameters.

    The label is used as a sub-directory name under LOGDIR and for printing/logging.

    Args:
        learning_rate: learning rate, rendered in exponent notation with no decimals.
        hum_fc_layer: number of fully connected layers (rendered as ``fc=<n>``).
        num_conv_layer: number of convolutional layers (rendered as ``conv=<n>``).

    Returns:
        A string such as ``lr_1E-03,conv=3,fc=2``.
    """
    label_parts = (
        learning_rate,
        "conv=" + str(num_conv_layer),
        "fc=" + str(hum_fc_layer),
    )
    return "lr_%.0E,%s,%s" % label_parts

In [None]:
# training related variables
learning_rate = 0.001
percent_keep = 0.25  # value fed to the pkeep (dropout) placeholder while training
train_data_length = train_images.shape[0]

# params string for the current run; it also names the run's sub-directory under LOGDIR
hparam = make_hparam_string(learning_rate, hum_fc_layer=2, num_conv_layer=3)

# op that initializes the global variables
init = tf.global_variables_initializer()

# create tf session
sess = tf.Session()

# tensorboard: one writer per data split, each in its own sub-directory of this run
run_logdir = os.path.join(LOGDIR, hparam)

# train: all metrics including train accuracy & loss, plus the graph itself
writer_train = tf.summary.FileWriter(os.path.join(run_logdir, 'train'))
writer_train.add_graph(sess.graph)

# test: test accuracy & loss
writer_test = tf.summary.FileWriter(os.path.join(run_logdir, 'test'))

# run the initializer so every variable has its starting value
sess.run(init)


In [None]:
# training loop
steps = 50  # number of epochs; each epoch sweeps the training set forward, then backward
batch_size = 600  # train_data_length should be divisible by this

# start offset of every mini-batch; batches are contiguous slices (no shuffling happens here)
batch_starts = range(0, train_data_length, batch_size)

for step in range(steps):
    # One epoch = a forward sweep over the batches followed by a backward sweep over
    # the same batches in reverse order. reversed(batch_starts) is used for the
    # backward sweep because a range with a positive stride and start > stop
    # (e.g. range(train_data_length-1, -1, batch_size)) is empty and would never run.
    for sweep in (batch_starts, reversed(batch_starts)):
        for i in sweep:
            batch_X = train_images[i:i+batch_size]
            batch_Y = train_labels[i:i+batch_size]

            # train with the batch (dropout active through percent_keep)
            sess.run(train_step, {X: batch_X, Y_: batch_Y, lr: learning_rate, pkeep: percent_keep})

    # collect stats after one epoch (1 fwd + 1 bkw pass) with dropout disabled (pkeep: 1.0).
    # Summaries are indexed by the epoch number `step`; the batch offset `i` has the same
    # value at the end of every epoch and would put every point on one TensorBoard step.
    train_accuracy, train_loss, s_train = sess.run([accuracy, cross_entropy, summ], {X: train_images, Y_: train_labels, pkeep: 1.0})
    writer_train.add_summary(s_train, step)

    test_accuracy, test_loss, s_test = sess.run([accuracy, cross_entropy, summ], {X: test_images, Y_: test_labels, pkeep: 1.0})
    writer_test.add_summary(s_test, step)
    # print stats
    print ("Train Step {}:: Accuracy: train {} test {} :: Loss: train {} test {}".format(step, train_accuracy, test_accuracy, train_loss, test_loss))


In [None]:
# check final accuracy of test dataset
# class probabilities for the held-out images, with dropout disabled (pkeep: 1.0)
test_probabilities = sess.run(Y, feed_dict={X: test_images, pkeep: 1.0})
test_pred_labels = np.argmax(test_probabilities, axis=1)

# fraction of held-out images whose predicted digit equals the one-hot encoded label
test_true_labels = np.argmax(test_labels, axis=1)
(test_pred_labels == test_true_labels).sum() / len(test_labels)

In [None]:
# Save the trained variables to a checkpoint so they can be restored later.
# The path is a raw string: it is full of backslashes (\m, \w, \d, ...) that a normal
# string literal would treat as invalid escape sequences, which Python warns about.
# The r-prefix keeps them literal, so the resulting path value is unchanged.
save_path = r'E:\myroot\work\data_science\kaggle\digit-recognizer\codes\saved_models\digit-recognizer_v2.0'
# Saver adds the ops to save and restore all the variables
saver = tf.train.Saver()
saver.save(sess, save_path)

In [None]:
# re-run the initializer op so every variable goes back to its initial value;
# this discards the trained weights held in the session
sess.run(init)

In [None]:
# check accuracy of test dataset with the re-initialized (default) variable values
# class probabilities for the held-out images, with dropout disabled (pkeep: 1.0)
test_probabilities = sess.run(Y, feed_dict={X: test_images, pkeep: 1.0})
test_pred_labels = np.argmax(test_probabilities, axis=1)

# fraction of held-out images whose predicted digit equals the one-hot encoded label
test_true_labels = np.argmax(test_labels, axis=1)
(test_pred_labels == test_true_labels).sum() / len(test_labels)

In [None]:
# restore in the same jupyter session: load the variable values written to save_path
# back into the live session
saver.restore(sess, save_path)

In [None]:
# check final accuracy of test dataset with the restored model
# class probabilities for the held-out images, with dropout disabled (pkeep: 1.0)
test_probabilities = sess.run(Y, feed_dict={X: test_images, pkeep: 1.0})
test_pred_labels = np.argmax(test_probabilities, axis=1)

# fraction of held-out images whose predicted digit equals the one-hot encoded label
test_true_labels = np.argmax(test_labels, axis=1)
(test_pred_labels == test_true_labels).sum() / len(test_labels)

In [None]:
# prepare for submit
# read the unlabeled dataset that predictions are submitted for
unlabeled_images = pd.read_csv('../input/test.csv')

# Preprocess the pixels the same way as for training. Every column of this frame is
# used as pixel data (no column is sliced off), and prediction needs no label
# encoder, so none is created here and any previously fitted `encoder` stays intact.
width = height = 28 # image resolution 28X28
submit_images = unlabeled_images/255 # normalize values between 0 and 1
submit_images = np.reshape(np.array(submit_images), (-1, width, height, 1)) # reshape to 28X28 like pixel, -1 for unlimited rows, 1 for monochrome

print(unlabeled_images.shape)


In [None]:
# predict labels for submit: class probabilities with dropout disabled (pkeep: 1.0),
# then the index of the highest-scoring class for each image
submit_probabilities = sess.run(Y, feed_dict={X: submit_images, pkeep: 1.0})
submit_pred_labels = np.argmax(submit_probabilities, axis=1)


In [None]:
# submission file: one row per image, ImageId counting from 1 alongside the predicted label
image_ids = np.arange(1, submit_pred_labels.shape[0] + 1)
submission = pd.DataFrame(data={'ImageId': image_ids, 'Label': submit_pred_labels})
submission.to_csv('submission_v1.9.csv', index=False)
# show the last rows as the cell output
submission.tail()

In [None]:
# close the session; the session cannot run further ops after this
sess.close()