In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [9]:
# Load the training CSV as a plain NumPy array (one row per sample).
# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0; both return the same ndarray.
dataframe = pd.read_csv('train.csv').values

In [10]:
# Sanity check on the loaded array: (n_samples, n_columns). Column 0 is used as
# the label and the remaining columns as pixels by the cells that follow.
dataframe.shape

(42000, 785)

In [3]:
# Column 0 holds the class label; every remaining column is one pixel value.
y, X = dataframe[:, 0], dataframe[:, 1:]
# Keep the labels as a column vector, matching the (None, 1) `labels` placeholder.
y = y.reshape([-1, 1])

# Seed NumPy's global RNG so the row order (and therefore the train/test split
# derived from it) is identical on every Restart & Run All. This only fixes the
# NumPy side; TensorFlow's weight initialisation is not seeded here.
np.random.seed(42)
indices = np.random.permutation(len(X))

# Fancy indexing returns shuffled copies; cast to float32 for TensorFlow.
X = X[indices].astype(np.float32)
y = y[indices].astype(np.float32)

In [4]:
# Split the (already shuffled) rows: first 70% for training, last 30% held out
# as the test set.
n_train = int(0.7 * len(y))

y_train, y_test = y[:n_train], y[n_train:]

# Centre and scale the pixels with (x - 127) / 127 (assumes raw values are in
# 0..255, giving roughly [-1, 1] -- TODO confirm against the data source).
# New arrays are built on purpose: the previous in-place `-=` / `/=` operated on
# views of X, so re-running this cell silently normalised the data a second time.
X_train = (X[:n_train, :] - 127.0) / 127.0
X_test = (X[n_train:, :] - 127.0) / 127.0

In [5]:
def get_next_batch(batch_size, X, y):
    """Yield consecutive, non-overlapping mini-batches of ``(X, y)``.

    The previous implementation advanced the window start by one row per step
    (``X[i: i+batch_size]``), so successive batches overlapped almost entirely
    and only the first rows of the data were ever visited; it also skipped one
    full batch. Batches now start at 0, batch_size, 2*batch_size, ...

    Args:
        batch_size: number of rows per batch (positive int).
        X: 2-D array of samples, indexed as ``X[start:stop, :]``.
        y: array of targets with the same number of rows as ``X``.

    Yields:
        ``(X_batch, y_batch)`` tuples, each with exactly ``batch_size`` rows.
        Trailing rows that do not fill a whole batch are dropped, so every
        batch has the same size and per-batch averages stay unbiased.
    """
    n_batches = y.shape[0] // batch_size
    for batch_idx in range(n_batches):
        start = batch_idx * batch_size
        stop = start + batch_size
        yield X[start:stop, :], y[start:stop]

In [6]:
def combine(tensor_one, tensor_two):
    """Average two tensors: add them element-wise, then divide the sum by 2.

    The division is performed with ``tf.div``, so it follows that op's rules
    for the dtype of the inputs.
    """
    summed = tf.add(tensor_one, tensor_two)
    return tf.div(summed, 2)

In [7]:
# Graph inputs; values are supplied through `feed_dict` at run time.
# One flattened image per row; the row width follows the training matrix.
input_ = tf.placeholder(dtype=tf.float32, shape=(None, X_train.shape[1]))
# Integer class id per row, kept as a single-column matrix.
labels = tf.placeholder(dtype=tf.int32, shape=(None, 1))
# Step size handed to the Adam optimizer inside nn().
learning_rate_ph = tf.placeholder(dtype=tf.float32)
# NOTE(review): the training loop feeds this (0.70 while training, 1.0 while
# evaluating) but no layer in nn() consumes it -- confirm whether dropout was intended.
keep_prob = tf.placeholder(dtype=tf.float32)

In [8]:
def nn():
    """Build the convolutional classifier on top of the module-level placeholders.

    Architecture, in order: reshape to 28x28x1 -> conv (8 filters, 5x5, ReLU)
    -> average pool (2x2, stride 1) -> conv (8 filters, 3x3, ReLU)
    -> max pool (2x2, stride 2) -> flatten -> dense (32, ReLU) -> dense (10 logits).

    Reads the globals `input_`, `labels` and `learning_rate_ph`.

    Returns:
        tuple ``(cost, optimizer, out)``:
            cost: scalar tensor, mean softmax cross-entropy over the batch.
            optimizer: Adam training op that minimises `cost`.
            out: logits tensor with 10 units per row.
    """
    input_2d = tf.reshape(input_, [-1, 28, 28, 1])
    conv_1 = tf.layers.conv2d(input_2d, filters=8, kernel_size=5, strides=1, activation=tf.nn.relu)
    pool_1 = tf.layers.average_pooling2d(conv_1, pool_size=2, strides=1)
    conv_2 = tf.layers.conv2d(pool_1, filters=8, kernel_size=3, strides=1, activation=tf.nn.relu)
    pool_2 = tf.layers.max_pooling2d(conv_2, pool_size=2, strides=2)
    flat = tf.layers.flatten(pool_2)
    hidden = tf.layers.dense(flat, 32, activation=tf.nn.relu)
    out = tf.layers.dense(hidden, 10)

    # `labels` is a (batch, 1) column. Drop the trailing axis before one-hot
    # encoding so the targets are (batch, 10) like the logits, instead of a
    # (batch, 1, 10) tensor that only worked through implicit flattening.
    class_ids = tf.squeeze(labels, axis=[1])
    targets = tf.one_hot(class_ids, depth=10)

    # Define the cost
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=targets))

    # Apply an optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_ph).minimize(cost)

    return cost, optimizer, out

In [None]:
# Build the graph once. `output` is the logits tensor returned by nn(); the
# evaluation helpers take its argmax to obtain predicted classes.
cost, optimizer, output = nn()

In [None]:
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# Number of iterations of the outer training loop.
epochs = 75
# Rows per optimisation step.
batch_size = 512
# NOTE(review): not referenced by any visible cell.
num_batches = int(X_train.shape[0]/batch_size)
# Rebinds `n_train` (earlier: the train/test boundary) to the number of rows of
# y_train used for fitting; the remaining 20% form the validation fold.
n_train = int(len(y_train)*0.8)

# Metric histories: the training loop appends one value per evaluated epoch and
# the plotting cells read them afterwards.
train_accuracy = []
val_accuracy = []
test_accuracy = []
cost_curr = []

def eval_cost(X, y):
    """Return the mean per-batch value of `cost` over ``(X, y)``.

    The data is walked in batches of 256 via `get_next_batch`. Must be called
    while a default TensorFlow session is active, because it uses `cost.eval`.

    The learning-rate placeholder is intentionally not fed: `cost` does not
    depend on it, and leaving it out removes the hidden dependency on a global
    that is only assigned inside the training cell.

    Args:
        X: 2-D array of samples.
        y: column vector of labels aligned with `X`.

    Returns:
        Average of the batch costs.

    Raises:
        ValueError: if the data yields no batch at all (previously an opaque
            ZeroDivisionError).
    """
    batch_costs = [
        cost.eval(feed_dict={input_: X_batch, labels: y_batch, keep_prob: 1.0})
        for X_batch, y_batch in get_next_batch(256, X, y)
    ]
    if not batch_costs:
        raise ValueError("eval_cost: not enough rows to form a single batch of 256")
    return sum(batch_costs) / len(batch_costs)

def eval_accuracy(X, y):
    """Return the mean per-batch classification accuracy over ``(X, y)``.

    The data is walked in batches of 256 via `get_next_batch`; the predicted
    class of each row is the argmax of the logits in `output`. Must be called
    while a default TensorFlow session is active, because it uses `output.eval`.

    Only the placeholders the logits depend on are fed; the learning rate is not
    needed for a forward pass, which removes the hidden dependency on a global
    that is only assigned inside the training cell.

    Args:
        X: 2-D array of samples.
        y: column vector of true labels aligned with `X`.

    Returns:
        Average of the batch accuracies, in [0, 1].

    Raises:
        ValueError: if the data yields no batch at all (previously an opaque
            ZeroDivisionError).
    """
    batch_accuracies = []
    for X_batch, y_batch in get_next_batch(256, X, y):
        logits = output.eval(feed_dict={input_: X_batch, keep_prob: 1.0})
        y_predicted = np.argmax(logits, 1)
        batch_accuracies.append(accuracy_score(y_batch, y_predicted))
    if not batch_accuracies:
        raise ValueError("eval_accuracy: not enough rows to form a single batch of 256")
    return sum(batch_accuracies) / len(batch_accuracies)
        

# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Learning rate fed to Adam on every optimisation step.
    current_placeholder_value = 0.001
    # Evaluate and log after every `eval_every` epochs.
    eval_every = 1
    # Build the Saver once: constructing a new one for each checkpoint keeps
    # adding duplicate save/restore ops to the graph.
    saver = tf.train.Saver()

    # Carve out the validation fold ONCE, before training starts. Re-drawing the
    # split every epoch (as before) meant that rows used for validation had
    # already been trained on in earlier epochs, so validation accuracy was not
    # a held-out measurement.
    indices = np.random.permutation(len(y_train))
    train_indices = indices[: n_train]
    val_indices = indices[n_train:]
    print('train iindices', len(train_indices), 'val indices', len(val_indices))

    X_val_fold, y_val_fold = X_train[val_indices, :], y_train[val_indices]

    for epoch in tqdm(range(epochs)):
        # Reshuffle only the training rows so each epoch sees them in a new order.
        epoch_order = np.random.permutation(train_indices)
        X_train_fold, y_train_fold = X_train[epoch_order, :], y_train[epoch_order]

        # Loop over all batches. Distinct loop names keep the global `y`
        # (the full label array) from being overwritten by the last batch.
        for x_batch, y_batch in get_next_batch(batch_size, X_train_fold, y_train_fold):
            sess.run(optimizer,
                     feed_dict={input_: x_batch, labels: y_batch,
                                learning_rate_ph: current_placeholder_value, keep_prob: 0.70})

        if (epoch + 1) % eval_every == 0:
            # Find training cost.
            c = eval_cost(X_train_fold, y_train_fold)
            cost_curr.append(c)
            # Find train accuracy
            current_train_acc = eval_accuracy(X_train_fold, y_train_fold)
            train_accuracy.append(current_train_acc)
            # Find the validation accuracy
            current_val_acc = eval_accuracy(X_val_fold, y_val_fold)
            val_accuracy.append(current_val_acc)
            # Find test accuracy
            test_acc = eval_accuracy(X_test, y_test)
            test_accuracy.append(test_acc)

            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c))
            print("Train Accuracy:", current_train_acc)
            print("Validation Accuracy:", current_val_acc)
            print("Test Accuracy:", test_acc)
            print()

            # Checkpoint any epoch that clears 99% test accuracy.
            if test_acc > 0.99:
                saver.save(sess, './mnist-acc{}'.format(test_acc*100.0), global_step=epoch+1)

    print("Optimization Finished!")
    # Find test accuracy
    print("Test Accuracy:", eval_accuracy(X_test, y_test))

    # Per-row test predictions for the confusion-matrix and error-inspection
    # cells below. They must be computed here: the trained variables only exist
    # while this session is open. Predict in chunks to bound memory use.
    y_predicted_test = np.concatenate([
        np.argmax(output.eval(feed_dict={input_: X_test[start:start + 256]}), 1)
        for start in range(0, len(X_test), 256)
    ])

In [None]:
# Accuracy history for the three splits; one point per evaluated epoch.
# Labels and a legend are added so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(train_accuracy, 'b-', label='train')
ax.plot(val_accuracy, 'r-', label='validation')
ax.plot(test_accuracy, 'k-', label='test')
ax.set(title='Accuracy per epoch', xlabel='epoch', ylabel='accuracy')
ax.legend()
plt.show()

In [None]:
# Training cost history; one point per evaluated epoch.
fig, ax = plt.subplots()
ax.plot(cost_curr, 'b-')
ax.set(title='Training cost per epoch', xlabel='epoch', ylabel='mean cross-entropy')
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of true vs. predicted test labels.
# `y_predicted_test` must hold one predicted class per row of X_test, computed
# while the training session was still open (the trained variables are not
# available after the `with tf.Session()` block exits).
confusion_matrix(y_true=y_test, y_pred=y_predicted_test)

In [None]:
# Display every test image whose true label is 9 but which was predicted as 7.
is_nine_called_seven = (y_predicted_test == 7) & (y_test.squeeze() == 9)
indices = np.flatnonzero(is_nine_called_seven)
for i in indices:
    # Deliberately tiny thumbnails, one figure per misclassified digit.
    plt.figure(figsize=(0.50, 0.50))
    digit = X_test[i].reshape([28, 28])
    plt.imshow(digit, cmap='gray')
    plt.show()