In [1]:
import os

import numpy as np
import pandas as pd

# Fixed random seed; passed as random_state to train_test_split further down
# so the train/validation split is the same on every run.
seed = 7

## Data Preprocessing

In [2]:
# Read both CSV files from the working directory: the labelled training set
# first, then the unlabelled test set.
train, test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

In [3]:
# Preview the first rows of the raw training frame; the 'label' column is
# still part of it at this point.
train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Row counts of both frames and the number of columns in the test frame.
# The second line reports the *test* set, so it is labelled accordingly.
print ("Training Instances", train.shape[0])
print ("Testing Instances", test.shape[0])
print ("Attributes", test.shape[1])

Training Instances 42000
Training Instances 28000
Atrributes 784


In [5]:
# pop() returns the 'label' column and removes it from `train` in place, so
# `train` no longer contains the target from here on. Re-running this cell on
# the same frame raises KeyError because the column is already gone.
y_train = train.pop('label')

In [6]:
from sklearn.model_selection import train_test_split

# Hold out a validation set. Stratifying on the labels keeps the class
# proportions equal in both parts, and the fixed seed makes the split
# repeatable. Note that y_train is rebound to the training part only.
X_train, X_val, y_train, y_val = train_test_split(
    train,
    y_train,
    stratify=y_train,
    random_state=seed,
)

In [7]:
def data_transform(data, labels, num_classes=10):
    """Cast features to float32 and one-hot encode integer class labels.

    Parameters
    ----------
    data : array-like
        Feature matrix. It is returned as a new float32 NumPy array.
    labels : array-like of int, or None
        One class id per row of ``data``. A pandas Series is accepted; its
        index is ignored. Pass ``None`` for unlabelled data.
    num_classes : int, default 10
        Width of the one-hot encoding; class ids are compared against
        ``0 .. num_classes - 1``.

    Returns
    -------
    tuple
        ``(data, labels)`` where ``labels`` is a float32 array of shape
        ``(n_samples, num_classes)``, or ``None`` if no labels were given.
    """
    data = np.asarray(data).astype(np.float32)
    if labels is not None:
        # Go through a plain ndarray first: indexing a pandas Series with
        # [:, None] is deprecated in pandas 1.x and rejected by pandas 2.x.
        label_column = np.asarray(labels)[:, None]
        # Broadcasting the column of ids against the row 0..num_classes-1
        # yields True exactly at each sample's class position.
        labels = (np.arange(num_classes) == label_column).astype(np.float32)
    return data, labels

# Convert every split to float32 arrays; the two labelled splits also get
# one-hot targets, while the test split has no labels to encode.
# The names are rebound from pandas objects to NumPy arrays here, so this
# cell is meant to run once per kernel session.
X_train, y_train = data_transform(X_train.values, y_train)
X_val, y_val = data_transform(X_val.values, y_val)
X_test, _ = data_transform(test.values, None)

# Shape report for a quick sanity check of the three splits.
print(
    "Training dataset dimensions=", X_train.shape,
    "\tTraining labels=", y_train.shape,
)
print(
    "Validation dataset dimensions=", X_val.shape,
    "\tValidation labels=", y_val.shape,
)
print("Testing Dataset dimensions=", X_test.shape)

Training dataset dimensions= (31500, 784) 	Training labels= (31500, 10)
Validation dataset dimensions= (10500, 784) 	Validation labels= (10500, 10)
Testing Dataset dimensions= (28000, 784)


## Building the Neural Network

The network consists of 5 fully connected layers; the last layer produces the logits that feed the softmax. Weight shapes (inputs x outputs): 
1st layer = 784 x 200, 
 2nd layer = 200 x 100, 
 3rd layer = 100 x 60, 
 4th layer = 60 x 30, 
 5th layer = 30 x 10

In [8]:
import tensorflow as tf

tf.reset_default_graph()
# Seed the fresh graph so the truncated-normal weight initialisation is
# repeatable between runs. This has to follow reset_default_graph(): the seed
# is a property of the graph, and the reset replaces the graph.
tf.set_random_seed(seed)

# widths of the four hidden layers
K, L, M, N = 200, 100, 60, 30


def _dense_params(n_in, n_out):
    """Create the parameters of one fully connected layer.

    Returns a ``(weights, biases)`` pair: ``weights`` is an ``[n_in, n_out]``
    variable drawn from a truncated normal with stddev 0.1, ``biases`` is a
    zero-initialised variable of length ``n_out``. The weights variable is
    created before the biases variable.
    """
    weights = tf.Variable(tf.truncated_normal([n_in, n_out], stddev=0.1))
    biases = tf.Variable(tf.zeros([n_out]))
    return weights, biases


# input
X = tf.placeholder(tf.float32, [None, 28*28])

# weights and biases of 5 fully connected layers
w1, b1 = _dense_params(28*28, K)
w2, b2 = _dense_params(K, L)
w3, b3 = _dense_params(L, M)
w4, b4 = _dense_params(M, N)
w5, b5 = _dense_params(N, 10)

  return f(*args, **kwds)


In [9]:
# Four hidden layers, each an affine map followed by ReLU.
y1 = tf.nn.relu(tf.add(tf.matmul(X, w1), b1))
y2 = tf.nn.relu(tf.add(tf.matmul(y1, w2), b2))
y3 = tf.nn.relu(tf.add(tf.matmul(y2, w3), b3))
y4 = tf.nn.relu(tf.add(tf.matmul(y3, w4), b4))

# Output layer: raw logits, no activation. The softmax is applied inside the
# loss and again explicitly at prediction time.
Y = tf.add(tf.matmul(y4, w5), b5)

In [10]:
# Placeholder for the one-hot encoded targets: one row per example, 10 columns.
Y_true = tf.placeholder(tf.float32, [None, 10])

In [11]:
# Per-example softmax cross-entropy computed from the logits, then averaged
# into the scalar that is minimised and reported.
loss = tf.nn.softmax_cross_entropy_with_logits(labels=Y_true, logits=Y)
mean_loss = tf.reduce_mean(loss)

# An example counts as correct when the largest logit sits at the same index
# as the 1 in its one-hot target; accuracy is the mean of those hits.
is_correct = tf.equal(
    tf.argmax(Y, axis=1),
    tf.argmax(Y_true, axis=1),
)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

#### Exponentially decaying learning rate

In [12]:
# Step counter advanced by the optimizer; excluded from the trainable set.
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.003

# Staircase schedule: the rate is multiplied by 0.8 after every full block
# of 10000 optimizer steps.
learning_rate = tf.train.exponential_decay(
    learning_rate=starter_learning_rate,
    global_step=global_step,
    decay_steps=10000,
    decay_rate=0.8,
    staircase=True,
)

In [13]:
# One Adam update on the mean cross-entropy. Passing global_step makes each
# update advance the counter that drives the learning-rate schedule.
optimize = (
    tf.train.AdamOptimizer(learning_rate=learning_rate)
    .minimize(mean_loss, global_step=global_step)
)

## Training

In [14]:
# Build the initialisation op, open the session, then run the op so every
# variable holds its starting value. InteractiveSession registers itself as
# the default session, which is what lets later cells call .eval() without
# naming a session.
initializer = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(initializer)

In [15]:
batch_size = 100
batch_number = X_train.shape[0]//batch_size
num_epochs = 500
checkpoint_every = 100  # epochs between two checkpoints
checkpoint_prefix = 'checkpoint_directory/neural_network'

# Saver.save() does not create a missing parent directory. Without this the
# first checkpoint attempt fails, and it only happens after checkpoint_every
# epochs of training have already been spent.
os.makedirs(os.path.dirname(checkpoint_prefix), exist_ok=True)
saver = tf.train.Saver()

for epoch_counter in range(num_epochs):
    curr_epoch_loss = 0

    # training the network on batches; the slice bounds follow directly from
    # the batch index, samples beyond batch_number full batches are not used
    for batch_counter in range(batch_number):
        start = batch_counter * batch_size
        end = start + batch_size
        batch_x = X_train[start:end]
        batch_y = y_train[start:end]

        train_data = {X: batch_x, Y_true: batch_y}
        _, batch_loss = sess.run([optimize, mean_loss], feed_dict=train_data)
        curr_epoch_loss += batch_loss

    # mean of the per-batch mean losses
    curr_epoch_loss /= batch_number

    # validation metrics on the whole hold-out set in one pass
    val_data = {X: X_val, Y_true: y_val}
    val_loss, val_accuracy = sess.run([mean_loss, accuracy], feed_dict=val_data)

    # Read the schedule state once, through the explicit session, and reuse
    # it for both the checkpoint suffix and the progress line.
    current_eta, current_step = sess.run([learning_rate, global_step])

    if (epoch_counter + 1) % checkpoint_every == 0:
        saver.save(sess, checkpoint_prefix, global_step=current_step)

    print ("Epoch %d: Train_Loss=%0.4f  Val_Loss=%0.4f  Val_Acc=%0.4f  eta=%0.6f  global_step=%d"
      % (epoch_counter+1,
         curr_epoch_loss,
         val_loss,
         val_accuracy,
         current_eta,
         current_step))

Epoch 1: Train_Loss=0.7822  Val_Loss=0.2571  Val_Acc=0.9247  eta=0.003000  global_step=315
Epoch 2: Train_Loss=0.1952  Val_Loss=0.2312  Val_Acc=0.9352  eta=0.003000  global_step=630
Epoch 3: Train_Loss=0.1455  Val_Loss=0.2134  Val_Acc=0.9431  eta=0.003000  global_step=945
Epoch 4: Train_Loss=0.1270  Val_Loss=0.2427  Val_Acc=0.9384  eta=0.003000  global_step=1260
Epoch 5: Train_Loss=0.1098  Val_Loss=0.2005  Val_Acc=0.9501  eta=0.003000  global_step=1575
Epoch 6: Train_Loss=0.0910  Val_Loss=0.1766  Val_Acc=0.9565  eta=0.003000  global_step=1890
Epoch 7: Train_Loss=0.0898  Val_Loss=0.1702  Val_Acc=0.9586  eta=0.003000  global_step=2205
Epoch 8: Train_Loss=0.0781  Val_Loss=0.2083  Val_Acc=0.9520  eta=0.003000  global_step=2520
Epoch 9: Train_Loss=0.0842  Val_Loss=0.1769  Val_Acc=0.9555  eta=0.003000  global_step=2835
Epoch 10: Train_Loss=0.0706  Val_Loss=0.1548  Val_Acc=0.9642  eta=0.003000  global_step=3150
Epoch 11: Train_Loss=0.0594  Val_Loss=0.1805  Val_Acc=0.9595  eta=0.003000  global

Epoch 89: Train_Loss=0.0079  Val_Loss=0.3365  Val_Acc=0.9752  eta=0.001920  global_step=28035
Epoch 90: Train_Loss=0.0045  Val_Loss=0.3373  Val_Acc=0.9750  eta=0.001920  global_step=28350
Epoch 91: Train_Loss=0.0021  Val_Loss=0.4020  Val_Acc=0.9751  eta=0.001920  global_step=28665
Epoch 92: Train_Loss=0.0021  Val_Loss=0.4395  Val_Acc=0.9743  eta=0.001920  global_step=28980
Epoch 93: Train_Loss=0.0115  Val_Loss=0.4629  Val_Acc=0.9715  eta=0.001920  global_step=29295
Epoch 94: Train_Loss=0.0273  Val_Loss=0.3096  Val_Acc=0.9730  eta=0.001920  global_step=29610
Epoch 95: Train_Loss=0.0373  Val_Loss=0.3737  Val_Acc=0.9699  eta=0.001920  global_step=29925
Epoch 96: Train_Loss=0.0233  Val_Loss=0.2735  Val_Acc=0.9725  eta=0.001536  global_step=30240
Epoch 97: Train_Loss=0.0076  Val_Loss=0.3262  Val_Acc=0.9739  eta=0.001536  global_step=30555
Epoch 98: Train_Loss=0.0053  Val_Loss=0.3149  Val_Acc=0.9740  eta=0.001536  global_step=30870
Epoch 99: Train_Loss=0.0060  Val_Loss=0.3935  Val_Acc=0.9748

Epoch 176: Train_Loss=0.0003  Val_Loss=0.5851  Val_Acc=0.9755  eta=0.000983  global_step=55440
Epoch 177: Train_Loss=0.0003  Val_Loss=0.5920  Val_Acc=0.9754  eta=0.000983  global_step=55755
Epoch 178: Train_Loss=0.0003  Val_Loss=0.5989  Val_Acc=0.9754  eta=0.000983  global_step=56070
Epoch 179: Train_Loss=0.0003  Val_Loss=0.6060  Val_Acc=0.9754  eta=0.000983  global_step=56385
Epoch 180: Train_Loss=0.0003  Val_Loss=0.6132  Val_Acc=0.9754  eta=0.000983  global_step=56700
Epoch 181: Train_Loss=0.0003  Val_Loss=0.6207  Val_Acc=0.9754  eta=0.000983  global_step=57015
Epoch 182: Train_Loss=0.0003  Val_Loss=0.6283  Val_Acc=0.9753  eta=0.000983  global_step=57330
Epoch 183: Train_Loss=0.0003  Val_Loss=0.6359  Val_Acc=0.9753  eta=0.000983  global_step=57645
Epoch 184: Train_Loss=0.0003  Val_Loss=0.6435  Val_Acc=0.9753  eta=0.000983  global_step=57960
Epoch 185: Train_Loss=0.0003  Val_Loss=0.6515  Val_Acc=0.9753  eta=0.000983  global_step=58275
Epoch 186: Train_Loss=0.0003  Val_Loss=0.6594  Val

Epoch 263: Train_Loss=0.0008  Val_Loss=1.2047  Val_Acc=0.9734  eta=0.000503  global_step=82845
Epoch 264: Train_Loss=0.0008  Val_Loss=1.2141  Val_Acc=0.9737  eta=0.000503  global_step=83160
Epoch 265: Train_Loss=0.0008  Val_Loss=1.2235  Val_Acc=0.9736  eta=0.000503  global_step=83475
Epoch 266: Train_Loss=0.0008  Val_Loss=1.2297  Val_Acc=0.9736  eta=0.000503  global_step=83790
Epoch 267: Train_Loss=0.0008  Val_Loss=1.2326  Val_Acc=0.9737  eta=0.000503  global_step=84105
Epoch 268: Train_Loss=0.0007  Val_Loss=1.2410  Val_Acc=0.9739  eta=0.000503  global_step=84420
Epoch 269: Train_Loss=0.0007  Val_Loss=1.2500  Val_Acc=0.9738  eta=0.000503  global_step=84735
Epoch 270: Train_Loss=0.0007  Val_Loss=1.2579  Val_Acc=0.9735  eta=0.000503  global_step=85050
Epoch 271: Train_Loss=0.0054  Val_Loss=1.2579  Val_Acc=0.9739  eta=0.000503  global_step=85365
Epoch 272: Train_Loss=0.0077  Val_Loss=1.3024  Val_Acc=0.9759  eta=0.000503  global_step=85680
Epoch 273: Train_Loss=0.0014  Val_Loss=1.2189  Val

KeyboardInterrupt: 

### Applying softmax to generate the output

In [None]:
# Softmax turns the logits into class probabilities; the index of the largest
# probability in each row is the predicted digit.
predict = tf.argmax(tf.nn.softmax(Y), axis=1)

# Evaluate on the whole test set in a single pass through the open session.
predictions = sess.run(predict, feed_dict={X: X_test})

In [None]:
# 1-based ids, one per test row, paired with the predicted class.
test_id = np.arange(len(X_test)) + 1
submission = pd.DataFrame({'ImageId': test_id, 'Label': predictions})
submission.head()

In [None]:
# Write the submission without the DataFrame index, so the file contains only
# the ImageId and Label columns.
submission.to_csv('my_submission.csv',index=False)