# DNN classifier for the EMNIST dataset with data fetching and training running concurrently

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os

  return f(*args, **kwds)


In [2]:
def _parse_function(filename, label):
    image_string = tf.read_file(filename)

    # Don't use tf.image.decode_image, or the output shape will be undefined
    image = tf.image.decode_jpeg(image_string, channels=1)

    # This will convert to float values in [0, 1]
    image = tf.image.convert_image_dtype(image, tf.float32)

    #image = tf.image.resize_images(image, [64, 64])
    return image, label


In [3]:
def input_fn():
    mapping = pd.read_csv("train-labels.csv", header = None)
    
    filenames = list(mapping.iloc[:,0])
    labels = list(mapping.iloc[:,1])
    
    parse_fn = lambda f, l: _parse_function(f, l)
    
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    dataset = dataset.shuffle(len(filenames))
    dataset = dataset.map(parse_fn, num_parallel_calls=4)
    #dataset = dataset.map(train_preprocess, num_parallel_calls=4)
    dataset = dataset.repeat(5)
    dataset = dataset.batch(64)
    dataset = dataset.prefetch(1)
    
    
    iterator = dataset.make_one_shot_iterator()
    images, labels = iterator.get_next()
    #iterator = dataset.make_initializable_iterator()
    #images, labels = iterator.get_next()
    #iterator_init_op = iterator.initializer
    
    #inputs = {'images': images, 'labels': labels, 'iterator_init_op': iterator_init_op}
    return {"images": images}, labels

In [4]:
# Test the input function

next_batch = input_fn() 

with tf.Session() as sess:
    first_batch = sess.run(next_batch)
    print(first_batch)

({'images': array([[[[ 0.        ],
         [ 0.        ],
         [ 0.        ],
         ..., 
         [ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ],
         ..., 
         [ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ],
         ..., 
         [ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        ..., 
        [[ 0.        ],
         [ 0.        ],
         [ 0.        ],
         ..., 
         [ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ],
         ..., 
         [ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ],
         ..., 
         [ 0.        ],
         [ 0.        ],
         [ 0.        ]]],


      

In [5]:
feature_x = tf.feature_column.numeric_column("images", shape=784)
feature_columns = [feature_x]

In [6]:
num_hidden_units = [512, 256, 128]
num_classes = 62

In [7]:
model = tf.estimator.DNNClassifier(feature_columns=feature_columns,
                                   hidden_units=num_hidden_units,
                                   activation_fn=tf.nn.relu,
                                   n_classes=num_classes,
                                   model_dir="./checkpoints/")

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './checkpoints/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3bbb106ac8>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [8]:
model.train(input_fn=input_fn, steps=2000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from ./checkpoints/model.ckpt-4188
INFO:tensorflow:Saving checkpoints for 4189 into ./checkpoints/model.ckpt.
INFO:tensorflow:loss = 35.8898, step = 4189
INFO:tensorflow:global_step/sec: 0.961115
INFO:tensorflow:loss = 27.3342, step = 4289 (104.047 sec)
INFO:tensorflow:global_step/sec: 1.93771
INFO:tensorflow:loss = 32.4575, step = 4389 (51.636 sec)
INFO:tensorflow:global_step/sec: 3.71461
INFO:tensorflow:loss = 32.3618, step = 4489 (26.891 sec)
INFO:tensorflow:global_step/sec: 9.21371
INFO:tensorflow:loss = 48.6553, step = 4589 (10.853 sec)
INFO:tensorflow:global_step/sec: 16.0651
INFO:tensorflow:loss = 26.119, step = 4689 (6.227 sec)
INFO:tensorflow:global_step/sec: 27.2814
INFO:tensorflow:loss = 32.0038, step = 4789 (3.662 sec)
INFO:tensorflow:global_step/sec: 26.4262
INFO:tensorflow:loss = 30.0325, step = 4889 (3.784 sec)
INFO:tensorflow:global_step/sec: 30.3014
INFO:tensorflow:loss = 33.935, step = 49

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f3bbb1060b8>

## TEST

In [9]:
def test_input_fn():
    mapping = pd.read_csv("test-labels.csv", header = None)
    filenames = list(mapping.iloc[:,0])
    labels = list(mapping.iloc[:,1])
    
    parse_fn = lambda f, l: _parse_function(f, l)
    
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    #dataset = dataset.shuffle(len(filenames))
    dataset = dataset.map(parse_fn, num_parallel_calls=4)
    #dataset = dataset.map(train_preprocess, num_parallel_calls=4)
    dataset = dataset.repeat(1)
    dataset = dataset.batch(64)
    dataset = dataset.prefetch(1)
    
    
    iterator = dataset.make_one_shot_iterator()
    images, labels = iterator.get_next()
    #iterator = dataset.make_initializable_iterator()
    #images, labels = iterator.get_next()
    #iterator_init_op = iterator.initializer
    
    #inputs = {'images': images, 'labels': labels, 'iterator_init_op': iterator_init_op}
    return {"images": images}, labels

In [10]:
model.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Starting evaluation at 2018-08-21-19:54:44
INFO:tensorflow:Restoring parameters from ./checkpoints/model.ckpt-6188
INFO:tensorflow:Finished evaluation at 2018-08-21-19:54:51
INFO:tensorflow:Saving dict for global step 6188: accuracy = 0.798867, average_loss = 0.617792, global_step = 6188, loss = 39.4335


{'accuracy': 0.79886669,
 'average_loss': 0.61779195,
 'global_step': 6188,
 'loss': 39.433529}

## METRICS

In [11]:
predictions = model.predict(input_fn=test_input_fn)
preds = []
for prediction in predictions:
    preds.append(prediction["class_ids"][0])


INFO:tensorflow:Restoring parameters from ./checkpoints/model.ckpt-6188


In [12]:
# test-labels.csv - contains the mappings of the filenames to thier labels
mapping = pd.read_csv("test-labels.csv", header = None)

# actual labels
actual = list(mapping.iloc[:,1])

In [13]:
act = tf.placeholder(tf.int64, shape = [15000])
pred = tf.placeholder(tf.int64, shape = [15000])

## Accuracy

In [14]:
acc, acc_op = tf.metrics.accuracy(labels=act, predictions=pred)

In [15]:
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    print(sess.run([acc, acc_op], feed_dict={act: actual, pred: preds}))

[0.0, 0.79886669]


## Precision

In [16]:
precision, precision_op = tf.metrics.precision(labels=act, predictions=pred)

In [17]:
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    print(sess.run([precision, precision_op], feed_dict={act: actual, pred: preds}))

[0.0, 0.98669392]


## Recall

In [18]:
recall, recall_op = tf.metrics.recall(labels=act, predictions=pred)

In [19]:
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    print(sess.run([recall, recall_op], feed_dict={act: actual, pred: preds}))

[0.0, 0.98178631]


## Confusion matrix

In [20]:
conf = tf.confusion_matrix(labels=act, predictions=pred)


In [21]:
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    print(sess.run(conf, feed_dict={act: actual, pred: preds}))

[[536   0   1 ...,   0   0   0]
 [  0 811   1 ...,   0   0   0]
 [  0   0 711 ...,   1   0   9]
 ..., 
 [  0   0   0 ...,  28   0   0]
 [  0   0   0 ...,   0  11   0]
 [  0   0  19 ...,   0   0  31]]


## Recall per class

In [23]:
acts_k = tf.placeholder(tf.int64, shape = [None])
preds_k = tf.placeholder(tf.int64, shape = [None])

In [24]:
recall_k, recall_op_k = tf.metrics.recall(labels=acts_k, predictions=preds_k)

In [25]:
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    for k in range(62):
        temp = [(actual[i],preds[i]) for i in range(15000) if actual[i]==k]
        temp2 = list(zip(*temp))
        actual_k=list(temp2[0])
        pred_k=list(temp2[1])
        print(k, "   : ", end = " ")
        print(sess.run([recall_k, recall_op_k], feed_dict={acts_k: actual_k, preds_k: pred_k}))

0    :  [0.0, 0.0]
1    :  [0.0, 1.0]
2    :  [1.0, 1.0]
3    :  [1.0, 1.0]
4    :  [1.0, 1.0]
5    :  [1.0, 1.0]
6    :  [1.0, 1.0]
7    :  [1.0, 1.0]
8    :  [1.0, 0.99967009]
9    :  [0.99967009, 0.99970603]
10    :  [0.99970603, 0.99971181]
11    :  [0.99971181, 0.99957317]
12    :  [0.99957317, 0.9993099]
13    :  [0.9993099, 0.99781722]
14    :  [0.99781722, 0.99785149]
15    :  [0.99785149, 0.99791175]
16    :  [0.99791175, 0.99779594]
17    :  [0.99779594, 0.99781209]
18    :  [0.99781209, 0.99787927]
19    :  [0.99787927, 0.99790329]
20    :  [0.99790329, 0.99791694]
21    :  [0.99791694, 0.99794042]
22    :  [0.99794042, 0.99798435]
23    :  [0.99798435, 0.99802488]
24    :  [0.99802488, 0.97643322]
25    :  [0.97643322, 0.97690159]
26    :  [0.97690159, 0.97651327]
27    :  [0.97651327, 0.97681284]
28    :  [0.97681284, 0.97780246]
29    :  [0.97780246, 0.97823936]
30    :  [0.97823936, 0.97857559]
31    :  [0.97857559, 0.97869086]
32    :  [0.97869086, 0.97889256]
33    :  

## Precision per class

In [26]:
precision_k, precision_op_k = tf.metrics.precision(labels=acts_k, predictions=preds_k)

In [27]:
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    for k in range(62):
        temp = [(actual[i],preds[i]) for i in range(15000) if actual[i]==k]
        temp2 = list(zip(*temp))
        actual_k=list(temp2[0])
        pred_k=list(temp2[1])
        print(k, "   : ", end = " ")
        print(sess.run([precision_k, precision_op_k], feed_dict={acts_k: actual_k, preds_k: pred_k}))

0    :  [0.0, 0.0]
1    :  [0.0, 0.81999999]
2    :  [0.81999999, 0.89540672]
3    :  [0.89540672, 0.92736357]
4    :  [0.92736357, 0.94373327]
5    :  [0.94373327, 0.95271456]
6    :  [0.95271456, 0.95994914]
7    :  [0.95994914, 0.96584129]
8    :  [0.96584129, 0.96975517]
9    :  [0.96975517, 0.97296524]
10    :  [0.97296524, 0.97348112]
11    :  [0.97348112, 0.97380459]
12    :  [0.97380459, 0.97455919]
13    :  [0.97455919, 0.97481006]
14    :  [0.97481006, 0.97519684]
15    :  [0.97519684, 0.97587746]
16    :  [0.97587746, 0.97603041]
17    :  [0.97603041, 0.97620249]
18    :  [0.97620249, 0.97691745]
19    :  [0.97691745, 0.97717392]
20    :  [0.97717392, 0.97731912]
21    :  [0.97731912, 0.9775694]
22    :  [0.9775694, 0.97803855]
23    :  [0.97803855, 0.97847134]
24    :  [0.97847134, 0.97922391]
25    :  [0.97922391, 0.97963798]
26    :  [0.97963798, 0.97975576]
27    :  [0.97975576, 0.9800148]
28    :  [0.9800148, 0.98087043]
29    :  [0.98087043, 0.98124814]
30    :  [0.981