In [1]:
import os
import numpy as np
from scipy import ndimage
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.client import device_lib
import datetime
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline  
print ("CURRENT TF VERSION IS [%s]" % (tf.__version__))
print ("PACKAGES LOADED")

MODEL_DIR = './nets/'

# Check the available GPUs in the notebook
def check_available_pus(dev_type='GPU'):
    local_devices = device_lib.list_local_devices()
    gpu_names = [x.name for x in local_devices if x.device_type == dev_type]
    gpu_num = len(gpu_names)

    print('{0} {1}(s) are detected : {2}'.format(gpu_num, dev_type, gpu_names))

    return gpu_num

# Define CNN model
def model(X, reuse=False, is_trn=True):
    with tf.variable_scope('L1', reuse=reuse):
        X  = tf.layers.batch_normalization(inputs=X, reuse=reuse)
        L1 = tf.layers.conv2d(inputs=X
                              , filters=64
                              , kernel_size=[3, 3]
                              , reuse=reuse
                              , activation=tf.nn.relu
                              , padding='SAME')
        L1 = tf.layers.max_pooling2d(inputs=L1
                                     , pool_size=[2, 2]
                                     , strides=[2, 2])
        L1 = tf.layers.dropout(inputs=L1
                               , rate=0.7
                               , training=is_trn)

    with tf.variable_scope('L2', reuse=reuse):
        L1 = tf.layers.batch_normalization(inputs=L1, reuse=reuse)
        L2 = tf.layers.conv2d(inputs=L1
                              , filters=128
                              , kernel_size=[3, 3]
                              , reuse=reuse
                              , activation=tf.nn.relu
                              , padding='SAME')
        L2 = tf.layers.max_pooling2d(inputs=L2
                                     , pool_size=[2, 2]
                                     , strides=[2, 2])
        L2 = tf.layers.dropout(inputs=L2
                               , rate=0.7
                               , training=is_trn)

    with tf.variable_scope('L2-1', reuse=reuse):
        L2   = tf.layers.batch_normalization(inputs=L2, reuse=reuse)
        L2_1 = tf.layers.conv2d(inputs=L2
                                , filters=128
                                , kernel_size=[3, 3]
                                , reuse=reuse
                                , activation=tf.nn.relu
                                , padding='SAME')
        L2_1 = tf.layers.max_pooling2d(inputs=L2_1
                                       , pool_size=[2, 2]
                                       , strides=[2, 2])
        L2_1 = tf.layers.dropout(inputs=L2_1
                                 , rate=0.7
                                 , training=is_trn)
        
    with tf.variable_scope('L3', reuse=reuse):
        L2_1 = tf.layers.batch_normalization(inputs=L2, reuse=reuse)
        L3   = tf.contrib.layers.flatten(inputs=L2_1)
        L3   = tf.layers.dense(L3, 1024, activation=tf.nn.relu)
        L3   = tf.layers.dropout(L3, 0.5, is_trn)

    with tf.variable_scope('L4', reuse=reuse):
        L3 = tf.layers.batch_normalization(inputs=L3, reuse=reuse)
        L4 = tf.layers.dense(L3, 256, activation=tf.nn.relu)

    with tf.variable_scope('LF', reuse=reuse):
        LF = tf.layers.dense(L4, 10, activation=None)

    return LF


# Test model
def test():
    with tf.device(tf.DeviceSpec(device_type="CPU", device_index=0)):
        
        # Test Configuration ----------------------------------------------------
        # need to change learning rates and batch size by number of GPU
        N_CLASSES     = 10
        NUM_GPUS      = check_available_pus('GPU')
        NUM_CPUS      = check_available_pus('CPU')
        #NUM_GPUS      = 1
        
        # Placeholders
        X      = tf.placeholder(tf.float32, [None, 28, 28, 1])
        Y      = tf.placeholder(tf.float32, [None, N_CLASSES])
        IS_TRN = tf.placeholder(tf.bool)
        
        # Split placeholders for each GPU operation
        X_A = tf.split(X, int(NUM_GPUS))
        Y_A = tf.split(Y, int(NUM_GPUS))
        
        # Load dataset ------------------------------------------------------------
        mnist = keras.datasets.mnist
        (trainimg, trainlabel), (testimg, testlabel) = mnist.load_data()
        
        # Img size : 28 x 28 x 1
        # Num classes : 10
        print('Testing data shape : ', testimg.shape)
        print('Class dataset : ', testlabel.shape)

        # Data Preprocesisng ------------------------------------------------------------
        N_TST = len(testlabel)

        onehot_mat = np.eye(10)

        #testimg   = np.reshape(testimg, [-1, 784])
        testimg   = np.reshape(testimg, [-1, 28, 28, 1])

        testlabel   = np.concatenate([[onehot_mat[int(x),:]] for x in testlabel], axis=0)

        print('Testing data shape : ', testimg.shape)
        print('Class dataset : ', testlabel.shape)
        
        # Build Graph --------------------------------------------------------------------
        losses = []
        accres = []
        
        val_acc     = 0.0
        val_acc_max = 0.0
        best_epoch  = 0

        for gpu_id in range(int(NUM_GPUS)):
            with tf.device(tf.DeviceSpec(device_type="GPU", device_index=gpu_id)):
                with tf.variable_scope(tf.get_variable_scope(), reuse=(gpu_id > 0)):
                    pred = model(X_A[gpu_id], gpu_id > 0)
#                     cost = tf.nn.softmax_cross_entropy_with_logits_v2(
#                                     logits=pred,
#                                     labels=Y_A[gpu_id])
                    corr = tf.equal(tf.argmax(pred, 1), tf.argmax(Y_A[gpu_id], 1))    
                    accr = tf.reduce_mean(tf.cast(corr, "float"))

                    #losses.append(cost)
                    accres.append(accr)

#        loss = tf.reduce_mean(tf.concat(losses, axis=0))
        accr = tf.reduce_mean(accres)

#         optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
#             loss, colocate_gradients_with_ops=True)  # Important!
        
        # Training process ------------------------------------------------------------------
        
        with tf.Session() as sess:
            
            saver = tf.train.Saver()
            restorename = os.path.join(MODEL_DIR, "net-" + 'BEST' + ".ckpt")
            print ("LOADING [%s]" % (restorename))
            saver.restore(sess, restorename)
            
            feeds = {X: testimg, Y: testlabel, IS_TRN: False}
            test_acc = sess.run(accr, feed_dict=feeds)
            print ("TEST ACCURACY: %.5f" % (test_acc))        

#         print("--- Training time : {0} seconds /w {1} GPUs ---".format(
#             datetime.datetime.now() - start_time, NUM_GPUS))

CURRENT TF VERSION IS [1.12.0]
PACKAGES LOADED


In [2]:
def main():
    test()

if __name__ == '__main__':
    main()
    print(' PROGRAM TERMINATED')

4 GPU(s) are detected : ['/device:GPU:0', '/device:GPU:1', '/device:GPU:2', '/device:GPU:3']
1 CPU(s) are detected : ['/device:CPU:0']
Testing data shape :  (10000, 28, 28)
Class dataset :  (10000,)
Testing data shape :  (10000, 28, 28, 1)
Class dataset :  (10000, 10)
LOADING [./nets/net-BEST.ckpt]
INFO:tensorflow:Restoring parameters from ./nets/net-BEST.ckpt
TEST ACCURACY: 0.91610
 PROGRAM TERMINATED
