### A binary to train MNIST using multiple GPUs with synchronous updates.

In [13]:
import os
import numpy as np
from scipy import ndimage
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

import tensorflow.contrib.slim as slim
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline  
print ("CURRENT TF VERSION IS [%s]" % (tf.__version__))
print ("PACKAGES LOADED")

from tensorflow.python.client import device_lib

print(device_lib.list_local_devices())


CURRENT TF VERSION IS [1.12.0]
PACKAGES LOADED
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 12497894620802816270
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 3578738625412227571
physical_device_desc: "device: XLA_GPU device"
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 2495710620587495093
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15078866944
locality {
  bus_id: 1
  links {
    link {
      device_id: 1
      type: "StreamExecutor"
      strength: 1
    }
    link {
      device_id: 2
      type: "StreamExecutor"
      strength: 1
    }
    link {
      device_id: 3
      type: "StreamExecutor"
      strength: 1
    }
  }
}
incarnation: 16065825689711561015
physical_device_desc: "device: 0, name: Tesla V100-SXM2-16GB, pci bus id: 0000:00:1b.0, compute capability: 7

### Load MNIST Dataset 

In [16]:
mnist = keras.datasets.mnist
(trainimg, trainlabel), (testimg, testlabel) = mnist.load_data()

# Img size : 28 x 28 x 1
# Num classes : 10

print('Training data shape : ', trainimg.shape)
print('Class dataset : ', trainlabel.shape)
print('Testing data shape : ', testimg.shape)
print('Class dataset : ', testlabel.shape)

Training data shape :  (60000, 28, 28)
Class dataset :  (60000,)
Testing data shape :  (10000, 28, 28)
Class dataset :  (10000,)


In [17]:
### Data Preprocessing

onehot_mat = np.eye(10)

trainimg  = np.reshape(trainimg, [-1, 784])
testimg   = np.reshape(testimg, [-1, 784])

trainlabel = np.concatenate([[onehot_mat[int(x),:]] for x in trainlabel], axis=0)
testlabel   = np.concatenate([[onehot_mat[int(x),:]] for x in testlabel], axis=0)

print('Training data shape : ', trainimg.shape)
print('Class dataset : ', trainlabel.shape)
print('Testing data shape : ', testimg.shape)
print('Class dataset : ', testlabel.shape)
# train_images_flat = sess.run(tf.reshape(train_images, shape=(N_TRN,-1)))
# test_images_flat  = sess.run(tf.reshape(test_images, shape=(N_TST,-1)))
# train_labels_1h   = sess.run(tf.one_hot(train_labels, depth=n_classes))
# test_labels_1h    = sess.run(tf.one_hot(test_labels, depth=n_classes))

Training data shape :  (60000, 784)
Class dataset :  (60000, 10)
Testing data shape :  (10000, 784)
Class dataset :  (10000, 10)


### Define model

In [19]:
n_input   = 784
n_classes = 10

N_TRN = len(trainlabel)
N_TST = len(testlabel)

BATCH_SIZE = 0
NUM_GPUS   = 8

In [14]:
with tf.variable_scope('var'):
    x = tf.placeholder('float', [None, n_input])
    y = tf.placeholder('float', [None, n_classes])
    is_training = tf.placeholder(tf.bool)

def lrelu(x, leak=0.2, name='lrelu'):
    with tf.variable_scope(name):
        f1 = 0.5 * (1 + leak)
        f2 = 0.5 * (1 - leak)
        return f1 * x + f2 * abs(x)

def CNN(inputs, is_training=True):
    x   = tf.reshape(inputs, [-1, 28, 28, 1])
    
    batch_norm_params = {'is_training': is_training, 'decay': 0.9
                        , 'updates_collections': None}
    init_func = tf.truncated_normal_initializer(stddev=0.01)
    
    net = slim.conv2d(x, 32, kernel_size=[5,5], padding='SAME' 
                     , activation_fn       = lrelu
                     , weights_initializer = init_func
                     , normalizer_fn       = slim.batch_norm
                     , normalizer_params   = batch_norm_params
                     , scope='conv1')
    net = slim.max_pool2d(net, [2, 2], scope='pool1')
    net = slim.conv2d(x, 64, kernel_size=[5,5], padding='SAME' 
                     , activation_fn       = lrelu
                     , weights_initializer = init_func
                     , normalizer_fn       = slim.batch_norm
                     , normalizer_params   = batch_norm_params
                     , scope='conv2')
    net = slim.max_pool2d(net, [2, 2], scope='pool2')
    net = slim.flatten(net, scope='flatten3')
    net = slim.fully_connected(net, 1024
                     , activation_fn       = lrelu
                     , weights_initializer = init_func
                     , normalizer_fn       = slim.batch_norm
                     , normalizer_params   = batch_norm_params
                     , scope='fc4')
    net = slim.dropout(net, keep_prob=0.7, is_training=is_training, scope='dr')
    out = slim.fully_connected(net, n_classes
                     , activation_fn       = None
                     , weights_initializer = init_func
                     , normalizer_fn       = None
                     , scope='fco')
    return out
print('Network ready')

Network ready


In [8]:
def tower_loss(scope, images, labels):
    """Calculate the total loss on a single tower running the MNIST model.

    Args:
    scope: unique prefix string identifying the MNIST tower, e.g. 'tower_0'
    images: Images. 2D tensor of shape [batch_size, :].
    labels: Labels. 1D tensor of shape [batch_size].
    Returns:
     Tensor of shape [] containing the total loss for a batch of data
    """
    # Prediction
    pred = CNN(images, is_training)

    # LOSS
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=labels, logits=pred), name='total_loss')
    
    return cost
    

In [9]:
def average_gradients(tower_grads):
    """Calculate the average gradient for each shared variable across all towers.
    Note that this function provides a synchronization point across all towers.
    
    Args:
    tower_grads: List of lists of (gradient, variable) tuples. The outer list
      is over individual gradients. The inner list is over the gradient
      calculation for each tower.
    Returns:
     List of pairs of (gradient, variable) where the gradient has been averaged
     across all towers.
    """
    
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        
        grad = []
        
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)
            
        # Average over the 'tower' dimension
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)
        
        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So .. we will just return the first tower's pointer to
        # the Variable.
        
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
        
    return average_grads

In [22]:
def train():
    """Train MNIST for a number of steps."""
    
    with tf.Graph().as_default(), tf.devide('/CPU:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [], 
            initializer=tf.constant_initializer(0), trainable=False)
        
        # Calculate the learning rate schedule
        num_batches_per_epoch = N_TRN / BATCH_SIZE / NUM_GPUS
        
        # Create an optimizer that performs gradient descent.
        optm = tf.train.AdamOptimizer(learning_rate=0.001)
        
        # Get images and labels for MNIST
        batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
            [trainimg, trainlabel], capacity=2 * NUM_GPUS)
        
        # Calculate the gradients for each model tower.
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(NUM_GPUS):
                with tf.device('/GPU:%d' % i):
                    with tf.name_scope('%s_%d' % ('TOWER', i)) as scope:
                        # Dequeues one batch for the GPU
                        image_batch, label_batch = batch_queue.dequeue()
                        
                        # Calculate the loss for one tower of the CIFAR model. This function
                        # constructs the entire MNIST model but shares the variables across
                        # all towers.
                        loss = tower_loss(scope, image_batch, label_batch)                        
                        
                        # Resue variables for the next tower
                        tf.get_variable_scope().reuse_variables()
                        
                        # Calculate the gradients for the batch of data on this CIFAR tower.
                        grads = optm.compute_gradients(loss)
                        
                        # Keep track of the gradients across all towers.
                        tower_grads.append(grads)
        
        # with~~
        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)
        
        # Add histograms for gradients
        
        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = optm.apply_gradients(grads, global_step=global_step)
        
        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.01, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        

In [1]:
def main(argv=None):
    print('hi!')
    

In [4]:
if __name__ == '__main__':
    tf.app.run()

hi!


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
