In [None]:
import numpy as np
import tensorflow as tf
import time

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
import sys
sys.path.append('../')
from util.my_plot import plot_images_labels_prediction, plot_batch

In [None]:
# Load the MNIST splits from the MNIST_data directory with one-hot encoded labels.
mnist = input_data.read_data_sets(train_dir="MNIST_data", one_hot=True)

In [None]:
def kernel(name, shape):
    """Create (or look up) a convolution filter variable.

    Args:
        name: Base name; the variable is requested as ``name + '_w'``.
        shape: Shape handed to ``tf.truncated_normal`` for the initial values.

    Returns:
        Whatever ``tf.get_variable`` returns for that name and initializer.
    """
    variable_name = name + '_w'
    # Initial values are drawn from a truncated normal with stddev 0.1.
    initial_values = tf.truncated_normal(shape, stddev=0.1)
    return tf.get_variable(variable_name, initializer=initial_values)

In [None]:
def conv2d(x, W):
    """Convolve ``x`` with filter ``W`` using stride 1 in every dimension and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    convolved = tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
    return convolved

In [None]:
def max_pool_2x2(x):
    """Max-pool ``x`` with window and stride [1, 2, 2, 1] and SAME padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    pooled = tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
    return pooled

In [None]:
def layer(name, output_dim, input_dim, inputs, activation=None):
    """Fully connected layer: ``activation(inputs @ W + b)``.

    Args:
        name: Base name; the variables are requested as ``name + '_w'`` and
            ``name + '_b'``.
        output_dim: Number of output units (columns of W, width of b).
        input_dim: Number of input features (rows of W).
        inputs: Tensor multiplied by W on the right.
        activation: Optional callable applied to the affine output; when it is
            ``None`` the affine output is returned unchanged.

    Returns:
        The (optionally activated) affine transform of ``inputs``.
    """
    # Both parameters start from tf.random_normal samples; the weight variable
    # is requested before the bias variable.
    weights = tf.get_variable(name + '_w',
                              initializer=tf.random_normal([input_dim, output_dim]))
    bias = tf.get_variable(name + '_b',
                           initializer=tf.random_normal([1, output_dim]))

    pre_activation = tf.matmul(inputs, weights) + bias
    if activation is not None:
        return activation(pre_activation)
    return pre_activation

In [None]:
# Build a convolutional neural network

def conv_net(x, reuse, is_training=True):
    """Two conv/pool stages followed by a two-layer MLP for 28x28 digit images.

    Args:
        x: Batch of images; reshaped here to ``[-1, 28, 28, 1]``.
        reuse: Forwarded to ``tf.variable_scope('ConvNet', reuse=...)`` so that
            several calls can share one set of variables.
        is_training: When True (the default, matching the previous behaviour)
            dropout with ``keep_prob=0.5`` is applied to the hidden layer.
            Pass False to build a graph without dropout for inference.

    Returns:
        Tensor with 10 columns holding the unnormalised class scores (logits).
        No softmax is applied here: the loss used in this notebook,
        ``tf.nn.softmax_cross_entropy_with_logits``, applies softmax itself, so
        a softmax in the network would be applied twice. Wrap the result in
        ``tf.nn.softmax`` when probabilities are needed; ``tf.argmax`` gives the
        same class either way.
    """
    with tf.variable_scope('ConvNet', reuse=reuse):

        x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

        # Stage 1: 5x5 convolution, 1 -> 16 channels, ReLU, then 2x2 max pool.
        W1 = kernel('cv1', [5, 5, 1, 16])
        C1_Conv = tf.nn.relu(conv2d(x_image, W1))
        C1_Pool = max_pool_2x2(C1_Conv)

        # Stage 2: 5x5 convolution, 16 -> 36 channels, ReLU, then 2x2 max pool.
        W2 = kernel('cv2', [5, 5, 16, 36])
        C2_Conv = tf.nn.relu(conv2d(C1_Pool, W2))
        C2_Pool = max_pool_2x2(C2_Conv)

        # MLP head. The flattened width is read from the pooled tensor's static
        # shape (7 * 7 * 36 = 1764 for the layers above) instead of being
        # hard-coded, so it follows any change to the filter counts.
        _, pooled_h, pooled_w, pooled_c = C2_Pool.get_shape().as_list()
        flat_dim = pooled_h * pooled_w * pooled_c
        D_Flat = tf.reshape(C2_Pool, [-1, flat_dim])
        D_Hidden = layer(name='cv3', output_dim=256, input_dim=flat_dim,
                         inputs=D_Flat, activation=tf.nn.relu)
        if is_training:
            D_Hidden = tf.nn.dropout(D_Hidden, keep_prob=0.5)

        # No activation on the output layer: return raw logits (see docstring).
        out = layer(name='cv4', output_dim=10, input_dim=256, inputs=D_Hidden)

    return out

In [None]:
# Build the function to average the gradients
def average_gradients(tower_grads):
    """Average each variable's gradient over all towers.

    Args:
        tower_grads: One entry per tower; each entry is a list of
            ``(gradient, variable)`` pairs. Entries are combined position by
            position with ``zip``, so every tower must list the variables in
            the same order.

    Returns:
        A list of ``(mean_gradient, variable)`` pairs, one per variable. The
        variable is taken from the first tower, since the variables are shared
        across towers. Gradients that are ``None`` are left out of the mean; a
        variable with no gradient in any tower yields ``(None, variable)``.
        An empty ``tower_grads`` yields an empty list.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Each grad_and_vars looks like:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        shared_var = grad_and_vars[0][1]

        # A gradient may be None for a variable that does not influence a
        # tower's loss; stacking None would fail, so skip those entries.
        present = [g for g, _ in grad_and_vars if g is not None]
        if present:
            # Stack along a new leading 'tower' axis and average over it.
            mean_grad = tf.reduce_mean(tf.stack(present, axis=0), axis=0)
        else:
            mean_grad = None

        average_grads.append((mean_grad, shared_var))
    return average_grads

In [None]:
# Op types that are treated as parameters. The device function built below pins
# ops of these types to one shared "parameter" device (default '/gpu:0') and
# sends every other op to the tower's own device.
PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable']

def assign_to_device(device, ps_device='/gpu:0'):
    """Build a device function for ``tf.device``.

    Args:
        device: Device string returned for every op whose type is not in
            ``PS_OPS``.
        ps_device: Device for ops whose type is in ``PS_OPS``. Accepted with or
            without a leading slash ('gpu:0' and '/gpu:0' are equivalent).

    Returns:
        A callable taking an op (or a ``tf.NodeDef``) and returning the device
        string that op should be placed on.
    """
    # Normalise to exactly one leading slash. Prepending "/" unconditionally
    # turned the default '/gpu:0' into the malformed '//gpu:0'.
    ps_device_name = "/" + ps_device.lstrip("/")

    def _assign(op):
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op in PS_OPS:
            return ps_device_name
        return device

    return _assign

In [None]:
# Build the multi-GPU training graph. Ops default to '/gpu:0'; inside the loop
# each tower's ops go to '/gpu:<i>' while variable ops stay on '/gpu:0'.
num_gpus = 2

with tf.device('/gpu:0'):
    tower_grads = []
    reuse_vars = False

    # tf Graph input
    X = tf.placeholder(tf.float32, [None, 784])
    Y = tf.placeholder(tf.float32, [None, 10])

    # A single optimizer serves every tower and the final update.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)

    # Number of rows of the fed batch handled by each tower. Feed at least
    # num_gpus rows (ideally a multiple of num_gpus) when running train_op:
    # rows beyond num_gpus * shard_size are not used for the gradients, and an
    # empty shard would make that tower's mean loss undefined.
    shard_size = tf.shape(X)[0] // num_gpus

    # Loop over all GPUs and construct their own computation graph
    for i in range(num_gpus):
        with tf.device(assign_to_device('/gpu:{}'.format(i), ps_device='/gpu:0')):

            # Give each tower its own slice of the batch. Feeding the whole
            # batch to every tower would make all towers repeat the same work.
            shard_x = X[i * shard_size:(i + 1) * shard_size]
            shard_y = Y[i * shard_size:(i + 1) * shard_size]

            # The first tower creates the variables; later towers reuse them.
            tower_out = conv_net(shard_x, reuse=reuse_vars)
            tower_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=tower_out, labels=shard_y))
            grads = optimizer.compute_gradients(tower_loss)

            reuse_vars = True
            tower_grads.append(grads)

    # Evaluation ops over the complete fed batch, sharing the tower variables.
    # NOTE: conv_net is called exactly as in the towers, so the dropout it
    # applies is also active when these ops are evaluated.
    logits_train = conv_net(X, reuse=True)
    loss_op = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits_train, labels=Y))
    correct_pred = tf.equal(tf.argmax(logits_train, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Average the per-tower gradients and apply them in one update.
    tower_grads = average_gradients(tower_grads)
    train_op = optimizer.apply_gradients(tower_grads)

    

# 開始訓練

In [None]:
trainEpochs = 15
# Batch size per GPU; every training step draws batchSize * num_gpus examples.
batchSize = 100
# Steps per epoch. Dividing by the examples consumed per step (not by batchSize
# alone) makes one epoch cover the training set once instead of num_gpus times.
totalBatchs = mnist.train.num_examples // (batchSize * num_gpus)


In [None]:
# Show the number of training steps the loop below runs per epoch.
print(totalBatchs)

In [None]:



# Launch the graph
sess=tf.Session()
sess.run(tf.global_variables_initializer())

# Every step draws one batch of this many examples.
examples_per_step = batchSize * num_gpus
validation_feed = {X: mnist.validation.images, Y: mnist.validation.labels}

for epoch in range(trainEpochs):

    # totalBatchs optimisation steps; ts/te time each step, so after the loop
    # te holds the duration of the last step only.
    for i in range(totalBatchs):
        ts = time.time()
        batch_x, batch_y = mnist.train.next_batch(examples_per_step)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        te = time.time() - ts

    # Training metrics are measured on the last batch of the epoch,
    # validation metrics on the whole validation split.
    last_batch_feed = {X: batch_x, Y: batch_y}
    loss, acc = sess.run([loss_op, accuracy], feed_dict=last_batch_feed)
    val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict=validation_feed)

    report_parts = [
        "Epoch " + str(epoch+1),
        ": Loss= " + "{:.4f}".format(loss),
        ", Accuracy= " + "{:.3f}".format(acc),
        ", Val Loss " + "{:.4f}".format(val_loss),
        ", Val Accuracy= " + "{:.3f}".format(val_acc),
        ", %i Examples/sec" % int(len(batch_x)/te),
    ]
    print("".join(report_parts))


print("Optimization Finished!")



# 評估模型準確率

In [None]:
# Evaluate the accuracy op on the full test split.
test_feed = {X: mnist.test.images, Y: mnist.test.labels}
test_accuracy = sess.run(accuracy, feed_dict=test_feed)
print("Accuracy:", test_accuracy)

In [None]:
# Index of the highest-scoring class for every test image.
predicted_class_op = tf.argmax(logits_train, 1)
prediction_result = sess.run(predicted_class_op,
                             feed_dict={X: mnist.test.images})

In [None]:
# Display the predicted classes of the first ten test images.
prediction_result[:10]

In [None]:
# Bare expression: evaluated, but not displayed because it is not the last
# expression of the cell.
mnist.test.images.shape
# View the flattened test images as 28x28 arrays for plotting.
aa = np.reshape(mnist.test.images, (10000, 28, 28))
# Bare expression: evaluated, but not displayed (see above).
prediction_result.shape
# Last expression of the cell, so this is what gets displayed.
mnist.test.labels

In [None]:
# Convert the one-hot test labels back to class indices before plotting.
true_classes = np.argmax(mnist.test.labels, axis=1)
# NOTE(review): the final 0 is presumably the index of the first image to
# show - confirm against util.my_plot.
plot_images_labels_prediction(aa, true_classes, prediction_result, 0)

In [None]:
# Fetch the network's full output vector (one score per class) for every test image.
test_images_feed = {X: mnist.test.images}
y_predict_Onehot = sess.run(logits_train, feed_dict=test_images_feed)

In [None]:
# Display the network's output vector for the test image at index 8.
y_predict_Onehot[8]

# 針對位移過的圖進行預測

In [None]:
from util.my_plot import generate_shift_mnist_data
# Display names for the ten digit classes: '0' ... '9'.
class_names = [str(digit) for digit in range(10)]

# Generate 10000 shifted images with their labels.
shift_img, shift_img_label = generate_shift_mnist_data(10000)
# NOTE(review): the final 5 is presumably a plot-layout/count argument -
# confirm against util.my_plot.
plot_batch(shift_img, shift_img_label, class_names, 5)

In [None]:
from keras.utils import np_utils
# Flatten the shifted images to the 784-wide rows the X placeholder expects.
bb = shift_img.reshape(10000, 784)
# Pass the class count explicitly (10, matching the Y placeholder). Without it
# the one-hot width is inferred from the largest label present, which would be
# narrower than 10 if the generated sample happened to contain no 9s.
shift_img_label_OneHot = np_utils.to_categorical(shift_img_label, 10)

In [None]:
# Evaluate the accuracy op on the shifted images.
shifted_feed = {X: bb, Y: shift_img_label_OneHot}
shifted_accuracy = sess.run(accuracy, feed_dict=shifted_feed)
print("Accuracy:", shifted_accuracy)