from __future__ import print_function
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import datetime


def average_gradients(tower_grads):
    """Average each variable's gradient across all towers.

    Args:
        tower_grads: One entry per tower. Each entry is a list of
            (gradient, variable) pairs, and every tower lists its pairs in
            the same variable order.

    Returns:
        A list of (averaged_gradient, variable) pairs, one per variable.
        The variable is taken from the first tower because variables are
        shared across towers. Returns an empty list when there is nothing
        to average.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Each grad_and_vars looks like:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        # Give every gradient a leading 'tower' dimension to average over.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]

        # Join along the 'tower' dimension and take the mean over it.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)

        # The variables are redundant because they are shared across towers,
        # so the first tower's reference is used.
        v = grad_and_vars[0][1]
        average_grads.append((grad, v))

    # Return only after the loop so every variable is included, not just
    # the first one.
    return average_grads


def _truncated_normal_variable(name, shape):
    """Create variable `name` initialised from a truncated normal, stddev 0.1."""
    return tf.get_variable(name, initializer=tf.truncated_normal(shape, stddev=0.1))


# Inputs, model parameters and the optimizer are created under the CPU
# device scope. The inputs are split in two so each tower gets one half.
with tf.device('/cpu:0'):
    # Flattened 784-value images, reshaped to 28x28 with a single channel.
    x = tf.placeholder(tf.float32, [None, 784], name='x')
    x_img = tf.reshape(x, [-1, 28, 28, 1])
    x_split = tf.split(x_img, 2)

    # Labels with 10 values per example, split the same way as the images.
    y = tf.placeholder(tf.float32, [None, 10], name='y')
    y_split = tf.split(y, 2)

    # Dropout keep probability, fed at run time.
    keep_prob = tf.placeholder(tf.float32)

    # First convolution: 5x5 kernels, 1 -> 32 channels.
    w0 = _truncated_normal_variable('w0', [5, 5, 1, 32])
    b0 = _truncated_normal_variable('b0', [32])

    # Second convolution: 5x5 kernels, 32 -> 64 channels.
    w1 = _truncated_normal_variable('w1', [5, 5, 32, 64])
    b1 = _truncated_normal_variable('b1', [64])

    # Fully connected layer: 7*7*64 -> 1024.
    w2 = _truncated_normal_variable('w2', [7 * 7 * 64, 1024])
    b2 = _truncated_normal_variable('b2', [1024])

    # Output layer: 1024 -> 10.
    w3 = _truncated_normal_variable('w3', [1024, 10])
    b3 = _truncated_normal_variable('b3', [10])

    # Per-tower results are collected in these lists.
    outputs = []
    grads = []

    opt = tf.train.GradientDescentOptimizer(0.01)
    

def conv2d(xx, W):
    """Convolve `xx` with filter `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(xx, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(xx):
    """Max-pool `xx` with a [1, 2, 2, 1] window and stride, 'SAME' padding."""
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(xx, ksize=pool_window, strides=pool_strides, padding='SAME')


def model_forward(xx, labels):
    """Build the forward pass, loss and gradients for one shard of the batch.

    The network is two conv + ReLU + max-pool stages, a ReLU fully connected
    layer with dropout, and a final linear layer. It uses the module-level
    parameters `w0`..`w3` and `b0`..`b3`, the `keep_prob` placeholder and
    the optimizer `opt`.

    Args:
        xx: Input images for this shard.
        labels: Labels for this shard, passed to the cross-entropy op.

    Returns:
        A tuple `(logits, loss, grads_and_vars)`. `loss` is the mean softmax
        cross-entropy. `grads_and_vars` is what `opt.compute_gradients`
        returns for all trainable variables.
    """
    # First convolutional stage.
    conv1 = tf.nn.relu(conv2d(xx, w0) + b0)
    pool1 = max_pool_2x2(conv1)

    # Second convolutional stage.
    conv2 = tf.nn.relu(conv2d(pool1, w1) + b1)
    pool2 = max_pool_2x2(conv2)

    # Flatten the feature maps for the fully connected layer.
    flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    fc1 = tf.nn.relu(tf.matmul(flat, w2) + b2)
    fc1_dropped = tf.nn.dropout(fc1, keep_prob)

    # Final linear layer produces the logits.
    logits = tf.matmul(fc1_dropped, w3) + b3

    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=labels)
    loss = tf.reduce_mean(per_example_loss)
    grads_and_vars = opt.compute_gradients(loss, tf.trainable_variables())

    return logits, loss, grads_and_vars


# Build one copy of the model per GPU, each fed its own half of the batch.
for i in range(2):
    with tf.device('/gpu:{0}'.format(i)):
        yy, loss, grad = model_forward(x_split[i], y_split[i])
    # Keep each tower's logits and gradients for the later combining step.
    outputs.append(yy)
    grads.append(grad)


# Combine the towers on the CPU: one update from the averaged gradients,
# and accuracy over the concatenated tower outputs.
with tf.device('/cpu:0'):
    grad_avg = average_gradients(grads)
    train_step = opt.apply_gradients(grad_avg)

    # Concatenating restores the order the inputs were split in.
    output = tf.concat(outputs, axis=0)
    predicted_class = tf.argmax(output, 1)
    expected_class = tf.argmax(y, 1)
    correct_prediction = tf.equal(predicted_class, expected_class)
    accuracy = tf.reduce_mean(
        tf.cast(correct_prediction, tf.float32), name='accuracy')


def main():
    """Train on MNIST for 10000 steps and print the elapsed training time.

    Reads the dataset from "MNIST_data/" with one-hot labels. Each step
    trains on a batch of 100 with a dropout keep probability of 0.5. Every
    200 steps the accuracy on the full test set is printed, evaluated with
    dropout disabled (keep probability 1). The default graph is also
    written to a summary directory.
    """
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:

        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter('C:\\tmp\\test\\', graph=tf.get_default_graph())

        try:
            t1_1 = datetime.datetime.now()
            for step in range(10000):
                batch_x, batch_y = mnist.train.next_batch(100)
                sess.run(train_step, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})

                if step % 200 == 0:
                    print(step, sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1}))

            t2_1 = datetime.datetime.now()
        finally:
            # Close the writer even if training fails, so the event file is
            # flushed and its handle released.
            writer.close()

    print("Computation time: " + str(t2_1 - t1_1))


# Start training only when this file is run as a script.
if __name__ == "__main__":
    main()