[View in Colaboratory](https://colab.research.google.com/github/samsam915/CEP-Resources/blob/master/mnist_tensorboard.ipynb)

In [61]:
import tensorflow as tf
import time
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
import numpy as np
import os 
from tensorflow.contrib.tensorboard.plugins import projector

# Load MNIST with one-hot labels; the data files live under 'MNIST_data'.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Training schedule: upper bound on training iterations, and how often
# (in global steps) accuracy is evaluated and summaries are written.
niter = 500
interval = 100

# Output directory for event files, checkpoints and projector assets.
DIR_log = 'mnist/'
if not os.path.exists(DIR_log):
    os.mkdir(DIR_log)
    
#初始化权重
def weight_variable(shape):
    """Return a variable named 'weight' of the given shape.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight')

#初始化偏差
def bias_variable(shape):
    """Return a variable named 'bias' of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape), name='bias')

#卷积层
def conv2d(x, W):
    """Convolve ``x`` with the kernel ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

#池化层
def max_pool_2x2(x):
    """Max-pool ``x`` with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

#定义卷积和池化层
def conv_pool(x_image, in_shape, out_shape):
    """Apply a 5x5 convolution + ReLU followed by 2x2 max pooling.

    Also registers TensorBoard summaries: channel-averaged feature maps of the
    convolution and pooling outputs, and histograms of the kernel and bias.

    Args:
        x_image: input tensor fed to the convolution.
        in_shape: number of input channels (third dimension of the kernel).
        out_shape: number of output channels (fourth dimension of the kernel).

    Returns:
        The pooled feature tensor.
    """
    W_conv = weight_variable([5, 5, in_shape, out_shape])
    b_conv = bias_variable([out_shape])
    h_conv = tf.nn.relu(conv2d(x_image, W_conv) + b_conv)
    h_pool = max_pool_2x2(h_conv)

    # Feature-map visualisation: average over the channel axis and put a
    # single channel back with expand_dims. Unlike reshaping to
    # (-1, H, H, 1), this does not assume the feature maps are square.
    conv_mean = tf.expand_dims(tf.reduce_mean(h_conv, axis=3), axis=-1)
    pool_mean = tf.expand_dims(tf.reduce_mean(h_pool, axis=3), axis=-1)
    tf.summary.image('feature_conv', conv_mean, 3)
    tf.summary.image('feature_pool', pool_mean, 3)

    # Distribution of the layer parameters.
    tf.summary.histogram('W_con', W_conv)
    tf.summary.histogram('b_con', b_conv)

    return h_pool

#生成全景图像文件。
#tensorboard可以从全景图中按序裁剪出每一幅手写体图像，在页面上独立渲染。
def create_sprite_image(images):
    """Tile a stack of equally sized 2-D images into one square sprite sheet.

    Images are placed row by row on a grid of ceil(sqrt(count)) cells per
    side; cells without an image keep the fill value 1.

    Args:
        images: list or array indexed as (image, row, column).

    Returns:
        A 2-D float array holding the tiled images.
    """
    if isinstance(images, list):
        images = np.array(images)

    count = images.shape[0]
    tile_h = images.shape[1]
    tile_w = images.shape[2]
    grid = int(np.ceil(np.sqrt(count)))

    sheet = np.ones((tile_h * grid, tile_w * grid))
    for index in range(count):
        # Row-major position of this image on the grid.
        row, col = divmod(index, grid)
        top = row * tile_h
        left = col * tile_w
        sheet[top:top + tile_h, left:left + tile_w] = images[index]

    return sheet

def vector_to_matrix_mnist(mnist_digits):
    """Reshape flattened digits into a stack of 28x28 images."""
    image_stack_shape = (-1, 28, 28)
    return np.reshape(mnist_digits, image_stack_shape)

def invert_grayscale(mnist_digits):
    """Return ``1 - mnist_digits``, i.e. the inverted intensities."""
    inverted = 1 - mnist_digits
    return inverted

def save_sprite():
    """Write the sprite sheet of the first 10000 test digits to DIR_log.

    The digits are reshaped to 28x28, inverted, tiled into a single image and
    saved as 'mnist_10k_sprite.png' with a gray colormap.
    """
    digits = invert_grayscale(vector_to_matrix_mnist(mnist.test.images[:10000]))
    sprite_path = os.path.join(DIR_log, 'mnist_10k_sprite.png')
    plt.imsave(sprite_path, create_sprite_image(digits), cmap='gray')
    
class Mnist(object):
    """Convolutional MNIST classifier with TensorBoard summaries.

    Uses the module-level ``mnist`` dataset and the ``DIR_log``, ``niter`` and
    ``interval`` settings. Typical use: ``mnist_network()`` to build the
    graph, ``vis()`` to write the embedding projector files, then ``train()``
    or ``retrain()``; both of the latter close the session when they finish.
    """

    def __init__(self):
        # Session that runs all ops of this model.
        self.sess = tf.Session()
        # Step counter, incremented by the optimizer on every training step.
        self.global_step = tf.Variable(0, trainable=False)

        # First 10000 test images as a non-trainable variable; this is the
        # tensor the embedding projector is pointed at in vis().
        self.embedding_var = tf.Variable(tf.stack(mnist.test.images[:10000]), trainable=False, name='embedding')

    def mnist_network(self):
        """Build the network, loss, optimizer, accuracy and summary ops.

        Creates the placeholders ``self.xs`` (flattened images),
        ``self.ys`` (one-hot labels) and ``self.keep_prob`` (dropout keep
        probability).

        Returns:
            Tuple ``(train_step, accuracy, merged)``: the optimizer step, the
            accuracy tensor and the merged summary op.
        """
        with tf.name_scope('a_intput'):
            # Model inputs.
            self.xs = tf.placeholder(tf.float32, [None, 784], name='x')
            self.ys = tf.placeholder(tf.float32, [None, 10], name='y')
            # Flattened [n_samples, 28*28] vectors -> [n_samples, 28, 28, 1] images.
            x_image = tf.reshape(self.xs, [-1, 28, 28, 1])
            tf.summary.image('_input', x_image, 3)

        with tf.name_scope('conv1'):
            # First conv + pool: [n_samples, 28, 28, 1] -> [n_samples, 14, 14, 32].
            h_pool1 = conv_pool(x_image, 1, 32)

        with tf.name_scope('conv2'):
            # Second conv + pool: [n_samples, 14, 14, 32] -> [n_samples, 7, 7, 64].
            h_pool2 = conv_pool(h_pool1, 32, 64)

        with tf.name_scope('fc1'):
            # Flatten to [n_samples, 7*7*64].
            h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])

            # Fully connected layer fc1.
            W_fc1 = weight_variable([7*7*64, 1024])
            b_fc1 = bias_variable([1024])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

            # Parameter distributions.
            tf.summary.histogram('W_fc1', W_fc1)
            tf.summary.histogram('b_fc1', b_fc1)

            # Dropout.
            self.keep_prob = tf.placeholder(tf.float32)
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        with tf.name_scope('fc2'):
            # Output layer: logits and the class probabilities derived from them.
            W_fc2 = weight_variable([1024, 10])
            b_fc2 = bias_variable([10])
            logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
            pred = tf.nn.softmax(logits)

            # Parameter distributions, tagged with this layer's own name.
            tf.summary.histogram('W_fc2', W_fc2)
            tf.summary.histogram('b_fc2', b_fc2)

        with tf.name_scope('loss'):
            # Cross-entropy computed from the logits rather than from
            # log(softmax(...)), so a probability that saturates to 0 cannot
            # turn the loss into inf/NaN.
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.ys, logits=logits))
            tf.summary.scalar('loss', cross_entropy)

        with tf.name_scope('train'):
            # Optimizer; also increments global_step.
            train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy, global_step=self.global_step)

        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(self.ys, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar('accuracy', accuracy)

        # Merge every summary registered so far.
        merged = tf.summary.merge_all()

        return train_step, accuracy, merged

    def vis(self):
        """Write the embedding projector metadata and configuration to DIR_log.

        Produces 'metadata.tsv' (one digit label per line for the first 10000
        test images) and the projector configuration that references the
        embedding variable, the metadata file and the sprite image.
        """
        metadata_file = os.path.join(DIR_log, 'metadata.tsv')

        # Convert the one-hot labels to class indices once, outside the loop.
        digit_labels = np.argmax(mnist.test.labels[:10000], axis=1)
        with open(metadata_file, 'w') as f:
            for digit in digit_labels:
                f.write('{}\n'.format(digit))

        # Projector configuration.
        projector_writer = tf.summary.FileWriter(DIR_log, tf.get_default_graph())
        try:
            config = projector.ProjectorConfig()
            embeddings = config.embeddings.add()
            embeddings.tensor_name = self.embedding_var.name
            embeddings.metadata_path = 'metadata.tsv'

            # Sprite sheet location and the size of a single digit inside it.
            embeddings.sprite.image_path = 'mnist_10k_sprite.png'
            embeddings.sprite.single_image_dim.extend([28, 28])

            # Writes the projector configuration next to the event files.
            projector.visualize_embeddings(projector_writer, config)
        finally:
            # Flush the graph event and release the file handle.
            projector_writer.close()

    def _open_writers(self):
        """Create the train/val summary writers under DIR_log."""
        self.writer_train = tf.summary.FileWriter(os.path.join(DIR_log, 'train'), tf.get_default_graph())
        # The validation writer does not need the graph.
        self.writer_val = tf.summary.FileWriter(os.path.join(DIR_log, 'val'))

    def _log_accuracy(self, writer, step, accuracy, merged, images, labels):
        """Evaluate accuracy with dropout disabled and record the summary.

        Returns:
            Tuple ``(accuracy_value, seconds_spent_in_session_run)``.
        """
        feed = {self.xs: images, self.ys: labels, self.keep_prob: 1.0}
        started = time.time()
        accuracy_res, summary = self.sess.run([accuracy, merged], feed_dict=feed)
        elapsed = time.time() - started
        writer.add_summary(summary, step)
        return accuracy_res, elapsed

    def _fit(self, saver, first_iter, train_step, accuracy, merged):
        """Run iterations ``first_iter..niter-1``, then save and test.

        Every ``interval`` global steps the accuracy on the current batch and
        on the validation set is printed and written to the summary writers.
        The writers and the session are closed when this method returns or
        raises.
        """
        start_time = time.time()
        try:
            for _ in range(first_iter, niter):
                batch_xs, batch_ys = mnist.train.next_batch(100)
                _, step = self.sess.run([train_step, self.global_step], feed_dict={self.xs: batch_xs, self.ys: batch_ys, self.keep_prob: 0.5})

                if step % interval == 0:
                    # Accuracy on the current training batch.
                    train_accuracy_res, train_seconds = self._log_accuracy(
                        self.writer_train, step, accuracy, merged, batch_xs, batch_ys)
                    print( "step %d,  train accuracy %.2g   time: %.3gs" % (step, train_accuracy_res, train_seconds))

                    # Accuracy on the validation set.
                    val_accuracy_res, val_seconds = self._log_accuracy(
                        self.writer_val, step, accuracy, merged, mnist.validation.images, mnist.validation.labels)
                    print( "           val   accuracy %.2g   time: %.3gs"  % ( val_accuracy_res, val_seconds))

            end_time = time.time()
            print('total time: %gs' %  (end_time-start_time))

            saver.save(self.sess, os.path.join(DIR_log,"model.cpkt"),global_step=self.global_step)

            print ("test accuracy %g" % self.sess.run(accuracy, feed_dict={self.xs : mnist.test.images,self.ys: mnist.test.labels, self.keep_prob: 1.0}))
        finally:
            # Closing the writers flushes any buffered summaries to disk.
            self.writer_train.close()
            self.writer_val.close()
            self.sess.close()
        print("DONE")

    def train(self, train_step, accuracy, merged):
        """Train from freshly initialised variables.

        Args:
            train_step: optimizer op returned by ``mnist_network``.
            accuracy: accuracy tensor returned by ``mnist_network``.
            merged: merged summary op returned by ``mnist_network``.
        """
        # Initialise all variables.
        self.sess.run(tf.global_variables_initializer())
        self._open_writers()
        saver = tf.train.Saver()
        self._fit(saver, 0, train_step, accuracy, merged)

    def retrain(self, train_step, accuracy, merged):
        """Resume training from the latest checkpoint in DIR_log.

        The starting iteration is taken from the step suffix of the checkpoint
        path, so training continues up to ``niter``.

        Args:
            train_step: optimizer op returned by ``mnist_network``.
            accuracy: accuracy tensor returned by ``mnist_network``.
            merged: merged summary op returned by ``mnist_network``.

        Raises:
            ValueError: if DIR_log contains no checkpoint to resume from.
        """
        ckpt = tf.train.get_checkpoint_state(DIR_log)
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise ValueError('no checkpoint found in {}'.format(DIR_log))
        step = int(ckpt.model_checkpoint_path.split('-')[-1])

        self._open_writers()
        saver = tf.train.Saver()
        saver.restore(self.sess, ckpt.model_checkpoint_path)
        self._fit(saver, step, train_step, accuracy, merged)
         
def cpu():
    """Build the model, write the projector files and train, pinned to '/cpu:0'."""
    # The default graph is not reset before the model is built here.
    with tf.device('/cpu:0'):
        model = Mnist()
        network_ops = model.mnist_network()
        model.vis()
        model.train(*network_ops)
        
def gpu():
    """Train on GPU, resuming from a checkpoint in DIR_log when one exists.

    Raises:
        SystemError: if TensorFlow does not report '/device:GPU:0'.
    """
    # Verify that a GPU is available before doing anything else.
    found_device = tf.test.gpu_device_name()
    if found_device != '/device:GPU:0':
        raise SystemError('GPU device not found')
    print('Found GPU at: {}'.format(found_device))

    # Start from an empty default graph.
    tf.reset_default_graph()

    model = Mnist()
    train_step, accuracy, merged = model.mnist_network()
    model.vis()

    # Resume when a checkpoint is present, otherwise train from scratch.
    has_checkpoint = bool(tf.train.get_checkpoint_state(DIR_log))
    run_training = model.retrain if has_checkpoint else model.train
    run_training(train_step, accuracy, merged)
        
if __name__ == "__main__":
    # Write the sprite sheet used by the embedding projector.
    save_sprite()

    # The CPU run (tf.reset_default_graph() followed by cpu()) is disabled;
    # only the GPU path is executed.
    banner = "#" * 50
    print(banner)
    print("Using GPU ......")
    gpu()


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
##################################################
Using GPU ......
Found GPU at: /device:GPU:0
step 100,  train accuracy 0.9   time: 0.221s
           val   accuracy 0.87   time: 0.229s
step 200,  train accuracy 0.92   time: 0.172s
           val   accuracy 0.93   time: 0.308s
step 300,  train accuracy 0.91   time: 0.168s
           val   accuracy 0.94   time: 0.198s
step 400,  train accuracy 0.91   time: 0.169s
           val   accuracy 0.95   time: 0.235s
step 500,  train accuracy 0.97   time: 0.164s
           val   accuracy 0.95   time: 0.228s
total time: 8.63001s
test accuracy 0.9544
DONE


In [0]:
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip

In [64]:
# Print the projector configuration found in the log directory.
!cat mnist/projector_config.pbtxt

embeddings {
  tensor_name: "embedding:0"
  metadata_path: "metadata.tsv"
  sprite {
    image_path: "mnist_10k_sprite.png"
    single_image_dim: 28
    single_image_dim: 28
  }
}


In [68]:
# Log directory handed to TensorBoard; same literal value as DIR_log in the training cell.
LOG_DIR = 'mnist/'
# Launch TensorBoard in the background on port 6006, bound to all interfaces.
get_ipython().system_raw(
    'tensorboard --logdir={} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)
# Start an ngrok HTTP tunnel to port 6006 in the background (needs ./ngrok in the working directory).
get_ipython().system_raw('./ngrok http 6006 &')
# Ask ngrok's local API on port 4040 for its tunnels and print the public URL of the first one.
# NOTE(review): ngrok is started in the background just above; presumably this query can
# run before the tunnel is registered — confirm whether a short wait or retry is needed.
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

https://10edeb0c.ngrok.io


In [0]:
# Recursively delete every directory in the working directory whose name starts with 'mnist'.
!rm -r mnist*/

In [0]:
# Copy the projector metadata file into the validation log directory.
!cp mnist/metadata.tsv mnist/val/

In [6]:
import tensorflow as tf

# Display the installed TensorFlow version.
tf.__version__

'1.11.0-rc2'