## Linear Regression

In [37]:
import tensorflow as tf
import numpy as np

In [None]:
# 使用 NumPy 生成假数据(phony data), 总共 100 个点.
x_data = np.float32(np.random.rand(2, 100)) # 随机输入
y_data = np.dot([0.100, 0.200], x_data) + 0.300

In [None]:
# 构造一个线性模型
# 
b = tf.Variable(tf.zeros([1]))
W = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0))
y = tf.matmul(W, x_data) + b

In [20]:
# 最小化方差
loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)

# 初始化变量
init = tf.global_variables_initializer()

# 启动图 (graph)，创建一个会话 
sess = tf.Session()
sess.run(init)
#tf.global_variables_initializer().run(session=sess)
# 拟合平面
for step in range(0, 201):
    sess.run(train)
    if step % 20 == 0:
        print(step, sess.run([W, b]))
#关闭当前会话
sess.close()

0 [array([[-0.5526787,  1.027071 ]], dtype=float32), array([ 0.42350525], dtype=float32)]
20 [array([[-0.03690662,  0.3674413 ]], dtype=float32), array([ 0.28259146], dtype=float32)]
40 [array([[ 0.07512739,  0.23839875]], dtype=float32), array([ 0.29263803], dtype=float32)]
60 [array([[ 0.09576317,  0.20909017]], dtype=float32), array([ 0.29740426], dtype=float32)]
80 [array([[ 0.09936837,  0.2022239 ]], dtype=float32), array([ 0.2991558], dtype=float32)]
100 [array([[ 0.09993653,  0.20056173]], dtype=float32), array([ 0.29973716], dtype=float32)]
120 [array([[ 0.10000557,  0.20014605]], dtype=float32), array([ 0.29992023], dtype=float32)]
140 [array([[ 0.10000642,  0.20003897]], dtype=float32), array([ 0.29997617], dtype=float32)]
160 [array([[ 0.10000281,  0.20001058]], dtype=float32), array([ 0.29999298], dtype=float32)]
180 [array([[ 0.10000101,  0.20000292]], dtype=float32), array([ 0.29999793], dtype=float32)]
200 [array([[ 0.10000034,  0.20000082]], dtype=float32), array([ 0.29

## 常量计算

常量计算与其他语言类似，但是如同Spark里面弹性(resilient)变量一样，只有在会话里运行之后，各种运算才开始进行。

In [218]:
# 创建一个常量 op, 产生一个 1x2 矩阵. 这个 op 被作为一个节点
# 加到默认图中.
#
# 构造器的返回值代表该常量 op 的返回值.
matrix1 = tf.constant([[3., 3.]])

# 创建另外一个常量 op, 产生一个 2x1 矩阵.
matrix2 = tf.constant([[2.],[2.]])

# 创建一个矩阵乘法 matmul op , 把 'matrix1' 和 'matrix2' 作为输入.
# 返回值 'product' 代表矩阵乘法的结果.
product = tf.matmul(matrix1, matrix2)


如果不运行会话，矩阵里面没有内容

In [219]:
print(matrix1)

Tensor("Const_23:0", shape=(1, 2), dtype=float32)


In [223]:
print(product)

Tensor("MatMul_1:0", shape=(1, 1), dtype=float32)


In [226]:
# 启动默认图.
sess = tf.Session()

# 调用 sess 的 'run()' 方法来执行矩阵乘法 op, 传入 'product' 作为该方法的参数. 
# 上面提到, 'product' 代表了矩阵乘法 op 的输出, 传入它是向方法表明, 我们希望取回
# 矩阵乘法 op 的输出.
#
# 整个执行过程是自动化的, 会话负责传递 op 所需的全部输入. op 通常是并发执行的.
# 
# 函数调用 'run(product)' 触发了图中三个 op (两个常量 op 和一个矩阵乘法 op) 的执行.
#
# 返回值 'result' 是一个 numpy `ndarray` 对象.
result = sess.run(product)
print(result)
# ==> [[ 12.]]

# 任务完成, 关闭会话.
#


[[ 12.]]


In [229]:
#启动会话运行之后才能得到结果
print(product.eval(session=sess))

[[ 12.]]


In [221]:
print(matrix1.eval(session=sess))

[[ 3.  3.]]


In [222]:
print(sess.run(matrix1))

[[ 3.  3.]]


In [11]:
sess.close()

## 交互式会话

In [13]:
# 进入一个交互式 TensorFlow 会话.
import tensorflow as tf
sess = tf.InteractiveSession()

x = tf.Variable([1.0, 2.0])
a = tf.constant([3.0, 3.0])

# 使用初始化器 initializer op 的 run() 方法初始化 'x' 
x.initializer.run()

# 增加一个减法 sub op, 从 'x' 减去 'a'. 运行减法 op, 输出结果 
sub = tf.sub(x, a)
print(sub.eval())
# ==> [-2. -1.]


[-2. -1.]


## 变量

In [23]:
# 创建一个变量, 初始化为标量 0.
state = tf.Variable(0, name="counter")

# 创建一个 op, 其作用是使 state 增加 1

one = tf.constant(1)
new_value = tf.add(state, one)
update = tf.assign(state, new_value)

# 启动图后, 变量必须先经过`初始化` (init) op 初始化,
# 首先必须增加一个`初始化` op 到图中.
init_op = tf.global_variables_initializer()

# 启动图, 运行 op
with tf.Session() as sess:
  # 运行 'init' op
  sess.run(init_op)
  # 打印 'state' 的初始值
  print(sess.run(state))
  # 运行 op, 更新 'state', 并打印 'state'
  for _ in range(3):
    sess.run(update)
    print(sess.run(state))



0
1
2
3


## Feed机制

通过feed机制可以将Python数据直接传递给tensorflow。

In [30]:
input1 = tf.placeholder(tf.float32)
input2 = tf.placeholder(tf.float32)
output = tf.mul(input1, input2)

with tf.Session() as sess:
  print(sess.run([output], feed_dict={input1:[7.], input2:[2.]}))

[array([ 14.], dtype=float32)]


## MNIST入门

下面例子是通过简单单层神经网络对手写数字进行分类识别。

In [38]:
#读取数据
import tensorflow.examples.tutorials.mnist.input_data as input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [4]:
mnist.train.images.shape

(55000, 784)

In [6]:
mnist.train.labels.shape

(55000, 10)

In [7]:
train_images = mnist.train.images
train_labels = mnist.train.labels

In [11]:
#定义feed数据
#定义输入数据
x = tf.placeholder(tf.float32, [None, 784])
#输入数据对应标签
y_ = tf.placeholder(tf.float32, [None,10])

In [12]:
#定义权重和偏置参数变量
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))

In [13]:
#系统输出标签值 
y = tf.nn.softmax(tf.matmul(x,W) + b)

In [14]:
#定义目标函数，采用交叉熵
cross_entropy = -tf.reduce_sum(y_*tf.log(y))

In [15]:
#定义训练方式，梯度下降法
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

In [16]:
num_steps = 1001
batch_size = 64
#创建会话
with tf.Session() as sess:
    #初始化所有变量
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        #采用随机梯队下降方法，每次选部分样本进行训练      
        batch_data = train_images[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        #将数据传递给字典
        feed_dict = {x : batch_data, y_ : batch_labels}
        _, loss = sess.run([train_step, cross_entropy], feed_dict=feed_dict)
        if step%50 == 0:
            #定义模型评价指标精确度
            correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            result = sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels})
            print("测试精度：", result)

测试精度： 0.3045
测试精度： 0.8699
测试精度： 0.8743
测试精度： 0.8549
测试精度： 0.9014
测试精度： 0.8833
测试精度： 0.9072
测试精度： 0.9033
测试精度： 0.9099
测试精度： 0.9107
测试精度： 0.9028
测试精度： 0.9096
测试精度： 0.9097
测试精度： 0.8926
测试精度： 0.9151
测试精度： 0.9106
测试精度： 0.9122
测试精度： 0.8925
测试精度： 0.9165
测试精度： 0.9188
测试精度： 0.8992


## MNIST进阶（卷积神经网络）

In [40]:
#创建权重变量
def weight_variable(shape):
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial)
#创建偏差
def bias_variable(shape):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)

In [41]:
#卷积函数
def conv2d(x, W):
  return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
#池化函数
def max_pool_2x2(x):
  return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')


In [49]:
#变量定义
#定义输入数据
x = tf.placeholder(tf.float32, [None, 784])
#期望输出标签
y_ = tf.placeholder(tf.float32, [None,10])

In [50]:
#第一层卷积
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

x_image = tf.reshape(x, [-1,28,28,1])
#卷积后采用Relu函数激活
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
#进行池化
h_pool1 = max_pool_2x2(h_conv1)

In [51]:
#第二层卷积
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
#卷积后采用Relu函数激活
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
#进行池化
h_pool2 = max_pool_2x2(h_conv2)

In [52]:
#全连接层
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)


In [53]:
#丢弃，为了增强泛化能力
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)


In [54]:
#输出层，softmax
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)


In [55]:
#损失函数
cross_entropy = -tf.reduce_sum(y_*tf.log(y_conv))
#梯度下降法
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
#计算准确度
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))


In [59]:
num_steps = 1001
batch_size = 64
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        #产生训练用样本集      
        batch_data = train_images[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        #数据传递给tensorflow
        feed_dict = {x : batch_data, y_ : batch_labels, keep_prob:0.5}
        sess.run(train_step, feed_dict=feed_dict)
        if step%50 == 0:
            #每50次计算准确度
            result = sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob:1})
            print('Accuracy', result)        

Accuracy 0.1139
Accuracy 0.7469
Accuracy 0.8609
Accuracy 0.8959
Accuracy 0.9129
Accuracy 0.9256
Accuracy 0.9374
Accuracy 0.9373
Accuracy 0.945
Accuracy 0.9445
Accuracy 0.9461
Accuracy 0.9526
Accuracy 0.9536
Accuracy 0.9566
Accuracy 0.9582
Accuracy 0.9581
Accuracy 0.9606
Accuracy 0.9592
Accuracy 0.9633
Accuracy 0.9637
Accuracy 0.9651


## 命名空间

In [37]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import time
import os

In [60]:
#手写数字分为10各类，即0-9
NUM_CLASSES = 10

#图片像素28*28
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE

def inference(images, hidden1_units, hidden2_units):
  """建立双层MNIST神经网络模型

  Args:
    images: 输入的像素值
    hidden1_units: 第一隐层神经元数量
    hidden2_units: 第二隐层神经元数量

  Returns:
    softmax_linear: 归一化后的输出结果
  """
  # 隐层1
  with tf.name_scope('hidden1'):
    #权重
    weights = tf.Variable(
        tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
                            stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
        name='weights')
    #偏差
    biases = tf.Variable(tf.zeros([hidden1_units]),
                         name='biases')
    hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
  # 隐层2
  with tf.name_scope('hidden2'):
    weights = tf.Variable(
        tf.truncated_normal([hidden1_units, hidden2_units],
                            stddev=1.0 / math.sqrt(float(hidden1_units))),
        name='weights')
    biases = tf.Variable(tf.zeros([hidden2_units]),
                         name='biases')
    hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
  # Linear
  with tf.name_scope('softmax_linear'):
    weights = tf.Variable(
        tf.truncated_normal([hidden2_units, NUM_CLASSES],
                            stddev=1.0 / math.sqrt(float(hidden2_units))),
        name='weights')
    biases = tf.Variable(tf.zeros([NUM_CLASSES]),
                         name='biases')
    logits = tf.matmul(hidden2, weights) + biases
  return logits


def loss(logits, labels):
  """计算损失值

  Args:
    logits: 预测值，张量, float - [batch_size, NUM_CLASSES].
    labels: 原始标签值，张量, int32 - [batch_size].

  Returns:
    loss: 损失函数 float.
  """
  #labels = tf.to_int64(labels)
  #cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
      #logits, labels, name='xentropy')
  #loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
  #交叉熵
  y_ = tf.nn.softmax(logits, name='xentropy')
  loss = -tf.reduce_mean(labels*tf.log(y_), name='xentropy_mean')
  return loss


def training(loss, learning_rate):
  """建立训练过程

  创建summarizer以在tensorboard里跟踪损失值

  创建优化器，采用梯度下降方法训练模型


  参数:
    loss: 损失值.
    learning_rate: 学习速率.

  返回:
    train_op: 训练设置.
  """
  # Add a scalar summary for the snapshot loss.
  tf.summary.scalar(loss.op.name, loss)
  # Create the gradient descent optimizer with the given learning rate.
  optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  # Create a variable to track the global step.
  global_step = tf.Variable(0, name='global_step', trainable=False)
  # Use the optimizer to apply the gradients that minimize the loss
  # (and also increment the global step counter) as a single training step.
  train_op = optimizer.minimize(loss, global_step=global_step)
  return train_op


def evaluation(logits, labels):
  """评估模型预测质量.

  参数:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size], with values in the
      range [0, NUM_CLASSES).

  返回:
    A scalar int32 tensor with the number of examples (out of batch_size)
    that were predicted correctly.
  """
    #计算准确度
  y_hat = tf.nn.softmax(logits, name='xentropy')
  correct_prediction = tf.equal(tf.argmax(y_hat,1), tf.argmax(labels,1))

  return tf.reduce_sum(tf.cast(correct_prediction, tf.int32))


In [61]:
def placeholder_inputs(batch_size):
  """Generate placeholder variables to represent the input tensors.

  These placeholders are used as inputs by the rest of the model building
  code and will be fed from the downloaded data in the .run() loop, below.

  Args:
    batch_size: The batch size will be baked into both placeholders.

  Returns:
    images_placeholder: Images placeholder.
    labels_placeholder: Labels placeholder.
  """
  # Note that the shapes of the placeholders match the shapes of the full
  # image and label tensors, except the first dimension is now batch_size
  # rather than the full size of the train or test data sets.
  images_placeholder = tf.placeholder(tf.float32, shape=(batch_size,
                                                         IMAGE_PIXELS))
  labels_placeholder = tf.placeholder(tf.float32, shape=(batch_size, NUM_CLASSES))
  return images_placeholder, labels_placeholder


def fill_feed_dict(data_set, batch_size, images_pl, labels_pl):
  """Fills the feed_dict for training the given step.

  A feed_dict takes the form of:
  feed_dict = {
      <placeholder>: <tensor of values to be passed for placeholder>,
      ....
  }

  Args:
    data_set: The set of images and labels, from input_data.read_data_sets()
    images_pl: The images placeholder, from placeholder_inputs().
    labels_pl: The labels placeholder, from placeholder_inputs().

  Returns:
    feed_dict: The feed dictionary mapping from placeholders to values.
  """
  # Create the feed_dict for the placeholders filled with the next
  # `batch size` examples.
  images_feed, labels_feed = data_set.next_batch(batch_size)
  feed_dict = {
      images_pl: images_feed,
      labels_pl: labels_feed,
  }
  return feed_dict


def do_eval(sess, batch_size,
            eval_correct,
            images_placeholder,
            labels_placeholder,
            data_set):
  """Runs one evaluation against the full epoch of data.

  Args:
    sess: The session in which the model has been trained.
    eval_correct: The Tensor that returns the number of correct predictions.
    images_placeholder: The images placeholder.
    labels_placeholder: The labels placeholder.
    data_set: The set of images and labels to evaluate, from
      input_data.read_data_sets().
  """
  # And run one epoch of eval.
  true_count = 0  # Counts the number of correct predictions.
  steps_per_epoch = data_set.num_examples // batch_size
  num_examples = steps_per_epoch * batch_size
  for step in range(steps_per_epoch):
    feed_dict = fill_feed_dict(data_set, batch_size,
                               images_placeholder,
                               labels_placeholder)
    true_count += sess.run(eval_correct, feed_dict=feed_dict)
  precision = true_count / num_examples
  print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
        (num_examples, true_count, precision))


In [62]:
batch_size = 64
hidden1= 128
hidden2 = 64
learning_rate = 0.1
num_steps = 501
with tf.Graph().as_default():
    # Generate placeholders for the images and labels.
    images_placeholder, labels_placeholder = placeholder_inputs(
        batch_size)

    # Build a Graph that computes predictions from the inference model.
    logits = inference(images_placeholder,
                             hidden1,
                             hidden2)

    # Add to the Graph the Ops for loss calculation.
    #loss = loss(logits, labels_placeholder)
    y_ = tf.log(tf.nn.softmax(logits, name='xentropy'))
    loss = -tf.reduce_mean(labels_placeholder*y_, name='xentropy_mean')

    # Add to the Graph the Ops that calculate and apply gradients.
    train_op = training(loss, learning_rate)

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = evaluation(logits, labels_placeholder)

    # Build the summary Tensor based on the TF collection of Summaries.
    summary = tf.merge_all_summaries()


    # Add the variable initializer Op.
    init = tf.global_variables_initializer()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.summary.FileWriter('save/', sess.graph)

    # And then after everything is built:

    # Run the Op to initialize the variables.
    sess.run(init)
    
    for step in range(num_steps):
        start_time = time.time()
        #data_batch = mnist.train.next_batch(batch_size)
        feed_dict = fill_feed_dict(mnist.train, batch_size, 
                                 images_placeholder,
                                 labels_placeholder)
        _, loss_value = sess.run([train_op, loss],
                               feed_dict=feed_dict)
        duration = time.time() - start_time
        if step % 50 == 0:
            # Print status to stdout.
            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
            # Update the events file.
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()
        
        if (step + 1) % 100 == 0 or (step + 1) == num_steps:
            checkpoint_file = os.path.join('save/', 'checkpoint')
            saver.save(sess, checkpoint_file, global_step=step)
            # Evaluate against the training set.
            print('Training Data Eval:')
            #train_data = (mnist.train.images, mnist.train.labels)
            #feed_dict = {images_placeholder:train_data[0], labels_placeholder:train_data[1]}
            #accuracy = sess.run(eval_correct, feed_dict=feed_dict)
            #print(accuracy)
            do_eval(sess, batch_size, 
                eval_correct,
                images_placeholder,
                labels_placeholder,
                mnist.train)
            # Evaluate against the validation set.
            print('Validation Data Eval:')
            do_eval(sess, batch_size,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                mnist.validation)
            # Evaluate against the test set.
            print('Test Data Eval:')
            do_eval(sess, batch_size,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                mnist.test)
            
    sess.close()

Instructions for updating:
Please switch to tf.summary.merge_all.
Instructions for updating:
Please switch to tf.summary.merge.
Step 0: loss = 0.23 (0.047 sec)
Step 50: loss = 0.23 (0.016 sec)
Training Data Eval:
  Num examples: 54976  Num correct: 15043  Precision @ 1: 0.2736
Validation Data Eval:
  Num examples: 4992  Num correct: 1335  Precision @ 1: 0.2674
Test Data Eval:
  Num examples: 9984  Num correct: 2815  Precision @ 1: 0.2820
Step 100: loss = 0.22 (0.016 sec)
Step 150: loss = 0.20 (0.016 sec)
Training Data Eval:
  Num examples: 54976  Num correct: 32525  Precision @ 1: 0.5916
Validation Data Eval:
  Num examples: 4992  Num correct: 2974  Precision @ 1: 0.5958
Test Data Eval:
  Num examples: 9984  Num correct: 5934  Precision @ 1: 0.5944
Step 200: loss = 0.20 (0.047 sec)
Step 250: loss = 0.19 (0.016 sec)
Training Data Eval:
  Num examples: 54976  Num correct: 39660  Precision @ 1: 0.7214
Validation Data Eval:
  Num examples: 4992  Num correct: 3633  Precision @ 1: 0.7278
Tes

##可视化命令python D:\Anaconda2\envs\tensorflow\Lib\site-packages\tensorflow\tensorboard  --logdir=“”

## 可视化

In [146]:
num_steps = 1001
learning_rate = 0.1
summaries_dir = 'summary_dir'
fake_data = True
dropout = 0.5

In [147]:
# We can't initialize these variables to 0 - the network will get stuck.
def weight_variable(shape):
    """Create a weight variable with appropriate initialization."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    """Create a bias variable with appropriate initialization."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def variable_summaries(var, name):
    """Attach a lot of summaries to a Tensor."""
    with tf.name_scope('summaries'):
      mean = tf.reduce_mean(var)
      tf.summary.scalar('mean/' + name, mean)
      with tf.name_scope('stddev'):
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
      tf.summary.scalar('stddev/' + name, stddev)
      tf.summary.scalar('max/' + name, tf.reduce_max(var))
      tf.summary.scalar('min/' + name, tf.reduce_min(var))
      tf.summary.histogram(name, var)
        
def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
    """Reusable code for making a simple neural net layer.

    It does a matrix multiply, bias add, and then uses relu to nonlinearize.
    It also sets up name scoping so that the resultant graph is easy to read,
    and adds a number of summary ops.
    """
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope(layer_name):
      # This Variable will hold the state of the weights for the layer
      with tf.name_scope('weights'):
        weights = weight_variable([input_dim, output_dim])
        variable_summaries(weights, layer_name + '/weights')
      with tf.name_scope('biases'):
        biases = bias_variable([output_dim])
        variable_summaries(biases, layer_name + '/biases')
      with tf.name_scope('Wx_plus_b'):
        preactivate = tf.matmul(input_tensor, weights) + biases
        tf.summary.histogram(layer_name + '/pre_activations', preactivate)
      activations = act(preactivate, name='activation')
      tf.summary.histogram(layer_name + '/activations', activations)
      return activations

def feed_dict(train):
    """Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
    if train:
      xs, ys = mnist.train.next_batch(100)
      k = 0.8
    else:
      xs, ys = mnist.test.images, mnist.test.labels
      k = 1.0
    return {x: xs, y_: ys, keep_prob: k}

In [148]:
xs, ys = mnist.train.next_batch(100)

In [149]:
graph2 = tf.Graph()
with graph2.as_default():
    sess = tf.InteractiveSession(graph=graph2)
    # Create a multilayer model.
    # Input placeholders
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 784], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')
    #x = tf.placeholder(tf.int32, [None, 784])
    #y_ = tf.placeholder(tf.int32, [None, 10])

    with tf.name_scope('input_reshape'):
        image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])
        tf.summary.image('input', image_shaped_input, 10)

    hidden1 = nn_layer(x, 784, 500, 'layer1')

    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        tf.summary.scalar('dropout_keep_probability', keep_prob)
        dropped = tf.nn.dropout(hidden1, keep_prob)
        # Do not apply softmax activation yet, see below.
        y = nn_layer(dropped, 500, 10, 'layer2', act=tf.identity)
        with tf.name_scope('cross_entropy'):
            # The raw formulation of cross-entropy,
            ## tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)),
            #                               reduction_indices=[1]))
            #
            # can be numerically unstable.
            #
            # So here we use tf.nn.softmax_cross_entropy_with_logits on the
            # raw outputs of the nn_layer above, and then average across
            # the batch.
            diff = tf.nn.softmax_cross_entropy_with_logits(y, y_)
            with tf.name_scope('total'):
                cross_entropy = tf.reduce_mean(diff)
                tf.summary.scalar('cross_entropy', cross_entropy)

    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar('accuracy', accuracy)
    # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(summaries_dir + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(summaries_dir + '/test')

    tf.global_variables_initializer().run()

    # Train the model, and also write summaries.
    # Every 10th step, measure test-set accuracy, and write test summaries
    # All other steps, run train_step on training data, & add training summaries

    for i in range(num_steps):
        if i % 50 == 0:  # Record summaries and test-set accuracy
            summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False))
      
            #test_writer.add_summary(summary, i)
            print('Accuracy at step %s: %s' % (i, acc))
        else:  # Record train set summaries, and train
            if i % 100 == 99:  # Record execution stats
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                summary, _ = sess.run([merged, train_step],
                              feed_dict=feed_dict(True),
                              options=run_options,
                              run_metadata=run_metadata)
                train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                train_writer.add_summary(summary, i)
                print('Adding run metadata for', i)
            else:  # Record a summary
                summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True))
                train_writer.add_summary(summary, i)
    train_writer.close()
    test_writer.close()

Accuracy at step 0: 0.082
Accuracy at step 50: 0.7661
Adding run metadata for 99
Accuracy at step 100: 0.8354
Accuracy at step 150: 0.8249
Adding run metadata for 199
Accuracy at step 200: 0.8398
Accuracy at step 250: 0.8669
Adding run metadata for 299
Accuracy at step 300: 0.8757
Accuracy at step 350: 0.8701
Adding run metadata for 399
Accuracy at step 400: 0.8377
Accuracy at step 450: 0.8518
Adding run metadata for 499
Accuracy at step 500: 0.822
Accuracy at step 550: 0.862
Adding run metadata for 599
Accuracy at step 600: 0.8533
Accuracy at step 650: 0.8734
Adding run metadata for 699
Accuracy at step 700: 0.8565
Accuracy at step 750: 0.8352
Adding run metadata for 799
Accuracy at step 800: 0.8281
Accuracy at step 850: 0.7548
Adding run metadata for 899
Accuracy at step 900: 0.8296
Accuracy at step 950: 0.7454
Adding run metadata for 999
Accuracy at step 1000: 0.808


In [118]:
xs, ys = mnist.test.images, mnist.test.labels

## 循环神经网络

这里使用LSTM对数字进行识别，具体原理可 参考这篇[文章](http://arxiv.org/pdf/1402.1128v1.pdf)。

In [206]:
#手写数字分为10各类，即0-9
NUM_CLASSES = 10
#图片像素28*28
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
#RNN内部神经元节点数目
num_nodes = 64
#训练样本群规模
batch_size = 128
#测试样本群
test_size = len(mnist.test.images)
#参数初始化
initial = 0.01
#构建图
graph3 = tf.Graph()
with graph3.as_default():
  
  # Parameters(weights):
  # 输入门: input, previous output, and bias.
  #weights for the input data x(t)
  ix = tf.Variable(tf.truncated_normal([IMAGE_PIXELS, num_nodes], -initial, initial))
  #weights for the last output h(t-1)
  im = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -initial, initial))
  #biase
  ib = tf.Variable(tf.zeros([1, num_nodes]))#bias
  # 遗忘门: input, previous output, and bias.
  #weights for the input i(t)
  fx = tf.Variable(tf.truncated_normal([IMAGE_PIXELS, num_nodes], -initial, initial))
  #weights for last output h(t-1)
  fm = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -initial, initial))
  fb = tf.Variable(tf.zeros([1, num_nodes]))
  # 记忆细胞: input, state and bias.    
  #weights for the input data x(t)
  cx = tf.Variable(tf.truncated_normal([IMAGE_PIXELS, num_nodes], -initial, initial))
  #weights for the last output h(t-1)
  cm = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -initial, initial))
  cb = tf.Variable(tf.zeros([1, num_nodes]))
  # 输出门: input, previous output, and bias.
  ox = tf.Variable(tf.truncated_normal([IMAGE_PIXELS, num_nodes], -initial, initial))
  om = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -initial, initial))
  ob = tf.Variable(tf.zeros([1, num_nodes]))
  # Variables saving state across unrollings.
  saved_output = tf.Variable(tf.zeros([batch_size, num_nodes]), trainable=False)
  saved_state = tf.Variable(tf.zeros([batch_size, num_nodes]), trainable=False)
  # Classifier weights and biases, multiply the output of LSTM cell and get the
  # predictionss
  w = tf.Variable(tf.truncated_normal([num_nodes, NUM_CLASSES], -initial, initial))
  b = tf.Variable(tf.zeros([NUM_CLASSES]))
  
  # LSTM细胞内部计算定义
  def lstm_cell(i, o, state):
    """Create a LSTM cell. 
    Note that in this formulation, we omit the various connections between the
    previous state and the gates."""
    #输入门
    input_gate = tf.sigmoid(tf.matmul(i, ix) + tf.matmul(o, im) + ib)#i(t)
    #遗忘门
    forget_gate = tf.sigmoid(tf.matmul(i, fx) + tf.matmul(o, fm) + fb)#f(t)
    update = tf.matmul(i, cx) + tf.matmul(o, cm) + cb
    #候任状态
    candidate_state = tf.tanh(update) #C~(t)   
    #最终状态
    state = forget_gate * state + input_gate * candidate_state#C(t)
    #输出门
    output_gate = tf.sigmoid(tf.matmul(i, ox) + tf.matmul(o, om) + ob)#O(t)
    #输出结果
    output = output_gate * tf.tanh(state)#h(t)
    return output, state

  #images_placeholder, labels_placeholder = placeholder_inputs(batch_size)
  images_placeholder = tf.placeholder(tf.float32, shape=(None, IMAGE_PIXELS))
  labels_placeholder = tf.placeholder(tf.float32, shape=(None, NUM_CLASSES))
  output = saved_output
  state = saved_state
  output, state = lstm_cell(images_placeholder, output, state)

  # 将当前状态和输出值记录下来
  with tf.control_dependencies([saved_output.assign(output),
                                saved_state.assign(state)]):
    # Classifier.
    #预测输出结果，非归一化
    logits = tf.nn.xw_plus_b(output, w, b)#
    #损失函数
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits
                              (logits, labels_placeholder))
  optimizer = tf.train.AdamOptimizer().minimize(loss)
  
  #计算测试结果
  sample_input = tf.placeholder(tf.float32, shape=[test_size, IMAGE_PIXELS])
  sample_label = tf.placeholder(tf.float32, shape=[test_size, NUM_CLASSES])
  saved_sample_output = tf.Variable(tf.zeros([test_size, num_nodes]))
  saved_sample_state = tf.Variable(tf.zeros([test_size, num_nodes]))
  reset_sample_state = tf.group(
    saved_sample_output.assign(tf.zeros([test_size, num_nodes])),
    saved_sample_state.assign(tf.zeros([test_size, num_nodes])))
  sample_output, sample_state = lstm_cell(
    sample_input, saved_sample_output, saved_sample_state)
  with tf.control_dependencies([saved_sample_output.assign(sample_output),
                                saved_sample_state.assign(sample_state)]):
    sample_prediction = tf.nn.softmax(tf.nn.xw_plus_b(sample_output, w, b))
      #计算准确度
    correct_prediction = tf.equal(tf.argmax(sample_prediction,1), tf.argmax(sample_label,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))


In [207]:
num_steps = 3001
with tf.Session(graph=graph3) as sess:
    #初始化所有变量
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(num_steps):
        #获取组训练数据
        images, labels = mnist.train.next_batch(batch_size)
        feed_dict = {images_placeholder: images, labels_placeholder: labels}
        #进行训练学习
        _, ls = sess.run([optimizer, loss], feed_dict=feed_dict)
        if i%200 == 0:
            print('Training Loss:', ls)
            feed_dict = {sample_input: mnist.test.images, sample_label: mnist.test.labels}
            acc = sess.run(accuracy, feed_dict=feed_dict)
            print('Testing Accuracy:', acc)

Loss: 2.30255
Accuracy: 0.1121
Loss: 1.14141
Accuracy: 0.7773
Loss: 0.620363
Accuracy: 0.8639
Loss: 0.425818
Accuracy: 0.8871
Loss: 0.409023
Accuracy: 0.8979
Loss: 0.258485
Accuracy: 0.9067
Loss: 0.359537
Accuracy: 0.9094
Loss: 0.338571
Accuracy: 0.9134
Loss: 0.308494
Accuracy: 0.9153
Loss: 0.327692
Accuracy: 0.9183
Loss: 0.341893
Accuracy: 0.9178
Loss: 0.292808
Accuracy: 0.9233
Loss: 0.224088
Accuracy: 0.9249
Loss: 0.338388
Accuracy: 0.9268
Loss: 0.106679
Accuracy: 0.929
Loss: 0.170672
Accuracy: 0.929


## 循环神经网络2

下面我们直接用tensorflow自带的循环神经网络模块对手写数字进行分类。具体可以参考这篇[博客](http://blog.topspeedsnail.com/archives/10443)。

In [96]:
#神经网络内部神经元数量
num_nodes = 64
#手写数字分为10各类，即0-9
NUM_CLASSES = 10
#图片像素28*28
IMAGE_SIZE = 28
TIME_STEPS = 28
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
def recurrent_neural_network(data):
    layer = {'w_':tf.Variable(tf.random_normal([num_nodes, NUM_CLASSES])),
             'b_':tf.Variable(tf.random_normal([NUM_CLASSES]))}
    #LSTM计算单元设置
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_nodes)
    #输入数据尺寸变换
    #Initial Data: batch_size,image_size,image_size
    data = tf.transpose(data, [1,0,2])
    #Then: image_size,batch_size,image_size
    data = tf.reshape(data, [-1, IMAGE_SIZE])
    #Now:image_size*batch_size,TIME_STEPS
    data = tf.split(0, TIME_STEPS, data)
    outputs, status = tf.nn.rnn(lstm_cell, data, dtype=tf.float32)
 
    output = tf.add(tf.matmul(outputs[-1], layer['w_']), layer['b_'])
 
    return output

In [100]:
#构建一个新的图
num_steps = 1001
batch_size = 128
graph4 = tf.Graph()
with graph4.as_default():
    images_placeholder = tf.placeholder(tf.float32, shape=(None, IMAGE_SIZE, IMAGE_SIZE))
    labels_placeholder = tf.placeholder(tf.float32, shape=(None, NUM_CLASSES))   
    logits = recurrent_neural_network(images_placeholder)
    loss = tf.nn.softmax_cross_entropy_with_logits(logits, labels_placeholder)
    loss = tf.reduce_mean(loss)
    optimizer = tf.train.AdamOptimizer().minimize(loss)
    #计算准确度
    predict = tf.nn.softmax(logits)
    correct_prediction = tf.equal(tf.argmax(predict,1), tf.argmax(labels_placeholder,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    #创建会话进行运算
    sess = tf.Session(graph=graph4)
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(num_steps):
        x, y = mnist.train.next_batch(batch_size)
        x = x.reshape([batch_size, IMAGE_SIZE, IMAGE_SIZE])
        feed_dict = {images_placeholder: x, labels_placeholder:y}
        _, l = sess.run([optimizer, loss], feed_dict=feed_dict)
        if i %50 == 0:
            x, y = mnist.test.images, mnist.test.labels
            x = x.reshape([len(x), IMAGE_SIZE, IMAGE_SIZE])
            feed_dict = {images_placeholder: x, labels_placeholder:y}
            acc = sess.run(accuracy, feed_dict=feed_dict)
            print('Accuracy:', acc)
    sess.close()

Accuracy: 0.1019
Accuracy: 0.6228
Accuracy: 0.7867
Accuracy: 0.8611
Accuracy: 0.8845
Accuracy: 0.8851
Accuracy: 0.9154
Accuracy: 0.8998
Accuracy: 0.9249
Accuracy: 0.9368
Accuracy: 0.9336
Accuracy: 0.9407
Accuracy: 0.9407
Accuracy: 0.9481
Accuracy: 0.9484
Accuracy: 0.9572
Accuracy: 0.9574
Accuracy: 0.9609
Accuracy: 0.9596
Accuracy: 0.9581
Accuracy: 0.9612
