In [1]:
import tensorflow as tf

## 创建一个计算图并在会话中执行

In [2]:
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = x * x * y + y + 2

Instructions for updating:
Colocations handled automatically by placer.


In [3]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
    print(result)

42


In [4]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    result = f.eval()
    print(result)

42


## 管理图

In [5]:
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

In [6]:
# 创建的结点会自动添加到默认图上，如果想要管理多个互不依赖的图，可以创建一个新的图，用with块临时将它设置为默认图
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
    
print(x2.graph is graph)
print(x2.graph is tf.get_default_graph())

True
False


## 节点的生命周期

In [7]:
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

10
15


在图的每次执行过程中，所有的节点值都会被丢弃，但是变量的值不会，因为变量的值是由会话维护的。变量的生命周期从初始化器的执行开始，到关闭会话结束。

In [8]:
# 上面在对y和z求值时会重复计算w和x重复求值，若不希望对y和z重复求值，则可以告诉TF在一次图的执行中完成y和z的求值
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

10
15


## TensorFlow中的线性回归

In [9]:
import numpy as np
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
m, n = housing.data.shape
# np.c_中的c是column（列）的缩写，是按列叠加两个矩阵的意思，也可以说是按行连接两个矩阵，就是把两矩阵左右相加，要求行数相等，类似于pandas中的merge()
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)  # 正规解

# 与numpy计算正规方程相比，使用tf可以自动将计算分发到gpu上去，如果可以的话
with tf.Session() as sess:
    theta_value = theta.eval()
    print(theta_value)

[[-3.7171074e+01]
 [ 4.3633682e-01]
 [ 9.3871783e-03]
 [-1.0717344e-01]
 [ 6.4540231e-01]
 [-4.1238391e-06]
 [-3.7809242e-03]
 [-4.2373490e-01]
 [-4.3720812e-01]]


In [10]:
# 对特征向量做归一化
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(housing_data_plus_bias)
scaler_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

## 实现梯度下降

### 手工计算梯度

In [11]:
n_epochs = 1000
learning_rate = 0.1

X = tf.constant(scaler_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

gradients = 2 / m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        
        if epoch % 100 == 0:
#             print('theta=', theta.eval())
            print('epoch: ', epoch, ' mse=', mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

epoch:  0  mse= 5.849907
epoch:  100  mse= 4.8064346
epoch:  200  mse= 4.8034205
epoch:  300  mse= 4.803268
epoch:  400  mse= 4.8032556
epoch:  500  mse= 4.8032537
epoch:  600  mse= 4.8032546
epoch:  700  mse= 4.803254
epoch:  800  mse= 4.8032546
epoch:  900  mse= 4.8032537


### 使用自动微分

In [12]:
from sklearn.preprocessing import StandardScaler

n_epochs = 1000
learning_rate = 0.1

X = tf.constant(scaler_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        
        if epoch % 100 == 0:
#             print('theta=', theta.eval())
            print('epoch: ', epoch, ' mse=', mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

epoch:  0  mse= 5.299648
epoch:  100  mse= 4.81614
epoch:  200  mse= 4.804929
epoch:  300  mse= 4.8035064
epoch:  400  mse= 4.8032937
epoch:  500  mse= 4.80326
epoch:  600  mse= 4.8032546
epoch:  700  mse= 4.8032546
epoch:  800  mse= 4.8032537
epoch:  900  mse= 4.8032537


### 使用优化器

In [13]:
n_epochs = 1000
learning_rate = 0.1

X = tf.constant(scaler_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        
        if epoch % 100 == 0:
#             print('theta=', theta.eval())
            print('epoch: ', epoch, ' mse=', mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

epoch:  0  mse= 8.597808
epoch:  100  mse= 4.8066044
epoch:  200  mse= 4.8033886
epoch:  300  mse= 4.803261
epoch:  400  mse= 4.803254
epoch:  500  mse= 4.8032537
epoch:  600  mse= 4.8032546
epoch:  700  mse= 4.803254
epoch:  800  mse= 4.803254
epoch:  900  mse= 4.8032537


## 给训练算法提供数据

In [14]:
n_epochs = 10
learning_rate = 0.1

# 创建占位符节点
X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

batch_size = 1000
n_batches = int(np.ceil(m / batch_size))

# 创建batch
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index) 
    indices = np.random.randint(m, size=batch_size)
    return scaler_housing_data_plus_bias[indices] , housing.target.reshape(-1, 1)[indices]

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            print('batch index: ', batch_index, ' mse=', mse.eval())
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        print('epoch: ', epoch, ' mse=', mse.eval())
    best_theta = theta.eval()

batch index:  0  mse= 7.2183957
batch index:  1  mse= 6.274323
batch index:  2  mse= 5.782699
batch index:  3  mse= 5.5087514
batch index:  4  mse= 5.3468623
batch index:  5  mse= 5.246343
batch index:  6  mse= 5.181192
batch index:  7  mse= 5.1372404
batch index:  8  mse= 5.1063743
batch index:  9  mse= 5.083768
batch index:  10  mse= 5.066475
batch index:  11  mse= 5.052661
batch index:  12  mse= 5.0411663
batch index:  13  mse= 5.0312524
batch index:  14  mse= 5.0224433
batch index:  15  mse= 5.014431
batch index:  16  mse= 5.007013
batch index:  17  mse= 5.000056
batch index:  18  mse= 4.9934697
batch index:  19  mse= 4.987194
batch index:  20  mse= 4.9811854
epoch:  0  mse= 4.9754143
batch index:  0  mse= 4.9754143
batch index:  1  mse= 4.969859
batch index:  2  mse= 4.9645023
batch index:  3  mse= 4.9593325
batch index:  4  mse= 4.9543386
batch index:  5  mse= 4.949511
batch index:  6  mse= 4.9448447
batch index:  7  mse= 4.9403296
batch index:  8  mse= 4.9359627
batch index:  9 

## 保存和恢复模型

In [15]:
n_epochs = 1000
learning_rate = 0.1

X = tf.constant(scaler_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

# saver
saver = tf.train.Saver()
# 按照变量名保存和恢复变量
# saver = tf.train.Saver({'weight': theta})

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        
        if epoch % 100 == 0:
            print('epoch: ', epoch, ' mse=', mse.eval())
            saver.save(sess, 'model/model.ckpt')
        sess.run(training_op)
    
    best_theta = theta.eval()
    saver.save(sess, 'model/model_final.ckpt')

epoch:  0  mse= 5.836144
epoch:  100  mse= 4.8203287
epoch:  200  mse= 4.8043103
epoch:  300  mse= 4.8033605
epoch:  400  mse= 4.803268
epoch:  500  mse= 4.8032556
epoch:  600  mse= 4.8032537
epoch:  700  mse= 4.803253
epoch:  800  mse= 4.803254
epoch:  900  mse= 4.8032537


In [16]:
# 恢复
with tf.Session() as sess:
    saver.restore(sess, 'model/model_final.ckpt')
    mse = sess.run(mse)
    print(mse)

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from model/model_final.ckpt
4.8032537


## 用TensorBoard来可视化图和训练曲线

In [17]:
from datetime import datetime

now = datetime.utcnow().strftime('%Y%m%d%H%M%S')
root_logdir = 'tf_logs'
logdir = '{}/run-{}/'.format(root_logdir, now)

n_epochs = 10
learning_rate = 0.1

# 创建占位符节点
X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

batch_size = 1000
n_batches = int(np.ceil(m / batch_size))

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()

# 创建batch
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index) 
    indices = np.random.randint(m, size=batch_size)
    return scaler_housing_data_plus_bias[indices] , housing.target.reshape(-1, 1)[indices]

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                print(summary_str)
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()
    file_writer.close()

b'\n\x0c\n\x05MSE_5\x15X}\xfd@'
b'\n\x0c\n\x05MSE_5\x15\x1dZ\x99@'
b"\n\x0c\n\x05MSE_5\x15'\xd8\xa3@"
b'\n\x0c\n\x05MSE_5\x15/i\xa9@'
b'\n\x0c\n\x05MSE_5\x15w\xc6\x9f@'
b'\n\x0c\n\x05MSE_5\x15)r\x9e@'
b'\n\x0c\n\x05MSE_5\x150\xe2\x9f@'
b'\n\x0c\n\x05MSE_5\x15L\x8e\x9c@'
b'\n\x0c\n\x05MSE_5\x15\xc8g\x95@'
b'\n\x0c\n\x05MSE_5\x15\x8d\xd5\x9e@'
b'\n\x0c\n\x05MSE_5\x15\xb0C\xa0@'
b'\n\x0c\n\x05MSE_5\x15\xe2\xd4\xa0@'
b'\n\x0c\n\x05MSE_5\x151i\x9d@'
b'\n\x0c\n\x05MSE_5\x15\xa6\x92\x98@'
b'\n\x0c\n\x05MSE_5\x15\x16\xe7\x90@'
b'\n\x0c\n\x05MSE_5\x15\x8f\xdc\x97@'
b'\n\x0c\n\x05MSE_5\x15o\x7f\x9f@'
b'\n\x0c\n\x05MSE_5\x157\x07\x98@'
b'\n\x0c\n\x05MSE_5\x15\x00\xc4\x95@'
b'\n\x0c\n\x05MSE_5\x15D\xd5\x95@'
b'\n\x0c\n\x05MSE_5\x15\xc3\x91\x9b@'
b'\n\x0c\n\x05MSE_5\x15\x7f_\x99@'
b'\n\x0c\n\x05MSE_5\x15mA\x9e@'
b'\n\x0c\n\x05MSE_5\x15\xe9\x94\x95@'
b'\n\x0c\n\x05MSE_5\x15\xee\x98\x99@'
b'\n\x0c\n\x05MSE_5\x15\xf8\x8e\x9a@'
b'\n\x0c\n\x05MSE_5\x15w\xb8\x98@'
b'\n\x0c\n\x05MSE_5\x15/\x8d\x96@'
b'\n\

- 启动 tensorboard 服务器
```
tensorboard --logdir tf_logs/
```
- 浏览器中输入 [http://localhost:6006]() 即可看到 tensorboard 界面

## 命名作用域

In [18]:
# 在这个作用域中定义的每个操作现在都有一个 ‘my_loss/’ 前缀
with tf.name_scope('my_loss') as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name='mse')
    
print(error.op.name)
print(mse.op.name)

my_loss/sub
my_loss/mse


## 模块化

In [19]:
def relu(X):
    w_shape = (int(X.get_shape()[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name='weights')
    b = tf.Variable(0.0, name='bias')
    z = tf.add(tf.matmul(X, w), b, name='z')
    
    return tf.maximum(z, 0., name='relu')

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name='output')

## 共享变量

如果想要在图的不同组件中共享变量：
- 最简单的做法是先创建，然后将其作为参数传递给需要他的函数。但是，如果有太多的类似的共享参数，则参数传递便会很痛苦
    ```
    def relu(X, threshold):
    ```

- 也可以在第一次调用时将共享变量设置为函数的一个属性
    ```
    def relu(X):
        if not hasattr(relu, 'threshold'):
            relu.threshold = tf.Variable(0.0, name='max')
    ```
    
- TF提供：
    如果共享变量不存在，该方法先通过get_variable()函数创建共享变量；如果已经存在了，就复用该共享变量。期望的行为通过当前variable_scope()的一个属性来控制（创建或者复用）
    ```
    with tf.variable_scope('relu'):
        threshold = tf.get_variable('threshold', shape=(), initializer=tf.constant_initilizer(0.0))
    ```
    如果这个变量之前已经被get_variable()调用创建过，这里会抛出一个异常。这种机制避免由于误操作而复用变量。如果要复用一个变量，需要通过设置变量作用域的reuse属性为True来显式的实现
    ```
    with tf.variable_scope('relu', reuse=True):
        threshold = tf.get_variable('threshold')
    ```
    这段代码会获取既有的‘relu/threshold’变量，如果该变量不存在，或者在调用get_variable()时没有创建成功，那么会抛出一个异常

In [20]:
def relu(X):
    w_shape = (int(X.get_shape()[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name='weights')
    b = tf.Variable(0.0, name='bias')
    z = tf.add(tf.matmul(X, w), b, name='z')
    with tf.variable_scope('relu', reuse=True):
        threshold = tf.get_variable('threshold')
    return tf.maximum(z, threshold, name='max')

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
with tf.variable_scope('relu', reuse=tf.AUTO_REUSE):
      threshold = tf.get_variable('threshold', shape=(), initializer=tf.constant_initializer(0.0))
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name='output')