In [1]:
import tensorflow as tf

## 创建一个计算图并在会话中执行

In [4]:
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = x * x * y + y + 2

In [5]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
    print(result)

42


In [6]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    result = f.eval()
    print(result)

42


## 管理图

In [7]:
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

In [10]:
# 创建的结点会自动添加到默认图上，如果想要管理多个互不依赖的图，可以创建一个新的图，用with块临时将它设置为默认图
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
    
print(x2.graph is graph)
print(x2.graph is tf.get_default_graph())

True
False


## 节点的生命周期

In [11]:
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

10
15


在图的每次执行过程中，所有的节点值都会被丢弃，但是变量的值不会，因为变量的值是由会话维护的。变量的生命周期从初始化器的执行开始，到关闭会话结束。

In [12]:
# 上面在对y和z求值时会重复计算w和x重复求值，若不希望对y和z重复求值，则可以告诉TF在一次图的执行中完成y和z的求值
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

10
15


## TensorFlow中的线性回归

In [14]:
import numpy as np
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
m, n = housing.data.shape
# np.c_中的c是column（列）的缩写，是按列叠加两个矩阵的意思，也可以说是按行连接两个矩阵，就是把两矩阵左右相加，要求行数相等，类似于pandas中的merge()
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)  # 正规解

# 与numpy计算正规方程相比，使用tf可以自动将计算分发到gpu上去，如果可以的话
with tf.Session() as sess:
    theta_value = theta.eval()
    print(theta_value)

[[-3.7171074e+01]
 [ 4.3633682e-01]
 [ 9.3871783e-03]
 [-1.0717344e-01]
 [ 6.4540231e-01]
 [-4.1238391e-06]
 [-3.7809242e-03]
 [-4.2373490e-01]
 [-4.3720812e-01]]


In [32]:
# 对特征向量做归一化
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(housing_data_plus_bias)
scaler_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

## 实现梯度下降

### 手工计算梯度

In [35]:
n_epochs = 1000
learning_rate = 0.1

X = tf.constant(scaler_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

gradients = 2 / m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        
        if epoch % 100 == 0:
#             print('theta=', theta.eval())
            print('epoch: ', epoch, ' mse=', mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

epoch:  0  mse= 6.684296
epoch:  100  mse= 4.8095016
epoch:  200  mse= 4.8035173
epoch:  300  mse= 4.803269
epoch:  400  mse= 4.803256
epoch:  500  mse= 4.8032546
epoch:  600  mse= 4.8032537
epoch:  700  mse= 4.8032537
epoch:  800  mse= 4.8032546
epoch:  900  mse= 4.8032537


### 使用自动微分

In [36]:
from sklearn.preprocessing import StandardScaler

n_epochs = 1000
learning_rate = 0.1

X = tf.constant(scaler_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        
        if epoch % 100 == 0:
#             print('theta=', theta.eval())
            print('epoch: ', epoch, ' mse=', mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

epoch:  0  mse= 9.153329
epoch:  100  mse= 4.8080773
epoch:  200  mse= 4.8034444
epoch:  300  mse= 4.803262
epoch:  400  mse= 4.803254
epoch:  500  mse= 4.8032537
epoch:  600  mse= 4.8032546
epoch:  700  mse= 4.803254
epoch:  800  mse= 4.8032537
epoch:  900  mse= 4.8032537


### 使用优化器

In [37]:
n_epochs = 1000
learning_rate = 0.1

X = tf.constant(scaler_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        
        if epoch % 100 == 0:
#             print('theta=', theta.eval())
            print('epoch: ', epoch, ' mse=', mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

epoch:  0  mse= 6.417272
epoch:  100  mse= 4.827496
epoch:  200  mse= 4.805356
epoch:  300  mse= 4.803524
epoch:  400  mse= 4.803294
epoch:  500  mse= 4.8032603
epoch:  600  mse= 4.8032546
epoch:  700  mse= 4.8032546
epoch:  800  mse= 4.8032537
epoch:  900  mse= 4.8032537


## 给训练算法提供数据

In [43]:
n_epochs = 10
learning_rate = 0.1

# 创建占位符节点
X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

batch_size = 1000
n_batches = int(np.ceil(m / batch_size))

# 创建batch
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index) 
    indices = np.random.randint(m, size=batch_size)
    return scaler_housing_data_plus_bias[indices] , housing.target.reshape(-1, 1)[indices]

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            print('batch index: ', batch_index, ' mse=', mse.eval())
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        print('epoch: ', epoch, ' mse=', mse.eval())
    best_theta = theta.eval()

batch index:  0  mse= 7.4008985
batch index:  1  mse= 6.282696
batch index:  2  mse= 5.685436
batch index:  3  mse= 5.352896
batch index:  4  mse= 5.161776
batch index:  5  mse= 5.049202
batch index:  6  mse= 4.981522
batch index:  7  mse= 4.9400396
batch index:  8  mse= 4.914078
batch index:  9  mse= 4.8974237
batch index:  10  mse= 4.886406
batch index:  11  mse= 4.8788342
batch index:  12  mse= 4.873391
batch index:  13  mse= 4.8692775
batch index:  14  mse= 4.8660083
batch index:  15  mse= 4.8632836
batch index:  16  mse= 4.86092
batch index:  17  mse= 4.858804
batch index:  18  mse= 4.856863
batch index:  19  mse= 4.855051
batch index:  20  mse= 4.85334
epoch:  0  mse= 4.8517118
batch index:  0  mse= 4.8517118
batch index:  1  mse= 4.850152
batch index:  2  mse= 4.848654
batch index:  3  mse= 4.847211
batch index:  4  mse= 4.8458195
batch index:  5  mse= 4.844475
batch index:  6  mse= 4.8431754
batch index:  7  mse= 4.841917
batch index:  8  mse= 4.840702
batch index:  9  mse= 4.8

## 保存和恢复模型

In [45]:
n_epochs = 1000
learning_rate = 0.1

X = tf.constant(scaler_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

# saver
saver = tf.train.Saver()
# 按照变量名保存和恢复变量
# saver = tf.train.Saver({'weight': theta})

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        
        if epoch % 100 == 0:
            print('epoch: ', epoch, ' mse=', mse.eval())
            saver.save(sess, 'model.ckpt')
        sess.run(training_op)
    
    best_theta = theta.eval()
    saver.save(sess, 'model_final.ckpt')

epoch:  0  mse= 12.300475
epoch:  100  mse= 4.8090396
epoch:  200  mse= 4.80364
epoch:  300  mse= 4.8032956
epoch:  400  mse= 4.803259
epoch:  500  mse= 4.803254
epoch:  600  mse= 4.8032546
epoch:  700  mse= 4.8032537
epoch:  800  mse= 4.803254
epoch:  900  mse= 4.803254


In [46]:
# 恢复
with tf.Session() as sess:
    saver.restore(sess, 'model_final.ckpt')
    mse = sess.run(mse)
    print(mse)

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from model_final.ckpt
4.8032537


## 用TensorBoard来可视化图和训练曲线

In [50]:
from datetime import datetime

now = datetime.utcnow().strftime('%Y%m%d%H%M%S')
root_logdir = 'tf_logs'
logdir = '{}/run-{}/'.format(root_logdir, now)

n_epochs = 10
learning_rate = 0.1

# 创建占位符节点
X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

batch_size = 1000
n_batches = int(np.ceil(m / batch_size))

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()

# 创建batch
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index) 
    indices = np.random.randint(m, size=batch_size)
    return scaler_housing_data_plus_bias[indices] , housing.target.reshape(-1, 1)[indices]

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                print(summary_str)
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()
    file_writer.close()

b'\n\r\n\x06MSE_23\x15b6\x01A'
b'\n\r\n\x06MSE_23\x15\xc2\xdb\x9e@'
b'\n\r\n\x06MSE_23\x15X\x1c\xa8@'
b'\n\r\n\x06MSE_23\x15\x93\xed\xac@'
b'\n\r\n\x06MSE_23\x15<(\xa3@'
b'\n\r\n\x06MSE_23\x15&\x05\xa1@'
b'\n\r\n\x06MSE_23\x15yL\xa2@'
b'\n\r\n\x06MSE_23\x15\xbco\x9d@'
b'\n\r\n\x06MSE_23\x15\x0e\xc1\x97@'
b'\n\r\n\x06MSE_23\x15R1\x9f@'
b'\n\r\n\x06MSE_23\x15d\x8c\xa1@'
b'\n\r\n\x06MSE_23\x15F\xa4\xa1@'
b'\n\r\n\x06MSE_23\x15\xbe(\x9e@'
b"\n\r\n\x06MSE_23\x15'\x97\x98@"
b'\n\r\n\x06MSE_23\x15Ho\x91@'
b'\n\r\n\x06MSE_23\x15\xf1\x8c\x98@'
b'\n\r\n\x06MSE_23\x15s\x99\x9f@'
b'\n\r\n\x06MSE_23\x15\xdf\x1e\x98@'
b'\n\r\n\x06MSE_23\x15\xfc\xdd\x95@'
b'\n\r\n\x06MSE_23\x15`\xb6\x95@'
b'\n\r\n\x06MSE_23\x15d\xa5\x9b@'
b'\n\r\n\x06MSE_23\x15RJ\x99@'
b'\n\r\n\x06MSE_23\x15h"\x9e@'
b'\n\r\n\x06MSE_23\x15\xf6\xb9\x95@'
b'\n\r\n\x06MSE_23\x15;\x81\x99@'
b'\n\r\n\x06MSE_23\x15\xfaz\x9a@'
b'\n\r\n\x06MSE_23\x15l\xe5\x98@'
b'\n\r\n\x06MSE_23\x15u\xa8\x96@'
b'\n\r\n\x06MSE_23\x15\xb2Y\x9b@'
b'\n\r\n\x06MS

- 启动 tensorboard 服务器
```
tensorboard --logdir tf_logs/
```
- 浏览器中输入 [http://localhost:6006]() 即可看到 tensorboard 界面

## 命名作用域

In [55]:
# 在这个作用域中定义的每个操作现在都有一个 ‘my_loss/’ 前缀
with tf.name_scope('my_loss') as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name='mse')
    
print(error.op.name)
print(mse.op.name)

my_loss/sub
my_loss/mse


## 模块化

In [60]:
def relu(X):
    w_shape = (int(X.get_shape()[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name='weights')
    b = tf.Variable(0.0, name='bias')
    z = tf.add(tf.matmul(X, w), b, name='z')
    
    return tf.maximum(z, 0., name='relu')

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name='output')

## 共享变量

如果想要在图的不同组件中共享变量：
- 最简单的做法是先创建，然后将其作为参数传递给需要他的函数。但是，如果有太多的类似的共享参数，则参数传递便会很痛苦
    ```
    def relu(X, threshold):
    ```

- 也可以在第一次调用时将共享变量设置为函数的一个属性
    ```
    def relu(X):
        if not hasattr(relu, 'threshold'):
            relu.threshold = tf.Variable(0.0, name='max')
    ```
    
- TF提供：
    如果共享变量不存在，该方法先通过get_variable()函数创建共享变量；如果已经存在了，就复用该共享变量。期望的行为通过当前variable_scope()的一个属性来控制（创建或者复用）
    ```
    with tf.variable_scope('relu'):
        threshold = tf.get_variable('threshold', shape=(), initializer=tf.constant_initilizer(0.0))
    ```
    如果这个变量之前已经被get_variable()调用创建过，这里会抛出一个异常。这种机制避免由于误操作而复用变量。如果要复用一个变量，需要通过设置变量作用域的reuse属性为True来显式的实现
    ```
    with tf.variable_scope('relu', reuse=True):
        threshold = tf.get_variable('threshold')
    ```
    这段代码会获取既有的‘relu/threshold’变量，如果该变量不存在，或者在调用get_variable()时没有创建成功，那么会抛出一个异常

In [65]:
def relu(X):
    w_shape = (int(X.get_shape()[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name='weights')
    b = tf.Variable(0.0, name='bias')
    z = tf.add(tf.matmul(X, w), b, name='z')
    with tf.variable_scope('relu', reuse=True):
        threshold = tf.get_variable('threshold')
    return tf.maximum(z, threshold, name='max')

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
with tf.variable_scope('relu', reuse=tf.AUTO_REUSE):
      threshold = tf.get_variable('threshold', shape=(), initializer=tf.constant_initializer(0.0))
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name='output')