In [1]:
import tensorflow as tf
print(tf.__version__)

2.0.0-rc0


## 自动求导机制

- GradientTape是eager模式下计算梯度用的
- watch(tensor)

作用：确保某个tensor被tape追踪

参数:tensor：一个Tensor或者一个Tensor列表

- gradient(target, sources)

作用：根据tape上面的上下文来计算某个或者某些tensor的梯度参数

target:被微分的Tensor或Tensor列表，你可以理角为经过某个函数之后的值

sources：Tensors或者Variables列表（当然可以只有一个值），你可以理解为函数的某个变量

返回：

一个列表表示各个变理的梯度值，和source中的变量列表一一对应，表明这个变量的梯度。

下面的例子可中的梯度计算部分可以更直观的理解这个函数的用法。

In [2]:
x = tf.constant(3.0)

with tf.GradientTape() as g:
    g.watch(x)
    y = x * x
    dy_dx = g.gradient(y, x) #y` = 2*x = 2*3 =6

In [3]:
dy_dx

<tf.Tensor: id=5, shape=(), dtype=float32, numpy=6.0>

## 案例1、模型自动求导

构建模型（神经网络的前向传播）->定义损失函数->定义优化函数->定义tape->模型得到预测值->前向传播得到loss->反向传播->用优化函数将计算出来的梯度更新到变量上面去

In [4]:
class MyModel(tf.keras.Model):
    
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__(name='my_model')
        self.num_classes = num_classes
        # 定义自己需要的层
        self.dense_1 = tf.keras.layers.Dense(32, activation='relu') # 隐藏层
        self.dense_2 = tf.keras.layers.Dense(num_classes) #输出层
    def call(self, inputs):
        # 定义前向传播
        # 使用在__init__定义的层
        x = self.dense_1(inputs)
        return self.dense_2(x)

In [5]:
import numpy as np
# 10 分类问题
data = np.random.random((1000, 32))
labels = np.random.random((1000,10))


In [6]:
model = MyModel(num_classes=10)

loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

with tf.GradientTape() as tape:
    predictions = model(data)
    loss = loss_object(labels, predictions)

gradients = tape.gradient(loss, model.trainable_variables) # 求梯度

optimizer.apply_gradients(zip(gradients, model.trainable_variables))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [7]:
model.trainable_variables

[<tf.Variable 'my_model/dense/kernel:0' shape=(32, 32) dtype=float32, numpy=
 array([[-0.13878939, -0.01602063, -0.13419615, ..., -0.2536759 ,
          0.12140706,  0.14884247],
        [-0.2123425 ,  0.02857339,  0.1192424 , ..., -0.15994148,
         -0.09618257,  0.21944065],
        [-0.05170468, -0.19856381, -0.24476491, ...,  0.17599663,
         -0.15568767, -0.30255696],
        ...,
        [-0.0309853 ,  0.14540717,  0.16526942, ..., -0.22511657,
          0.10425398,  0.24007842],
        [ 0.18376976,  0.11973252, -0.21223587, ..., -0.01269613,
          0.0319657 ,  0.20275332],
        [ 0.16396794,  0.17821506, -0.29619327, ..., -0.28940034,
         -0.11030591,  0.16110216]], dtype=float32)>,
 <tf.Variable 'my_model/dense/bias:0' shape=(32,) dtype=float32, numpy=
 array([-0.00099989,  0.00100002,  0.00099932,  0.00099974,  0.00100001,
        -0.001     , -0.00100001, -0.00099982,  0.00099991, -0.00099999,
        -0.00099985,  0.00099865, -0.00100002, -0.001     ,  0

## 案例2、GradientType自定义训练模型

In [8]:
class MyModel(tf.keras.Model):
    
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__(name='MyModel')
        self.num_classes = num_classes
        # 自定义自己需要的层
        self.dense_1 = tf.keras.layers.Dense(32, activation='relu')
        self.dense_2 = tf.keras.layers.Dense(num_classes)
        
    def call(self, inputs):
        # 定义前向传播
        # 使用在'__init__'定义的层
        x = self.dense_1(inputs)
        return self.dense_2(x)

In [9]:
import numpy as np

data = np.random.random((1000, 32))
labels = np.random.random((1000, 10))

In [10]:
model = MyModel(num_classes = 10)
# Instantiate an optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
# Instantiate a loss function
loss_fn = tf.keras.losses.CategoricalCrossentropy()
# Prepare the trainint dataset
batch_size =64
train_dataset = tf.data.Dataset.from_tensor_slices((data, labels))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)


In [11]:
# epoch
# batch_size
# tape 求梯度，梯度更新

In [12]:
epochs = 3
for epoch in range(epochs):
    print('Start of epoch %d' % (epoch,))
    
    # 遍历数据集的batch_size
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # 打开GradientType以记录正向传递期间运行的操作，这将启用自动区分。
        with tf.GradientTape() as tape:
            # 运行该模型的前向传播。模型应用于其输入的操作将记录在GradientTape上。
            logits = model(x_batch_train, training=True)
            # 计算这个minibatch的损失值
            loss_value = loss_fn(y_batch_train, logits)
        # 使用GradientTape自动获取可训练变量对于损失的梯度。    
        grads = tape.gradient(loss_value, model.trainable_weights)
        # 通过更新变量的值来最大程度地减少损失，从而执行梯度下降的一步。
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        # 每200 batches打印一次
        if step % 200 == 0:
            print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
            print('Seen so far: %s samples' % ((step + 1) * 64))

Start of epoch 0


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Training loss (for one batch) at step 0: 39.04924011230469
Seen so far: 64 samples
Start of epoch 1
Training loss (for one batch) at step 0: 38.56948471069336
Seen so far: 64 samples
Start of epoch 2
Training loss (for one batch) at step 0: 35.455162048339844
Seen so far: 64 samples


## 案例3、使用GradientTape自定义训练模型进阶(加入评估函数)

让我们将metric添加到组合中，下面可以从头开始编写的训练循环中随时使用内置指标(或编写的自定义指标)。流程如下：

- 在循环开始时初始化metrics
- metric.update_state():每batch之后更新
- metric.result():需要显示metric的当前值时调用
- metric.reset_states():需要清除metrics状态时重置(通常在每个epoch的结尾)

In [14]:
class MyModel(tf.keras.Model):
    
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__(name='my_model')
        self.num_classes = num_classes
        # 定义自己需要的层
        self.dense_1 = tf.keras.layers.Dense(32, activation='relu')
        self.dense_2 = tf.keras.layers.Dense(num_classes)
        
    def call(self, inputs):
        # 定义前向传播
        # 使用 __init__定义的层
        x = self.dense_1(inputs)
        return self.dense_2(x)

In [17]:
import numpy as np

x_train = np.random.random((1000,32))
y_train = np.random.random((1000,10))
x_val = np.random.random((200,32))
y_val = np.random.random((200,10))
x_test = np.random.random((200,32))
y_test = np.random.random((200,10))

#优化器
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
#损失函数
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

#准备metrics函数
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
val_acc_metric = tf.keras.metrics.CategoricalAccuracy()

#准备训练数据集
batch_size = 64
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

#准备测试数据集
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(64)

进行几个epoch运行训练循环：

In [18]:
model = MyModel(num_classes=10)
epochs = 3
for epoch in range(epochs):
    print('Start of epoch %d' % (epoch,))
    
    #遍历数据集的batch_size
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # 一个batch
        with tf.GradientTape() as tape:
            logits = model(x_batch_train)
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads,model.trainable_weights))
        
        #更新训练集的metrics
        train_acc_metric(y_batch_train,logits)
        
        #在每个epoch结束时显示metrics
        train_acc = train_acc_metric.result()
        print('Training acc over epoch :%s' % (float(train_acc),))
        
        #在每个epoch结束时重置训练指标
        train_acc_metric.reset_states()
        
        #在每个epoch结束时运行一个验证集
        for x_batch_val, y_batch_val in val_dataset:
            val_logits = model(x_batch_val)
            #更新验证集metrics
            val_acc_metric(y_batch_val, val_logits)
        val_acc = val_acc_metric.result()
        print('Validation acc :%s ' % (float(val_acc),))
        val_acc_metric.reset_states()
        
        #显示测试集
        

Start of epoch 0


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Training acc over epoch :0.09375
Validation acc :0.125 
Training acc over epoch :0.125
Validation acc :0.125 
Training acc over epoch :0.09375
Validation acc :0.125 
Training acc over epoch :0.125
Validation acc :0.125 
Training acc over epoch :0.1875
Validation acc :0.125 
Training acc over epoch :0.09375
Validation acc :0.125 
Training acc over epoch :0.078125
Validation acc :0.125 
Training acc over epoch :0.09375
Validation acc :0.125 
Training acc over epoch :0.078125
Validation acc :0.125 
Training acc over epoch :0.03125
Validation acc :0.125 
Training acc over epoch :0.09375
Validation acc :0.125 
Training acc over epoch :0.109375
Validation acc :0.125 
Training acc