## 使用GradientTape 与Keras集合

In [2]:
import tensorflow.keras as keras
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
% matplotlib inline
import numpy as np
import sklearn
import os
import sys
import time
import tensorflow as tf


In [3]:
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

2.3.0
sys.version_info(major=3, minor=7, micro=11, releaselevel='final', serial=0)
matplotlib 3.4.2
numpy 1.18.5
pandas 1.3.3
sklearn 1.0
tensorflow 2.3.0
tensorflow.keras 2.4.0


### 1. 导入数据

In [4]:
from sklearn.datasets import fetch_california_housing

# 加利福尼亚房屋数据集
housing = fetch_california_housing()

# 打印数据集描述
print(housing.DESCR)


.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

In [5]:
# 打印特征shape
print(housing.data.shape)
# 打印结果shape
print(housing.target.shape)

(20640, 8)
(20640,)


In [6]:
import pprint

# 打印数据, pprint 和 print 类似, 但是 pprint 打出的数据结构更加完整
pprint.pprint(housing.data[0:5])
pprint.pprint(housing.target[0:5])

array([[ 8.32520000e+00,  4.10000000e+01,  6.98412698e+00,
         1.02380952e+00,  3.22000000e+02,  2.55555556e+00,
         3.78800000e+01, -1.22230000e+02],
       [ 8.30140000e+00,  2.10000000e+01,  6.23813708e+00,
         9.71880492e-01,  2.40100000e+03,  2.10984183e+00,
         3.78600000e+01, -1.22220000e+02],
       [ 7.25740000e+00,  5.20000000e+01,  8.28813559e+00,
         1.07344633e+00,  4.96000000e+02,  2.80225989e+00,
         3.78500000e+01, -1.22240000e+02],
       [ 5.64310000e+00,  5.20000000e+01,  5.81735160e+00,
         1.07305936e+00,  5.58000000e+02,  2.54794521e+00,
         3.78500000e+01, -1.22250000e+02],
       [ 3.84620000e+00,  5.20000000e+01,  6.28185328e+00,
         1.08108108e+00,  5.65000000e+02,  2.18146718e+00,
         3.78500000e+01, -1.22250000e+02]])
array([4.526, 3.585, 3.521, 3.413, 3.422])


#### 1.1 分隔数据集

In [7]:
from sklearn.model_selection import train_test_split

# 划分训练集和测试集
x_train_all, x_test, y_train_all, y_test = train_test_split(housing.data, housing.target, random_state=7)

# 对训练集再次划分为训练集和验证集
x_train, x_valid, y_train, y_valid = train_test_split(x_train_all, y_train_all, random_state=20)

In [8]:
# 打印shape
print(x_train_all.shape, y_train_all.shape)
print(x_train.shape, y_train.shape)
print(x_valid.shape, y_valid.shape)
print(x_test.shape, y_test.shape)

(15480, 8) (15480,)
(11610, 8) (11610,)
(3870, 8) (3870,)
(5160, 8) (5160,)


#### 1.2 数据归一化

In [9]:
# 打印数据的最大最小值
print(np.max(x_train), np.min(x_train))

35682.0 -124.35


In [10]:
# 可以看到数据集的 scale 过大, 所以我们需要做数据集的归一化
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# 数据归一化
# 注意 fit_transform 和 transform 的区别
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.fit_transform(x_valid)
x_test_scaled = scaler.fit_transform(x_test)

In [11]:
print(np.max(x_train_scaled), np.min(x_train_scaled))

94.77756736476127 -2.3750505046419277


### 2. 搭建神经网络

#### 2.1 使用 metric

- metric 会累积计算
- 如果不想让它累积计算, 可以使用 reset_states 重置状态

In [22]:
# 均方差
metric = keras.metrics.MeanSquaredError()
print(metric([5.], [2.]))  # 均方差为 9
print(metric([0.], [1.]))  # 均方差为 1
# 注意 : metric 会累积计算
print(metric.result())

print(metric.reset_states())

# 如果不想让它累积计算, 可以使用 reset_states 重置状态
print(metric([5.], [2.]))  # 均方差为 9
metric.reset_states()
print(metric([0.], [1.]))  # 均方差为 1
# 注意 : metric 会累积计算
print(metric.result())

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
None
tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)


In [28]:
# 1. batch 遍历训练接 metric
# 1.1 自动求导
# 2. epoch 结束, 验证集 metric

epochs = 20
batch_size = 32
# // 表示整数除法, 返回值是整数
# 计算轮数, 每次训练 batch_size 数据
steps_per_epoch = len(x_train_scaled) // batch_size
optimizer = keras.optimizers.SGD()
metric = keras.metrics.MeanSquaredError()


# 随机读取训练数据
def random_batch(x, y, batch_size=32):
    idx = np.random.randint(0, len(x), size=batch_size)
    #  x[idx] , 这种写法要求x必须是numpy数组, 传入索引, 返回对应索引的数据
    return x[idx], y[idx]


model = keras.Sequential([
    keras.layers.Dense(units=30, activation='relu', input_shape=x_train.shape[1:]),  # x_train.shape[1:] (8,)
    keras.layers.Dense(units=1)
])


In [29]:

# 自定义

for epoch in range(epochs):
    # 重置状态, 防止累积计算均方差
    metric.reset_states()
    for step in range(steps_per_epoch):
        # 获取指定batch_size 的随机训练数据
        x_batch, y_batch = random_batch(x_train_scaled, y_train, batch_size)
        with tf.GradientTape() as tape:
            # 预测
            y_pred = model(x_batch)
            # 计算loss均值
            loss = tf.reduce_mean(keras.losses.mean_squared_error(y_batch, y_pred))
            metric(y_batch, y_pred)
        # ?
        grads = tape.gradient(loss, model.variables)

        # 绑定 梯度 和 模型变量
        grads_and_vars = zip(grads, model.variables)
        # 更新参数
        optimizer.apply_gradients(grads_and_vars) # apply_gradients([(梯度值, 变量)])
        # 打印结果
        print("\r Epoch ", epoch, " train mse : ", metric.result().numpy(), end="")
    # 验证集
    y_valid_pred = model(x_valid_scaled)
    valid_loss = tf.reduce_mean(keras.losses.mean_squared_error(y_valid, y_valid_pred))
    print("\t" , "valid mse : ", valid_loss.numpy())

 Epoch  0  train mse :  1.7367289	 valid mse :  1.382116184 0  train mse :  2.0070326 0  train mse :  1.8763909 0  train mse :  1.84290151.69161630  train mse :  1.821
 Epoch  1  train mse :  1.27701371.3048114	 valid mse :  1.3464397
 Epoch  2  train mse :  1.2732786	 valid mse :  1.3395517
 Epoch  3  train mse :  1.2411273	 valid mse :  1.336819412 3  train mse :  1.1845381 3  train mse :  1.19248683  train mse :  1.2370757
 Epoch  4  train mse :  1.2605356	 valid mse :  1.336759757 train mse :  1.2489989 train mse :  1.2602865
 Epoch  5  train mse :  1.2886124	 valid mse :  1.3329636
 Epoch  6  train mse :  1.2735722	 valid mse :  1.335649369 train mse :  1.284568
 Epoch  7  train mse :  1.2823801	 valid mse :  1.339485
 Epoch  8  train mse :  1.2690874	 valid mse :  1.3332739
 Epoch  9  train mse :  1.2906994	 valid mse :  1.3332874
 Epoch  10  train mse :  1.3029801	 valid mse :  1.3366023464 train mse :  1.2894286 10  train mse :  1.289089
 Epoch  11  train mse :  1.2976263	 vali