In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

2.2.0
sys.version_info(major=3, minor=6, micro=9, releaselevel='final', serial=0)
matplotlib 3.3.4
numpy 1.19.5
pandas 1.1.5
sklearn 0.24.2
tensorflow 2.2.0
tensorflow.keras 2.3.0-tf


In [2]:
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
print(housing.DESCR)
print(housing.data.shape)
print(housing.target.shape)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block
        - HouseAge      median house age in block
        - AveRooms      average number of rooms
        - AveBedrms     average number of bedrooms
        - Population    block population
        - AveOccup      average house occupancy
        - Latitude      house block latitude
        - Longitude     house block longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
http://lib.stat.cmu.edu/datasets/

The target variable is the median house value for California districts.

This dataset was derived from the 1990 U.S. census, using one row per census
block group. A block group is the smallest geographical unit for which the U.S.
Census Bur

In [3]:
from sklearn.model_selection import train_test_split

x_train_all, x_test, y_train_all, y_test = train_test_split(
    housing.data, housing.target, random_state = 7)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_all, y_train_all, random_state = 11)
print(x_train.shape, y_train.shape)
print(x_valid.shape, y_valid.shape)
print(x_test.shape, y_test.shape)


(11610, 8) (11610,)
(3870, 8) (3870,)
(5160, 8) (5160,)


In [4]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)

In [5]:
# 首先我们看下metric(算子)使用
# 掌握reset_states的使用
# 模拟每一轮epoch训练的数据其实是将所有batch_size的均方差加起来求平均值
metric = keras.metrics.MeanSquaredError()
print(metric([5.], [2.]))
print('-'*50)
print(metric([0.], [1.]))
print('-'*50)
#具有累加功能，第1个是9，第二个是1，平均是5，(9+1)/2
# result()里面的值是metric没有经过reset_states()重置时，里面所有值的平均数
print(metric.result())
print('-'*50)
#不想累加就reset
metric.reset_states()  #每次epoch需要reset，相当于清空之前epoch算的
metric([1.], [3.])
print(metric.result())

tf.Tensor(9.0, shape=(), dtype=float32)
--------------------------------------------------
tf.Tensor(5.0, shape=(), dtype=float32)
--------------------------------------------------
tf.Tensor(5.0, shape=(), dtype=float32)
--------------------------------------------------
tf.Tensor(4.0, shape=(), dtype=float32)


In [6]:
#训练集的样本数
print(len(x_train_scaled))
print(x_train.shape[1:])  #特征数

11610
(8,)


In [7]:
11610/32  #每次训练给予的样本数

362.8125

In [8]:
t= np.arange(6).reshape(1, 2, 1, 3)
print(t)
tf.squeeze(t)  # [2, 3]  #降维，从张量形状中移除大小为1的维度

[[[[0 1 2]]

  [[3 4 5]]]]


<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[0, 1, 2],
       [3, 4, 5]])>

In [9]:
#随机挑选5个样本，看下特征，标签
idx = np.random.randint(0, 1000, size=5)
print(idx)
print(x_train_scaled[idx])
y_train[idx]

[219 115 580 962 684]
[[-2.31468099e-01 -4.82395224e-02  2.75999844e-02  7.94633262e-02
  -5.59382726e-01  1.53377321e-02  1.56112088e+00 -1.90503424e+00]
 [ 7.78472443e-01 -1.08954260e+00  8.45508087e-01 -1.20959861e-03
  -1.13905209e-01  3.19084823e-03  1.25318536e+00 -1.37572664e+00]
 [ 2.02163604e+00 -1.97064520e+00  4.53768873e-01 -7.72085820e-02
   1.06919598e+00 -5.25523370e-02 -1.24295862e+00  1.16594852e+00]
 [-2.62425968e-01  9.93063554e-01 -5.46798793e-01 -9.17670797e-02
  -6.51018630e-01 -9.55003183e-02 -7.53061201e-01  5.61738903e-01]
 [-3.89667607e-01  9.12963317e-01 -2.43600582e-01 -5.17982452e-02
  -5.33071426e-01 -1.58033144e-02  1.27184812e+00 -1.57546536e+00]]


array([1.094, 2.026, 3.37 , 3.181, 1.833])

In [10]:
# squeeze的作用
t=tf.constant([[1],[2],[3]])
print(t)
# 后面这个1加不加都无所谓
tf.squeeze(t,1)

tf.Tensor(
[[1]
 [2]
 [3]], shape=(3, 1), dtype=int32)


<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 2, 3], dtype=int32)>

In [11]:
# 为了在一次epoch内只打印一次损失，模拟官方接口
for i in range(10):
    print('helloworld',end='\r')

helloworld

In [12]:
# 自定义损失函数
def customized_mse(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_pred - y_true))

In [13]:
# 自定义激活函数
customized_softplus = keras.layers.Lambda(lambda  x: tf.math.log(1+tf.math.exp(x)))

In [16]:
#自定义layer
class CustomizedDenseLayer(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        '''
        初始化
        :param units: 输入维度
        :param activation: 激活函数
        :param kwargs:
        '''
        super(CustomizedDenseLayer, self).__init__(**kwargs)
        self.units = units
        self.activation = keras.layers.Activation(activation)

    def build(self, input_shape):
        '''
        Creates the variables of the layer (optional, for subclass implementers).
        :param input_shape: 输入维度
        :return: 返回一个自定义的层
        '''
        self.kernel = self.add_weight(name= 'kernel',
                                      shape= (input_shape[1], self.units),
                                      initializer= 'uniform',
                                      trainable= True)
        self.bias = self.add_weight(name= 'bias',
                                    shape= (self.units, ),
                                    initializer= 'zeros',
                                    trainable= True)
        super(CustomizedDenseLayer, self).build(input_shape)

    def call(self, x):
        return self.activation(x @ self.kernel + self.bias)

In [17]:
#
# 1. batch 遍历训练集 metric
#    1.1 自动求导
# 2. epoch结束 验证集 metric

epochs = 5 #训练多少次，没有early_stopping就真的是循环100次
batch_size = 32 
steps_per_epoch = len(x_train_scaled) // batch_size  #计算每一次epoch有多少个batch
print(steps_per_epoch)
print('-'*50)
optimizer = keras.optimizers.SGD()
# 定义算子
metric = keras.metrics.MeanSquaredError()

#随机取数据,取出来32个样本
def random_batch(x, y):
    idx = np.random.randint(0, len(x), size=batch_size)
    return x[idx], y[idx]

model = keras.models.Sequential([
    CustomizedDenseLayer(30, activation=customized_softplus,
                         input_shape=x_train.shape[1:]),
    CustomizedDenseLayer(1),
])
# model = keras.models.Sequential([
#     keras.layers.Dense(30, activation='relu',
#                        input_shape=x_train.shape[1:]),
#     keras.layers.Dense(1),
# ])
print(model.variables)
# 这个variables输出的是第一层隐藏层初始化的w和b，第二层输出层初始化的w和b（这个b只有一个0，容易看漏）

362
--------------------------------------------------
[<tf.Variable 'customized_dense_layer_2/kernel:0' shape=(8, 30) dtype=float32, numpy=
array([[ 3.25188078e-02,  4.95651104e-02, -3.55176814e-02,
         4.04045917e-02, -3.12330369e-02, -1.26496069e-02,
         2.88982280e-02, -4.61995266e-02,  1.31324567e-02,
        -2.38474142e-02, -1.65327191e-02,  1.68738402e-02,
         4.20048721e-02,  4.36510779e-02, -2.77848374e-02,
         5.46249002e-03, -1.03545897e-02, -2.75423769e-02,
         1.07335448e-02, -2.85104997e-02,  1.19488314e-03,
         3.33558396e-03,  3.99477743e-02,  4.98084538e-02,
         2.62764134e-02, -1.39222369e-02, -4.96409535e-02,
         4.15737368e-02,  5.68091869e-04, -4.41354290e-02],
       [-1.22189522e-03, -1.85407288e-02, -1.74312368e-02,
        -2.95979623e-02,  2.24985220e-02,  3.70680131e-02,
         1.08455047e-02, -6.69698790e-03, -5.12184948e-03,
         3.52028124e-02, -4.65253741e-03, -5.98502159e-03,
        -2.33681202e-02,  6.6839

In [18]:
#下面一部分相当于替代了fit和compile函数
# model.summary()
# epochs是模型层数
for epoch in range(epochs):#每一轮epochs训练所有的样本
    metric.reset_states()  #清空损失
    for step in range(steps_per_epoch):  # 这个steps_per_epoch不严谨，因为这个值是整除得到的，意味着其实还有一些数据遗漏了
        #随机取32个样本
        x_batch, y_batch = random_batch(x_train_scaled, y_train)
        with tf.GradientTape() as tape:
            #得到预测值
            # y_pred是一个shape=(32,1)的张量
            # 用model和model.predict是一样作用的，只是使用场景不同
            # model时函数式调用，调用的是model的call方法，predict方法时给大规模的batch数据用的，小规模直接用model更快
            y_pred = model(x_batch) #等价于model.predict
 
            #删减了值为1的维度,二阶张量，变为一阶张量
            # 之所以要变为一阶的是因为标签值是一维的，不是二维的，但(32,1)是二维的，而我们要与标签值运算计算出损失
            y_pred = tf.squeeze(y_pred, 1)
            #计算损失
            loss = keras.losses.mean_squared_error(y_batch, y_pred)
            #算子计算均方误差，和上一行代码计算的值一样，只不过会用于不同的用途，上一句用于减小误差，下一句用于打印显示
            # 这里的计算得到的值会进入到metric里面
            metric(y_batch, y_pred)
        #求梯度，不得不说这行代码野心很大，model.variables这么多数据，这里不止对w求导，还对b求导了
        grads = tape.gradient(loss, model.variables)
        #梯度和变量绑定，对应的变量减去对应的梯度,这个zip的存在是因为apply_gradients对传输进去数据格式的要求
        # zip就将对应数据封装成数组里面是元组的格式
        grads_and_vars = zip(grads, model.variables)
        # 更新，通过，apply_gradients去更新模型的model.variables，也就是更新了w,b
        # 不用这个方法还得写for循环
        optimizer.apply_gradients(grads_and_vars)
        # 看上面的代码有测试metric.result()是啥
        # result()里面的值是metric没有经过reset_states()重置时，里面所有值的平均数
        p="Epoch "+str(epoch)+" train mse:"+str(metric.result().numpy())
        #打印，不要在循环内加print，影响\r
        print(p, end='\r')
    print('') #打换行的目的是为了新起一行显示


    #搞了一轮训练后，认为模型可以了，去验证集验证，并打印误差
    y_valid_pred = model(x_valid_scaled)
    # 删减了值为1的维度
#     print(y_valid_pred.shape)
    y_valid_pred = tf.squeeze(y_valid_pred, 1)
#     print(y_valid_pred.shape)
    valid_loss = keras.losses.mean_squared_error(y_valid_pred, y_valid)
    print("\t", "valid mse: ", valid_loss.numpy())
        


Epoch 0 train mse:0.95475894
	 valid mse:  0.7271466576003275
Epoch 1 train mse:0.64742587poch 1 train mse:0.6546194Epoch 1 train mse:0.6552668Epoch 1 train mse:0.649997Epoch 1 train mse:0.64947015Epoch 1 train mse:0.6449426Epoch 1 train mse:0.64381164
	 valid mse:  0.6813731261811148
Epoch 2 train mse:0.58717477
	 valid mse:  0.6249574592422186
Epoch 3 train mse:0.57688236Epoch 3 train mse:0.5771805
	 valid mse:  0.5895682566296544
Epoch 4 train mse:0.55005324poch 4 train mse:0.5493298Epoch 4 train mse:0.55079246Epoch 4 train mse:0.5450721Epoch 4 train mse:0.54520327
	 valid mse:  0.554914567051243
