In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

2.0.1
sys.version_info(major=3, minor=7, micro=4, releaselevel='final', serial=0)
matplotlib 3.1.3
numpy 1.18.1
pandas 1.0.1
sklearn 0.22.1
tensorflow 2.0.1
tensorflow_core.keras 2.2.4-tf


In [2]:
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()

print(housing.DESCR)
print(housing.data.shape)
print(housing.target.shape)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block
        - HouseAge      median house age in block
        - AveRooms      average number of rooms
        - AveBedrms     average number of bedrooms
        - Population    block population
        - AveOccup      average house occupancy
        - Latitude      house block latitude
        - Longitude     house block longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
http://lib.stat.cmu.edu/datasets/

The target variable is the median house value for California districts.

This dataset was derived from the 1990 U.S. census, using one row per census
block group. A block group is the smallest geographical unit for which the U.S.
Census Bur

In [3]:
from sklearn.model_selection import train_test_split

x_train_all, x_test, y_train_all, y_test = train_test_split(housing.data, housing.target, random_state=7)

x_train, x_valid, y_train, y_valid = train_test_split(x_train_all, y_train_all, random_state=11)
print(x_train.shape, y_train.shape)
print(x_valid.shape, y_valid.shape)
print(x_test.shape, y_test.shape)

(11610, 8) (11610,)
(3870, 8) (3870,)
(5160, 8) (5160,)


In [4]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)

In [5]:
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=x_train.shape[1:]),
    keras.layers.Dense(1)
])


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 30)                270       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 31        
Total params: 301
Trainable params: 301
Non-trainable params: 0
_________________________________________________________________


In [7]:
metric = keras.metrics.MeanSquaredError()

print(metric([5.], [2.]))
print(metric([1.],[0.]))
print(metric.result())
metric.reset_states()
print(metric([1.],[3.]))
print(metric.result())

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)


In [14]:
# fit函数：
# 1. batch遍历数据集 metric
#　1.1自动求导
#２.epoch验证集 metric
epochs = 30
batch_size = 32
steps_per_epoch = len(x_train_scaled) // batch_size # //  整除
optimizer = keras.optimizers.SGD()
metric = keras.metrics.MeanSquaredError()

def random_batch_data(x, y, batch_size = 32):
    idx = np.random.randint(0, len(x), size=batch_size)
    return x[idx], y[idx]

for epoch in range(epochs):
    metric.reset_states()
    for _ in range(steps_per_epoch):
        with tf.GradientTape() as tape:
            x_batch, y_batch = random_batch_data(x_train_scaled, y_train, batch_size)
            y_pred = model(x_batch)
            loss = tf.reduce_mean(keras.losses.mean_squared_error(y_batch, y_pred))
            metric(y_batch, y_pred)
        grads = tape.gradient(loss, model.trainable_variables)
        grads_and_vars = zip(grads, model.trainable_variables)
        optimizer.apply_gradients(grads_and_vars)
        print("\rEpoch", epoch, "train mse:", metric.result().numpy(), end=" ")
    y_valid_pred = model(x_valid_scaled)
    valid_loss = keras.losses.mean_squared_error(y_valid, y_valid_pred)
    print("\r Valid mse: ", valid_loss.numpy())
        

 Valid mse:  [1.4141254 1.38271   1.4060193 ... 1.4067539 1.3850067 1.396868 ]
 Valid mse:  [1.3879762 1.4025494 1.4038855 ... 1.3855348 1.3816978 1.3816775]
 Valid mse:  [1.3912798 1.4093871 1.3931597 ... 1.3838842 1.3826039 1.3822168]
 Valid mse:  [1.398144  1.3952924 1.3833652 ... 1.3909092 1.3819871 1.3861772]
 Valid mse:  [1.4093728 1.3816515 1.3816628 ... 1.4144214 1.3928028 1.4031798]
 Valid mse:  [1.3898083 1.3991848 1.3817968 ... 1.3877413 1.3819319 1.38715  ]
 Valid mse:  [1.3974268 1.3845586 1.3822436 ... 1.3935685 1.3856416 1.3859234]
 Valid mse:  [1.3848157 1.3882247 1.3814793 ... 1.3874767 1.3829085 1.384439 ]
 Valid mse:  [1.3847616 1.3948585 1.3815098 ... 1.384924  1.3816727 1.3830552]
 Valid mse:  [1.3829598 1.3931457 1.3824304 ... 1.386058  1.3823364 1.3821487]
 Valid mse:  [1.4006087 1.3816285 1.3875215 ... 1.4085143 1.3903701 1.3949078]
 Valid mse:  [1.383177  1.3881332 1.3825922 ... 1.3867644 1.3823168 1.3814967]
 Valid mse:  [1.3837011 1.3834667 1.382103  ... 1.39