In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import sklearn
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras

print (tf.__version__)
print (sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print (module.__name__, module.__version__)

2.0.0-beta1
sys.version_info(major=3, minor=6, micro=8, releaselevel='final', serial=0)
matplotlib 3.1.1
numpy 1.16.4
pandas 0.24.2
sklearn 0.21.2
tensorflow 2.0.0-beta1
tensorflow.python.keras.api._v2.keras 2.2.4-tf


In [3]:
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
print (housing.DESCR)
print (housing.data.shape)
print (housing.target.shape)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block
        - HouseAge      median house age in block
        - AveRooms      average number of rooms
        - AveBedrms     average number of bedrooms
        - Population    block population
        - AveOccup      average house occupancy
        - Latitude      house block latitude
        - Longitude     house block longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
http://lib.stat.cmu.edu/datasets/

The target variable is the median house value for California districts.

This dataset was derived from the 1990 U.S. census, using one row per census
block group. A block group is the smallest geographical unit for which the U.S.
Census Bur

In [4]:
import pprint
pprint.pprint (housing.data[0:5])
pprint.pprint(housing.target[0:5])

array([[ 8.32520000e+00,  4.10000000e+01,  6.98412698e+00,
         1.02380952e+00,  3.22000000e+02,  2.55555556e+00,
         3.78800000e+01, -1.22230000e+02],
       [ 8.30140000e+00,  2.10000000e+01,  6.23813708e+00,
         9.71880492e-01,  2.40100000e+03,  2.10984183e+00,
         3.78600000e+01, -1.22220000e+02],
       [ 7.25740000e+00,  5.20000000e+01,  8.28813559e+00,
         1.07344633e+00,  4.96000000e+02,  2.80225989e+00,
         3.78500000e+01, -1.22240000e+02],
       [ 5.64310000e+00,  5.20000000e+01,  5.81735160e+00,
         1.07305936e+00,  5.58000000e+02,  2.54794521e+00,
         3.78500000e+01, -1.22250000e+02],
       [ 3.84620000e+00,  5.20000000e+01,  6.28185328e+00,
         1.08108108e+00,  5.65000000e+02,  2.18146718e+00,
         3.78500000e+01, -1.22250000e+02]])
array([4.526, 3.585, 3.521, 3.413, 3.422])


In [5]:
from sklearn.model_selection import train_test_split

x_train_all, x_test, y_train_all, y_test = train_test_split(
    housing.data, housing.target, random_state = 7, test_size=0.25)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_all, y_train_all, random_state=11, test_size = 0.25)

print (x_train.shape, y_train.shape)
print (x_valid.shape, y_valid.shape)
print (x_test.shape, y_test.shape)

(11610, 8) (11610,)
(3870, 8) (3870,)
(5160, 8) (5160,)


In [6]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)  #这里使用fit_transform函数，因为在训练集上获得均值和方差，在验证集和测试集上面使用
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)

In [11]:
# metric 

metric = keras.metrics.MeanSquaredError()
print(metric([5.],[2.]))
print (metric([0.],[1.]))  ## 累加，第一个9 第二个1 ，平均后是5
print (metric.result())

metric.reset_states()
metric([1.],[3.])
print (metric.result())


tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)


In [13]:
'''
1. batch 遍历训练集 metric
    1.1 自动求导
2. epoch结束， 验证集 metric
'''
epochs  = 100
batch_size = 32
steps_per_epoch = len(x_train_scaled)// batch_size
optimizer = keras.optimizers.SGD()
metric = keras.metrics.MeanSquaredError()

def random_batch(x, y, batch_size=32):
    idx = np.random.randint(0,len(x),size = batch_size)
    return x[idx], y[idx]
    
model = keras.models.Sequential([
    keras.layers.Dense(30, activation ='relu',
                      input_shape =x_train.shape[1:]),
    keras.layers.Dense(1),
])

for epoch in range(epochs):
    metric.reset_states()
    for step in range(steps_per_epoch):
        x_batch, y_batch = random_batch(x_train_scaled,y_train,batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(x_batch)
            loss = tf.reduce_mean(
                keras.losses.mean_squared_error(y_batch, y_pred))
            metric(y_batch, y_pred)
        grads = tape.gradient(loss, model.variables)
        grads_and_vars = zip(grads,model.variables)
        optimizer.apply_gradients(grads_and_vars)
        print ("\rEpoch",epoch, "train mse:",
              metric.result().numpy(),end="")
    y_valid_pred = model(x_valid_scaled)
    valid_loss = tf.reduce_mean(
        keras.losses.mean_squared_error(y_valid_pred, y_valid))
    print ("\t", "valid mse:", valid_loss.numpy())
        


Epoch 0 train mse: 2.3997624	 valid mse: 2.5101453933814803
Epoch 1 train mse: 2.2294533	 valid mse: 2.0627070957766853
Epoch 2 train mse: 1.3213911	 valid mse: 1.8278284778406257
Epoch 3 train mse: 1.3160485	 valid mse: 1.407581104287387
Epoch 4 train mse: 1.3205252	 valid mse: 1.407911177926407
Epoch 5 train mse: 1.2603146	 valid mse: 1.3931035297275924
Epoch 6 train mse: 1.260734	 valid mse: 1.4047070775611088
Epoch 7 train mse: 1.2637725	 valid mse: 1.3917189517728805
Epoch 8 train mse: 1.2987342	 valid mse: 1.3919972475659084
Epoch 9 train mse: 1.2443206	 valid mse: 1.3891765426832845
Epoch 10 train mse: 1.2574483	 valid mse: 1.400274170294384
Epoch 11 train mse: 1.2512546	 valid mse: 1.388660783117587
Epoch 12 train mse: 1.2582042	 valid mse: 1.3874036557424443
Epoch 13 train mse: 1.2387319	 valid mse: 1.387003432903522
Epoch 14 train mse: 1.2585131	 valid mse: 1.3878684852632213
Epoch 15 train mse: 1.2690367	 valid mse: 1.3882356801098472
Epoch 16 train mse: 1.2637272	 valid mse