In [1]:
from keras.datasets import boston_housing

Using TensorFlow backend.


In [2]:
# Load the Boston housing data as (features, targets) pairs for the train and test splits.
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data(
    path='/temp/keras/boston_housing.npz')

In [3]:
# Show both feature-matrix shapes, then the first training sample, one per line.
print(train_data.shape, test_data.shape, train_data[0], sep='\n')

(404, 13)
(102, 13)
[  1.23247   0.        8.14      0.        0.538     6.142    91.7
   3.9769    4.      307.       21.      396.9      18.72   ]


In [4]:
# Feature-wise standardization of the training data:
# 1. subtract each feature's (column's) mean
# 2. then divide by that feature's standard deviation
# so that every feature ends up with mean 0 and standard deviation 1.
# NOTE: train_data is modified in place and mean/std are recomputed from whatever
# train_data currently holds, so re-running this cell overwrites mean/std with the
# statistics of the already-standardized data.

# Per-column mean of the training set
mean = train_data.mean(axis=0)

# Center the training set in place
train_data -= mean

# Per-column standard deviation of the training set (taken after centering)
std = train_data.std(axis=0)

# Divide the centered features by the standard deviation
train_data /= std

print(train_data[0])

[-0.27224633 -0.48361547 -0.43576161 -0.25683275 -0.1652266  -0.1764426
  0.81306188  0.1166983  -0.62624905 -0.59517003  1.14850044  0.44807713
  0.8252202 ]


In [5]:
# Standardize the test set in place with the statistics computed from the
# training set (mean, std); nothing is computed from the test data itself.
test_data[:] = (test_data - mean) / std

In [6]:
from keras import models
from keras import layers

# Loss: 'mse' - mean squared error
# Metric: 'mae' - mean absolute error, i.e. the mean of abs(targets - predictions)
def build_model():
    """Build and compile a fresh regression network.

    The network has two 64-unit ReLU hidden layers followed by a single
    output unit with no activation. The input width is read from the
    module-level ``train_data`` (its number of columns). The model is
    compiled with the 'rmsprop' optimizer, 'mse' loss and 'mae' as the
    reported metric.

    Returns:
        The newly created, compiled ``Sequential`` model.
    """
    n_features = train_data.shape[1]
    network = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(n_features,)),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ])
    network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return network

In [7]:
# K-fold cross-validation, 100 epochs per fold

import numpy as np

k = 4
# Size of one validation fold: training-set length floor-divided by k
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []

for i in range(k):
    print('processing fold #', i)

    # Rows [fold_start, fold_end) are held out as this fold's validation split
    fold_start = i * num_val_samples
    fold_end = fold_start + num_val_samples
    val_data = train_data[fold_start:fold_end]
    val_targets = train_targets[fold_start:fold_end]

    # Everything before and after that window is used for training
    partial_train_data = np.concatenate(
        (train_data[:fold_start], train_data[fold_end:]), axis=0)
    partial_train_targets = np.concatenate(
        (train_targets[:fold_start], train_targets[fold_end:]), axis=0)

    # Build a fresh model, train it silently, then evaluate on the held-out rows
    model = build_model()
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, batch_size=1, verbose=0)
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)

    # Record this fold's validation MAE
    all_scores.append(val_mae)

# Per-fold scores, then their mean, one per line
print(all_scores, np.mean(all_scores), sep='\n')

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
[2.1428920054199674, 2.2554378580338885, 2.9350512004134677, 2.2923117917363007]
2.406423213900906


In [13]:
# K-fold cross-validation, 500 epochs per fold, keeping the per-epoch validation MAE.
# Relies on k and num_val_samples defined by the earlier cross-validation cell.

num_epochs = 500
all_mae_histories = []

for i in range(k):

    print('processing fold #', i)

    # Validation split for this fold
    val_data = train_data[i * num_val_samples : (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples : (i + 1) * num_val_samples]

    # Training split for this fold: every row outside the validation window
    partial_train_data = np.concatenate([
        train_data[:i * num_val_samples],
        train_data[(i + 1) * num_val_samples:]], axis=0)
    partial_train_targets = np.concatenate([
        train_targets[:i * num_val_samples],
        train_targets[(i + 1) * num_val_samples:]], axis=0)

    # Build a fresh model and train it, evaluating on the held-out fold every epoch
    model = build_model()
    history = model.fit(partial_train_data, partial_train_targets,
                        epochs=num_epochs, batch_size=1, verbose=0,
                        validation_data=(val_data, val_targets))

    # Keras >= 2.3 logs a metric under the name passed to compile() ('val_mae');
    # earlier releases expand it to 'val_mean_absolute_error'. Accept either so
    # the cell does not fail with a KeyError after the whole training run.
    fold_logs = history.history
    mae_key = 'val_mae' if 'val_mae' in fold_logs else 'val_mean_absolute_error'
    mae_history = fold_logs[mae_key]

    all_mae_histories.append(mae_history)

# Average the validation MAE across folds, epoch by epoch
average_mae_history = [
    np.mean([fold[epoch] for fold in all_mae_histories])
    for epoch in range(num_epochs)]


processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3


In [None]:
# 绘制验证分数
import matplotlib.pyplot as plt
%matplotlib inline

plt.plot(range(1, len(average_mae_history) + 1), average_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show