In [1]:
import tensorflow as tf
from tensorflow import keras
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pandas as pd

# 1. Load the California housing dataset and carve out three splits.
housing = fetch_california_housing()

# Hold out a test set first, then split the remainder again into training and
# validation sets. Both calls use random_state=42, so the partitioning is
# repeatable from run to run.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data,
    housing.target,
    random_state=42,
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full,
    y_train_full,
    random_state=42,
)

# 2. Feature scaling.
# The scaler is fitted on the training split only; the validation and test
# splits are then transformed with those same fitted statistics.
scaler = StandardScaler().fit(X_train)
X_train, X_valid, X_test = (
    scaler.transform(split) for split in (X_train, X_valid, X_test)
)

# 3. Build the model: a small fully connected regression network.

# Seed the random number generators before any layer is created. The data
# splits above are already seeded with random_state=42, but without this call
# the weight initialisation (and the shuffling performed later by fit) would
# differ on every run, so results could not be reproduced after a kernel
# restart.
keras.utils.set_random_seed(42)

model = keras.Sequential([
    # Input: one value per feature column of the (scaled) training matrix.
    keras.layers.Input(shape=X_train.shape[1:]),
    # Two hidden layers of 30 ReLU units each.
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(30, activation="relu"),
    # Output: a single unit with no activation argument, used as the
    # regression prediction.
    keras.layers.Dense(1),
])

# 4. Compile: mean squared error is the training loss, Adam (learning rate
# 1e-3) is the optimizer, and mean absolute error is tracked as an extra
# metric alongside the loss.
model.compile(
    loss="mean_squared_error",
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=["mean_absolute_error"],
)

model.summary()

# 5. Callbacks: stop early and checkpoint the best model.

# Stop training once val_loss has gone 10 epochs without improving, and ask
# the callback to restore the best weights it has seen.
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Save to "my_best_model.h5" only when val_loss improves, so the file always
# holds the best model observed on the validation set.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "my_best_model.h5",
    monitor='val_loss',
    save_best_only=True,
)

# 6. Train the model.
# epochs is deliberately set to a large value; EarlyStopping is expected to
# decide when training actually ends.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[checkpoint_cb, early_stopping_cb],
)

# 7. Evaluation and analysis.

# Learning curves: plot every series recorded in history.history on one axes.
ax = pd.DataFrame(history.history).plot(figsize=(8, 5))
ax.grid(True)
ax.set_ylim(0, 1)  # adjust the y-axis range as needed
ax.set_title("Model Training History")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss / Metric")
plt.show()

# Evaluate the final model on the held-out test set.
# NOTE(review): the model is expected to hold its best weights here because
# EarlyStopping was created with restore_best_weights=True — confirm this also
# holds when training runs through every epoch without stopping early.
print("\n--- 评估测试集 ---")
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f"Test MSE: {test_loss:.4f}")
print(f"Test MAE: {test_mae:.4f}")


# 8. Reload the checkpointed model from disk and try it on a few samples.

# The first three rows of the scaled test set stand in for "new" data.
X_new = X_test[:3]

loaded_model = keras.models.load_model("my_best_model.h5")
y_pred = loaded_model.predict(X_new)

# Show the predictions next to the true targets for the same three rows.
print("\n--- 预测示例 ---")
print("Predictions:", y_pred.flatten())
print("Actual values:", y_test[:3])




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [2]:
# When validation data is supplied to fit, creating the ModelCheckpoint with
# save_best_only=True keeps only the best model seen so far in the file.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "best_model.h5",
    save_best_only=True,
)

# NOTE(review): fit is called on whatever `model` is currently bound, so
# training continues from its existing weights rather than from a fresh
# initialisation — confirm this is intended.
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    validation_data=(X_valid, y_valid),
    callbacks=[checkpoint_cb],
)

# Rebind `model` to the model saved in "best_model.h5".
model = keras.models.load_model("best_model.h5")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
