In [1]:
# 导入所需要的包
%matplotlib inline
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Activation, Dropout
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import initializers as init
from sklearn.model_selection import train_test_split

# 查看tf、keras版本
# print(tf.__version__)
# print(keras.__version__)

In [2]:
# Read the training and test CSV files with pandas.
# `path` is a directory prefix (note the trailing slash); file names are appended to it.
path = 'E:/3_code/4_python/ML/Dive-into-DL-TensorFlow2.0/data/kaggle_house/'
train_data, test_data = (
    pd.read_csv(path + file_name) for file_name in ('train.csv', 'test.csv')
)

In [3]:
# Stack the feature columns of both data sets into one frame so that the
# non-numeric features can later be encoded consistently across train and test.
# The first column is dropped from both frames; the training frame additionally
# drops its last column.
all_features = pd.concat(
    (
        train_data.iloc[:, 1:-1],
        test_data.iloc[:, 1:],
    )
)

In [4]:
# Labels of every column whose dtype is not 'object' (treated as numeric here).
column_dtypes = all_features.dtypes
numeric_features = column_dtypes[column_dtypes != 'object'].index


def _standardize(column):
    """Return the column shifted by its mean and scaled by its standard deviation."""
    return (column - column.mean()) / (column.std())


# Standardise each numeric column, then replace missing values with 0.
all_features[numeric_features] = all_features[numeric_features].apply(_standardize)
all_features[numeric_features] = all_features[numeric_features].fillna(0)

# One-hot encode the non-numeric features; dummy_na=True makes a missing value
# its own indicator column, i.e. "missing" counts as a legitimate category.
all_features = pd.get_dummies(all_features, dummy_na=True)

In [5]:
# Split the combined feature frame back into train/test parts and convert
# everything from pandas to NumPy arrays.
# np.float64 is used instead of the `np.float` alias: that alias was only another
# name for the builtin float (float64), was deprecated in NumPy 1.20 and removed
# in NumPy 1.24, where it raises AttributeError.
n_train = train_data.shape[0]  # number of training rows; they come first in all_features
train_features = np.array(all_features[:n_train].values, dtype=np.float64)
test_features = np.array(all_features[n_train:].values, dtype=np.float64)
# Labels are reshaped into a single column: shape (n_train, 1).
train_labels = np.array(train_data.SalePrice.values.reshape(-1, 1), dtype=np.float64)

In [6]:
def get_net():
    """Build the (uncompiled) regression network.

    The model is a stack of fully connected layers, 256 -> 32 -> 1, each with a
    ReLU activation. The input dimension is not fixed here, so it is inferred
    when the model first sees data.

    Returns:
        A `keras.models.Sequential` instance that has not been compiled.
    """
    # A deeper variant (extra 128- and 64-unit layers with Dropout between
    # layers) was previously left commented out here and is not part of the model.
    layers = [
        Dense(256, activation='relu'),
        Dense(32, activation='relu'),
        # ReLU on the output keeps every prediction non-negative.
        Dense(1, activation='relu'),
    ]
    return keras.models.Sequential(layers)

In [7]:
def start_train(X_train, y_train, num_epochs, learning_rate, weight_decay, batch_size):
    """Train a fresh network on an 80/20 hold-out split and return it.

    Args:
        X_train: Feature array for all labelled samples.
        y_train: Target array aligned with `X_train`.
        num_epochs: Upper bound on training epochs (early stopping may end sooner).
        learning_rate: Learning rate passed to the Adam optimizer.
        weight_decay: Accepted for interface compatibility but currently NOT
            applied anywhere in the model or optimizer.
        batch_size: Mini-batch size used by `fit`.

    Returns:
        The trained Keras model. Because early stopping restores the best
        weights, the model carries the weights of the epoch with the lowest
        validation loss when early stopping triggers.
    """
    # Fixed random_state keeps the hold-out split reproducible between runs.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42)

    net = get_net()
    net.compile(
        loss=tf.keras.losses.mean_squared_logarithmic_error,
        optimizer=keras.optimizers.Adam(learning_rate))

    # Stop once val_loss has not improved by at least min_delta for `patience` epochs.
    monitor = keras.callbacks.EarlyStopping(
        monitor='val_loss', min_delta=1e-3,
        patience=10, verbose=1, mode='auto',
        restore_best_weights=True)

    net.fit(X_fit, y_fit, validation_data=(X_val, y_val),
        epochs=num_epochs, callbacks=[monitor],
        batch_size=batch_size, validation_freq=1, verbose=2)
    return net

In [8]:
# Hyperparameters for the training run.
k = 5               # not used by any code visible in this cell
num_epochs = 1000   # upper bound; early stopping inside start_train may end sooner
lr = 1              # learning rate handed to the optimizer
weight_decay = 0
batch_size = 64

# Train on the labelled data, then predict for the test features.
net = start_train(
    train_features, train_labels,
    num_epochs, lr, weight_decay, batch_size,
)
preds = np.array(net.predict(test_features))

Train on 1168 samples, validate on 292 samples
Epoch 1/1000
1168/1168 - 1s - loss: 89.2315 - val_loss: 3.5421
Epoch 2/1000
1168/1168 - 0s - loss: 8.8985 - val_loss: 139.2452
Epoch 3/1000
1168/1168 - 0s - loss: 144.7665 - val_loss: 144.1303
Epoch 4/1000
1168/1168 - 0s - loss: 144.3220 - val_loss: 143.7161
Epoch 5/1000
1168/1168 - 0s - loss: 117.3370 - val_loss: 5.7042
Epoch 6/1000
1168/1168 - 0s - loss: 1.7869 - val_loss: 0.2361
Epoch 7/1000
1168/1168 - 0s - loss: 0.1682 - val_loss: 0.1326
Epoch 8/1000
1168/1168 - 0s - loss: 0.0866 - val_loss: 0.0635
Epoch 9/1000
1168/1168 - 0s - loss: 0.0611 - val_loss: 0.0557
Epoch 10/1000
1168/1168 - 0s - loss: 0.0494 - val_loss: 0.0417
Epoch 11/1000
1168/1168 - 0s - loss: 0.0359 - val_loss: 0.0344
Epoch 12/1000
1168/1168 - 0s - loss: 0.0264 - val_loss: 0.0306
Epoch 13/1000
1168/1168 - 0s - loss: 0.0227 - val_loss: 0.0291
Epoch 14/1000
1168/1168 - 0s - loss: 0.0194 - val_loss: 0.0262
Epoch 15/1000
1168/1168 - 0s - loss: 0.0172 - val_loss: 0.0286
Epoc