In [None]:
# 1. Prepare the data, converting it to tensors where necessary

from keras.datasets import reuters
import numpy as np

# Load the Reuters dataset, keeping only word indices below num_words=10000
train_split, test_split = reuters.load_data(num_words=10000)
train_data, train_labels = train_split
test_data, test_labels = test_split


# Define the tensor-conversion helpers. to_one_shot one-hot encodes the labels; direct vectorization of the labels could be used instead
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode a collection of index sequences.

    Args:
        sequences: Sized iterable whose items are lists of integer indices,
            each index expected to be a valid column of the output.
        dimension: Width of every encoded row.

    Returns:
        A float array of shape (len(sequences), dimension) in which
        entry [i, j] is 1 when index j occurs in sequences[i] and 0 otherwise.
    """
    shape = (len(sequences), dimension)
    encoded = np.zeros(shape)
    for row_idx, word_indices in enumerate(sequences):
        # Fancy indexing sets every listed column of this row at once;
        # repeated indices simply write the same 1 again.
        encoded[row_idx, word_indices] = 1
    return encoded

def to_one_shot(labels, dimension = 46):
    """One-hot encode integer class labels.

    Args:
        labels: Sized iterable of integer class indices.
        dimension: Number of columns in the output (number of classes).

    Returns:
        A float array of shape (len(labels), dimension) where row i holds a
        single 1 in column labels[i] and 0 elsewhere.
    """
    shape = (len(labels), dimension)
    one_hot = np.zeros(shape)
    for row_idx, class_idx in enumerate(labels):
        one_hot[row_idx, class_idx] = 1
    return one_hot


# Number of leading training samples held out as the validation set.
VAL_SIZE = 1000

# Multi-hot encode the inputs of both splits.
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

# One-hot encode the labels of both splits.
one_shot_train = to_one_shot(train_labels)
one_shot_test = to_one_shot(test_labels)

# First VAL_SIZE samples form the validation set; the rest are used for training.
x_val, x_realtrain = x_train[:VAL_SIZE], x_train[VAL_SIZE:]
y_val, y_realtrain = one_shot_train[:VAL_SIZE], one_shot_train[VAL_SIZE:]

In [None]:
# 2. Build the network

from keras import models
from keras import layers
# Classifier: two 64-unit ReLU hidden layers followed by a 46-way softmax output.
model = models.Sequential()
# Declare the 10000-wide input on the first hidden layer instead of adding a
# separate Dense(10000) layer: that would insert a 10000x10000 linear
# projection (~100M weights, no activation) rather than describe the input.
model.add(layers.Dense(64, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(46, activation='softmax'))

In [None]:
# 3. Choose a suitable optimizer, loss function, and the metrics to monitor
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# 4. Train the model, supplying the validation set

model_fit = model.fit(
    x_realtrain,
    y_realtrain,
    batch_size=512,
    epochs=20,
    validation_data=(x_val, y_val),
)

In [None]:
# 5. Use the accuracy and loss on the validation set to check for overfitting

import matplotlib.pyplot as plt

# Per-epoch training and validation curves recorded by fit().
loss = model_fit.history['loss']
val_loss = model_fit.history['val_loss']
acc = model_fit.history['accuracy']
val_acc = model_fit.history['val_accuracy']
epochs = range(1, len(loss) + 1)

# Top panel: accuracy; bottom panel: loss. Dots are training, lines are validation.
fig, (ax1, ax2) = plt.subplots(2,1)
ax1.plot(epochs, acc, 'bo', label='Training acc')
ax1.plot(epochs, val_acc, 'b', label='Validation acc')
ax1.set_title('Training and validation accuracy')
ax1.set_ylabel('Accuracy')
# The label= arguments above are only displayed once a legend is drawn.
ax1.legend()

ax2.plot(epochs, loss, 'bo', label='Training loss')
ax2.plot(epochs, val_loss, 'b', label='Validation loss')
ax2.set_title('Training and validation loss')
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Loss')
ax2.legend()

# Keep the two panels' titles and labels from overlapping, then render the
# figure without echoing the last expression's repr.
fig.tight_layout()
plt.show()

In [None]:
# Adjust the number of training epochs: per the original note, overfitting
# sets in at roughly 9 epochs, so a fresh network is trained for only 9 epochs
# and then evaluated on the test set.

notfit_model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(46, activation='softmax'),
])
notfit_model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
notfit_model.fit(
    x_realtrain,
    y_realtrain,
    batch_size=512,
    epochs=9,
    validation_data=(x_val, y_val),
)
results = notfit_model.evaluate(x_test, one_shot_test)

In [None]:
# Show the values returned by evaluate() on the test set.
summary_label = "test loss, test acc:"
print(summary_label, results)