In [None]:
import tensorflow as tf
from tensorflow import keras

import numpy as np

In [None]:
# Record the installed TensorFlow version so the run can be reproduced.
print(tf.__version__)

In [None]:
# Handle to the IMDB movie-review dataset that ships with Keras.
imdb = keras.datasets.imdb

# Load the train/test splits as (data, label) pairs; num_words caps the
# vocabulary at 10000 entries (keep in sync with vocab_size used by the model).
(train_data, train_label), (test_data, test_label) = imdb.load_data(
    num_words=10000,
)

In [None]:
# Report how many samples and labels each split contains; the two counts in
# each line should match. (Fixes the "entires" typo in the printed message.)
print("training entries {}, labels: {}".format(len(train_data), len(train_label)))
print("testing entries {}, labels: {}".format(len(test_data), len(test_label)))

In [None]:
# Show the first training sample followed by its label, one per line.
print(train_data[0], train_label[0], sep="\n")

In [None]:
# Lengths of the first two samples, one per line (printed before any padding
# has been applied, so they are free to differ).
print(len(train_data[0]), len(train_data[1]), sep="\n")

In [None]:
# Fetch the dataset's word index. The cells below treat it as a dict whose
# values are integers (they iterate .items() and add an offset to each value).
word_index = imdb.get_word_index()

In [None]:
# Number of entries in the word index.
print(len(word_index))

In [None]:
# Build the word -> id lookup used for padding and for decoding reviews.
#
# Every id is shifted up by 3 so that ids 0-3 are free for the special tokens
# below (this matches the default `index_from=3` offset documented for
# keras.datasets.imdb.load_data).
#
# The mapping is rebuilt from imdb.get_word_index() instead of shifting the
# existing `word_index` in place, so re-running this cell is idempotent and
# no longer adds another +3 to every id each time it is executed.
word_index = {
    k: (v + 3) for k, v in imdb.get_word_index().items()
}

# Special tokens, all spelled with the same <...> convention.
word_index["<PAD>"] = 0      # padding value used by pad_sequences below
word_index["<START>"] = 1    # start-of-review marker
word_index["<UNK>"] = 2      # unknown word (was misspelled "<UNK")
word_index["<UNUSED>"] = 3   # reserved (was "UNUSED", without brackets)

In [None]:
# Invert word_index: integer id -> word, used by decode_review.
reverse_word_index = {value: key for key, value in word_index.items()}

In [None]:
def decode_review(text):
    """Turn a sequence of integer word ids back into a readable string.

    Each id in ``text`` is looked up in the module-level
    ``reverse_word_index`` mapping; ids that have no entry are rendered as
    ``'?'``. The resulting words are joined with single spaces.
    """
    words = (reverse_word_index.get(token_id, '?') for token_id in text)
    return ' '.join(words)

In [None]:
# Sanity check: decode the first training review back into words.
decode_review(train_data[0])

In [None]:
# Bring every review to a fixed length of 256 ids, filling the tail
# (padding='post') with the <PAD> id.
# NOTE(review): `truncating` is left at the library default - confirm which
# end of an over-long review gets dropped.
train_data = keras.preprocessing.sequence.pad_sequences(
    train_data,
    value=word_index["<PAD>"],
    padding='post',
    maxlen=256,
)
test_data = keras.preprocessing.sequence.pad_sequences(
    test_data,
    value=word_index["<PAD>"],
    padding='post',
    maxlen=256,
)

In [None]:
# After padding, the first two samples should report the same length.
(len(train_data[0]), len(train_data[1]))

In [None]:
# Inspect the first sample after padding.
print(train_data[0])

In [None]:
# Size passed to the embedding layer; keep in sync with the num_words value
# used when loading the dataset.
vocab_size = 10000

# Same four layers as before, declared up front as a list instead of being
# appended one at a time with model.add().
model = keras.Sequential([
    keras.layers.Embedding(vocab_size, 16),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(16, activation='relu'),
    # Single sigmoid unit: pairs with the binary_crossentropy loss at compile.
    keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# tracked as a metric (recorded under the 'accuracy' / 'val_accuracy' keys
# that the plotting cells read).
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Hold out the first 10000 training samples (and their labels) for
# validation; everything after that is used for fitting.
x_val, x_train = train_data[:10000], train_data[10000:]
y_val, y_train = train_label[:10000], train_label[10000:]

In [None]:
# Train for 40 epochs in batches of 512, evaluating on the held-out
# validation split; the returned object is kept because the plotting cells
# read its .history attribute.
history = model.fit(
    x_train,
    y_train,
    epochs=40,
    batch_size=512,
    validation_data=(x_val, y_val),
    verbose=1,
)

In [None]:
# Score the trained model on the test split and print whatever evaluate()
# returns for the compiled loss and metrics.
results = model.evaluate(
    test_data,
    test_label,
    verbose=2,
)

print(results)

In [None]:
# Values recorded by model.fit; the plotting cells index into this by
# metric name ('accuracy', 'val_accuracy', 'loss', 'val_loss').
history_dict = history.history

# Show which metric names are available.
history_dict.keys()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Pull the recorded training / validation curves out of the history dict.
# (acc, val_acc and epochs are reused by the accuracy plot in the next cell.)
acc, val_acc = history_dict['accuracy'], history_dict['val_accuracy']
loss, val_loss = history_dict['loss'], history_dict['val_loss']

# 1-based epoch numbers for the x-axis, one per recorded value.
epochs = range(1, len(acc) + 1)

# 'bo' draws blue dots
plt.plot(epochs, loss, 'bo', label='Training loss')
# 'b' draws a solid blue line
plt.plot(epochs, val_loss, 'b', label='Validation loss')

plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and validation loss')

plt.grid()
plt.legend()

plt.show()

In [None]:
# Start from a clean figure before drawing the accuracy curves.
plt.clf()

# Same conventions as the loss plot: dots for training, solid line for
# validation. Relies on epochs / acc / val_acc from the previous cell.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')

plt.xlabel('Epochs')
plt.ylabel('Acc')
plt.title('Training and validation acc')

plt.grid()
plt.legend()

plt.show()