### Example 2: Classifying articles according to topic

In [26]:
import copy

import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import reuters

# Load the Reuters newswire dataset with the vocabulary capped via num_words=10000,
# then unpack each split into its documents and its topic labels.
train_split, test_split = reuters.load_data(num_words=10000)
train_data, train_labels = train_split
test_data, test_labels = test_split


In [27]:
def decode_document(document, dataset):
    """
    Translate a document stored as a list of word numbers back into text.

    `dataset` must expose `get_word_index()`, returning a mapping of
    word -> number. Every number in `document` is shifted down by 3 before
    the lookup (presumably because the loader reserves the lowest indices
    - confirm against the dataset documentation); numbers without a
    matching word are rendered as "?".
    """
    # Invert the word -> number mapping so numbers can be looked up
    number_to_word = {
        number: word for word, number in dataset.get_word_index().items()
    }
    words = []
    for number in document:
        words.append(number_to_word.get(number - 3, "?"))
    return " ".join(words)

def vectorise_sequences(sequences, dimension = 10000):
    """
    Return a 'multi-hot' encoding of a collection of integer sequences.

    Each sequence becomes one row of length `dimension`, holding 1.0 at
    every (0-based) index that occurs in the sequence and 0.0 elsewhere.
    Repeated indices simply set the same entry again.
    e.g. vectorise_sequences([[3,5]], dimension=6) = [[0,0,0,1,0,1]]

    Returns a float array of shape (len(sequences), dimension).
    Raises IndexError if an index does not fit within `dimension`.
    """
    # Start from an all-zero matrix with one row per sequence
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        # One index assignment per row instead of a Python loop per element;
        # list() also accepts any iterable and keeps empty sequences valid.
        results[i, list(sequence)] = 1.
    return results

def to_one_hot(labels, dimension=46):
    """
    Convert a sequence of integer labels to one-hot row vectors.

    Row i holds 1.0 at (0-based) column labels[i] and 0.0 elsewhere.
    e.g. to_one_hot([3], dimension=6) = [[0,0,0,1,0,0]]

    Returns a float array of shape (len(labels), dimension).
    Raises IndexError if a label does not fit within `dimension`.
    """
    results = np.zeros((len(labels), dimension))
    # Pair every row number with its label and set all entries in one step
    results[np.arange(len(labels)), labels] = 1.
    return results

In [28]:
# Create the multi-hot training and test inputs, then their one-hot labels
x_train, x_test = vectorise_sequences(train_data), vectorise_sequences(test_data)
y_train, y_test = to_one_hot(train_labels), to_one_hot(test_labels)

In [33]:
# Stack two 64-unit ReLU layers and finish with a 46-way softmax,
# one output per one-hot label column.
model = keras.Sequential()
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(46, activation="softmax"))

# The labels are one-hot vectors, hence categorical cross-entropy
model.compile(
    optimizer="rmsprop",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [30]:
# Hold out the first 1000 training samples for validation;
# the remainder is what the model is actually fitted on.
x_val, partial_x_train = x_train[:1000], x_train[1000:]
y_val, partial_y_train = y_train[:1000], y_train[1000:]

In [34]:
# Fit on the reduced training set, scoring against the held-out validation samples
history = model.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=8,
    validation_data=(x_val, y_val),
)


Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [35]:
# Score the trained model on the untouched test set; shown as [loss, accuracy]
results = model.evaluate(x=x_test, y=y_test)
results



[0.9945515394210815, 0.7809438705444336]

##### Set up a random baseline for this 46-class problem

In [36]:
# Random baseline: compare the true test labels with a shuffled copy of
# themselves. The mean of the matches is the accuracy of guessing labels at
# random while respecting the class frequencies.
# A seeded, local generator keeps the figure reproducible across re-runs
# without touching NumPy's global random state.
baseline_rng = np.random.default_rng(0)
# permutation() returns a shuffled copy, so test_labels itself is left intact
test_labels_copy = baseline_rng.permutation(test_labels)
hits_array = np.array(test_labels) == np.array(test_labels_copy)
hits_array.mean()

NameError: name 'mean' is not defined

### Scratch below

In [32]:
# Spot-check: turn one training document back into readable text
decode_document(document=train_data[5345], dataset=reuters)

'? the bank of japan intervened in tokyo to buy dollars just after the market opened dealers said the dollar opened at 142 05 yen against 142 15 25 in new york and 142 50 at the close here yesterday the bank stepped into the market amid selling pressure from interbank dealers dealers said reuter 3'