In [53]:
from keras.datasets import reuters

In [54]:
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)

In [55]:
len(train_data), len(test_data)

(8982, 2246)

In [56]:
train_data[10]

[1,
 245,
 273,
 207,
 156,
 53,
 74,
 160,
 26,
 14,
 46,
 296,
 26,
 39,
 74,
 2979,
 3554,
 14,
 46,
 4689,
 4329,
 86,
 61,
 3499,
 4795,
 14,
 61,
 451,
 4329,
 17,
 12]

In [57]:
word_index = reuters.get_word_index()

In [58]:
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
decoded_newswire = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[10]])

In [59]:
decoded_newswire

'? period ended december 31 shr profit 11 cts vs loss 24 cts net profit 224 271 vs loss 511 349 revs 7 258 688 vs 7 200 349 reuter 3'

In [60]:
# Let's proceed to vectorize the data and prepare it for Keras as a tensor vector

In [61]:
import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros([len(sequences), dimension])
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.0
    return results

In [62]:
x_train = vectorize_sequences(train_data)

In [63]:
x_test = vectorize_sequences(test_data)

In [13]:
# Let's encode the vector using one-hot encoding instead

In [16]:
def to_one_hot(labels, dimension=64):
    results = np.zeros((len(labels), dimension))
    for i, label in enumerate(labels):
        results[i, label] = 1.0
    return results

In [17]:
one_hot_train_labels = to_one_hot(train_labels)

In [19]:
one_hot_test_labels = to_one_hot(test_labels)

In [51]:
# Other possible solution could be to use the package utility

In [64]:
from keras.utils.np_utils import to_categorical

one_hot_train_labels = to_categorical(train_labels)
one_hot_test_labels = to_categorical(test_labels)

In [20]:
# Since we have 46 different classes, 16 hidden units can cause information bottleneck in the system.
# So lets go to 64 hidden units to avoid that

In [70]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(46, activation='softmax'))

In [71]:
model.compile(optimizer='rmsprop',
             loss='categorical_crossentropy',
             metrics = ['accuracy'])

In [72]:
x_val = x_train[:10000]
partial_x_train = x_train[10000:]

y_val = one_hot_train_labels[:10000]
partial_y_train = one_hot_train_labels[10000:]

In [73]:
partial_x_train.shape

(0, 10000)

In [74]:
model.compile(optimizer='rmsprop',
             loss='categorical_crossentropy',
             metrics = ['accuracy'])

history = model.fit(partial_x_train, partial_y_train, epochs=20, batch_size=512, validation_data=(x_val, y_val))

Train on 0 samples, validate on 8982 samples
Epoch 1/20


AttributeError: 'ProgbarLogger' object has no attribute 'log_values'