In [4]:
from keras.datasets import reuters
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Embedding, SimpleRNN
import numpy as np
from keras.layers import Dropout


max_words = 10000

# Load the Reuters dataset
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words, test_split=0.2)

# Get the class names
class_names = ["cocoa","grain","veg-oil","earn","acq","wheat","copper","housing","money-supply",
               "coffee","sugar","trade","reserves","ship","cotton","carcass","crude","nat-gas",
               "cpi","money-fx","interest","gnp","meal-feed","alum","oilseed","gold","tin",
               "strategic-metal","livestock","retail","ipi","iron-steel","rubber","heat","jobs",
               "lei","bop","zinc","orange","pet-chem","dlr","gas","silver","wpi","hog","lead"]

# One of the simplest ways to represent text 
# word_index[word] = index
# Define the word index 
word_index = reuters.get_word_index() 
word_index = {k: (v+3) for k, v in word_index.items()}
word_index['<PAD>'] = 0
word_index['<START>'] = 1
word_index['<UNK>'] = 2
word_index['<UNUSED>'] = 3

# Reverse word index
# reverse_word_index[index] = word
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

# Set the maximum sequence length
maxlen = 100

# Pad the sequences
# All words need to have the same size
# Padding at the end (can also pad at the beginning)
x_train = pad_sequences(x_train, padding='post', maxlen=maxlen)  
x_test = pad_sequences(x_test, padding='post', maxlen=maxlen) 

In [5]:
# One-hot encode the labels
# Converts class number to bit (to prevent model from liking high numbers)
num_classes = len(class_names)
y_train_one_hot = to_categorical(y_train, num_classes)
y_test_one_hot = to_categorical(y_test, num_classes)

In [6]:
# Define and train the RNN model
from keras.layers import Dense, Dropout, Embedding, SimpleRNN
from keras.models import Sequential

# Define the SimpleRNN model
model = Sequential()
model.add(Embedding(max_words, 256))
model.add(SimpleRNN(256, activation='tanh', return_sequences=True))
model.add(Dropout(0.5)) # Avoid overfitting
model.add(SimpleRNN(256, activation='tanh')) # Allows positive and negative
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))  # Can't use sigmoid bc there are multiple classes

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) # Adapts learning rate  

# Train the model
model.fit(x_train, y_train_one_hot, epochs=10, batch_size=128, validation_split=0.1)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


KeyboardInterrupt: 

In [7]:
# Print some samples from the test set along with the predicted label
for i in range(20):
    x_sample = x_test[i]
    y_true = np.argmax(y_test_one_hot[i])
    y_pred = np.argmax(model.predict(x_sample.reshape(1, maxlen))[0])
    sample_text = ' '.join([reverse_word_index.get(idx, '') for idx in x_sample])
    print(f'Test sample {i+1}: {sample_text}')
    print(f'True label: {class_names[y_true]}, Predicted label: {class_names[y_pred]} ({y_pred})\n')


Test sample 1: of <UNK> in august 1986 and <UNK> in december helped us achieve better than expected results in the fourth quarter ended february 28 its net income from continuing operations jumped 52 6 pct to 20 7 mln dlrs or 55 cts a share in the latest quarter as sales increased 48 3 pct to 1 58 billion dlrs a and p gave no details on the expanded capital program but it did say it completed the first year of the program during 1986 a and p is 52 4 pct owned by lt <UNK> <UNK> of west germany reuter 3
True label: earn, Predicted label: earn (3)

Test sample 2: without any justification manila was <UNK> watching washington's moves to cut domestic support prices to 12 cents a pound from 18 cents the u s agriculture department last december slashed its 12 month 1987 sugar import quota from the philippines to 143 780 short tons from 231 660 short tons in 1986 yulo said despite next year's increased production target some philippine mills were expected to shut down at least four of the 41 m

In [8]:
from sklearn.metrics import classification_report

# Get the predicted labels
y_pred = model.predict(x_test)

# Convert the predicted probabilities to labels
y_pred = np.argmax(y_pred, axis=1)

# Print the classification report
print(classification_report(y_test, y_pred, target_names=class_names))



                 precision    recall  f1-score   support

          cocoa       0.00      0.00      0.00        12
          grain       0.00      0.00      0.00       105
        veg-oil       0.00      0.00      0.00        20
           earn       0.38      1.00      0.55       813
            acq       0.11      0.02      0.04       474
          wheat       0.00      0.00      0.00         5
         copper       0.00      0.00      0.00        14
        housing       0.00      0.00      0.00         3
   money-supply       0.00      0.00      0.00        38
         coffee       0.00      0.00      0.00        25
          sugar       0.00      0.00      0.00        30
          trade       0.00      0.00      0.00        83
       reserves       0.00      0.00      0.00        13
           ship       0.00      0.00      0.00        37
         cotton       0.00      0.00      0.00         2
        carcass       0.00      0.00      0.00         9
          crude       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
from keras.layers import LSTM

model = Sequential()
model.add(Embedding(max_words, 256))
model.add(LSTM(128, activation='tanh', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(128, activation='tanh'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train_one_hot, epochs=10, batch_size=128, validation_split=0.1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1663e5310>

In [None]:
from sklearn.metrics import classification_report

# Get the predicted labels
y_pred = model.predict(x_test)

# Convert the predicted probabilities to labels
y_pred = np.argmax(y_pred, axis=1)

# Print the classification report
print(classification_report(y_test, y_pred, target_names=class_names))

                 precision    recall  f1-score   support

          cocoa       0.00      0.00      0.00        12
          grain       0.19      0.87      0.32       105
        veg-oil       0.00      0.00      0.00        20
           earn       0.92      0.79      0.85       813
            acq       0.66      0.85      0.74       474
          wheat       0.00      0.00      0.00         5
         copper       0.00      0.00      0.00        14
        housing       0.00      0.00      0.00         3
   money-supply       0.00      0.00      0.00        38
         coffee       0.00      0.00      0.00        25
          sugar       0.00      0.00      0.00        30
          trade       0.31      0.45      0.36        83
       reserves       0.00      0.00      0.00        13
           ship       0.17      0.22      0.19        37
         cotton       0.00      0.00      0.00         2
        carcass       0.00      0.00      0.00         9
          crude       0.21    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
