<a href="https://colab.research.google.com/github/keivanipchihagh/Intro_To_MachineLearning/blob/master/Models/Newswires_Classification_with_Reuters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Newswires Classification with Reuters

##### Imports

In [21]:
import numpy as np                                # Numpy
from matplotlib import pyplot as plt              # Matplotlib
import keras                                      # Keras
import pandas as pd                               # Pandas
from keras.datasets import reuters                # Reuters Dataset
from keras.utils.np_utils import to_categorical   # Categirical Classifier
import random                                     # Random

##### Load dataset

In [None]:
# Load the Reuters newswires, restricting the vocabulary with num_words = 10000,
# and unpack the (data, labels) pair of each split.
train_split, test_split = reuters.load_data(num_words = 10000)
train_data, train_labels = train_split
test_data, test_labels = test_split

# Quick look: number of training samples and the first encoded newswire.
first_newswire = train_data[0]
print('Size:', len(train_data))
print('Training Data:', first_newswire)

##### Get a feel for the data

In [None]:
# Keep a reference to the raw integer sequences. A later cell rebinds `test_data`
# to its encoded matrix, which can no longer be mapped back to words.
raw_test_data = test_data

def decode(index, sequences = None):  # Decoding the sequential integers into the corresponding words
  """Return the newswire at position `index` as a space-separated string of words.

  Args:
    index: Position of the newswire inside `sequences`.
    sequences: Collection of integer word-index sequences to read from.
      Defaults to the raw test split captured in `raw_test_data`.

  Returns:
    The decoded text; indices that have no entry in the word index are
    rendered as '?'.
  """
  if sequences is None:
    sequences = raw_test_data

  word_index = reuters.get_word_index()
  reverse_word_index = {value: key for (key, value) in word_index.items()}

  # Stored indices are shifted by 3 relative to the word index (the Keras docs
  # reserve 0, 1 and 2 for padding / start / unknown), hence `i - 3`.
  return ' '.join(reverse_word_index.get(i - 3, '?') for i in sequences[index])

print("Decoded test data sample [0]: ", decode(0))

##### Data Prep (One-Hot Encoding)

In [4]:
def vectorize_sequences(sequences, dimension = 10000):    # Encoding the integer sequences into a binary matrix
  """Multi-hot encode integer sequences.

  Args:
    sequences: Sized iterable of integer index sequences.
    dimension: Width of each encoded row; every index must be below it.

  Returns:
    A float array of shape (len(sequences), dimension) where row r holds 1.0
    at every index that occurs in sequences[r] and 0.0 elsewhere.
  """
  encoded = np.zeros((len(sequences), dimension))
  # Each `row` is a view into `encoded`, so writing to it fills the matrix in place.
  for row, word_ids in zip(encoded, sequences):
    row[word_ids] = 1.
  return encoded

# Replace the raw integer sequences with their binary-matrix encoding.
# The names are rebound in place, so this cell is meant to run once per dataset load.
train_data, test_data = map(vectorize_sequences, (train_data, test_data))

# One-hot encode the integer topic labels of both splits.
# NOTE(review): to_categorical infers the class count from each array separately;
# confirm both splits contain the highest label so the widths agree.
train_labels, test_labels = map(to_categorical, (train_labels, test_labels))

##### Building the model

In [None]:
# Fully connected classifier: two 64-unit ReLU layers on the 10000-wide encoded
# input, followed by a 46-way softmax (presumably one unit per topic - confirm
# against the label encoding).
model = keras.models.Sequential([
  keras.layers.Dense(units = 64, activation = 'relu', input_shape = (10000,)),
  keras.layers.Dense(units = 64, activation = 'relu'),
  keras.layers.Dense(units = 46, activation = 'softmax'),
])

# Categorical cross-entropy pairs with the one-hot encoded labels.
model.compile(
  optimizer = 'rmsprop',
  loss = 'categorical_crossentropy',
  metrics = ['accuracy'],
)
model.summary()

##### Training the model

In [6]:
# Hold out the first 1000 samples for validation; the remainder stays as the
# training set. The right-hand sides are evaluated before any name is rebound.
x_val, train_data = train_data[:1000], train_data[1000:]
y_val, train_labels = train_labels[:1000], train_labels[1000:]

# Train silently; the per-epoch metrics are kept in `history` for plotting.
history = model.fit(
  train_data,
  train_labels,
  batch_size = 512,
  epochs = 10,
  validation_data = (x_val, y_val),
  verbose = False,
)

##### Evaluating the model

In [None]:
# Measure generalization on the held-out test split. Evaluating on the data the
# model was just trained on would only report how well it fits that data.
result = model.evaluate(test_data, test_labels)

# `result` is [loss, accuracy], following the metrics passed to model.compile.
print('Loss:', result[0])
print('Accuracy:', result[1] * 100)

##### Statistics

In [None]:
# Per-epoch metrics recorded by model.fit.
training_curves = history.history
epochs = range(1, len(training_curves['loss']) + 1)

# One entry per figure:
# (training key, validation key, y-axis label, training legend, validation legend)
figures = [
  ('loss', 'val_loss', 'Loss', 'Training Loss', 'Validation Loss'),
  ('accuracy', 'val_accuracy', 'Accuracy', 'Training Accuracy', 'Validation Accuracy'),
]

for figure_number, (train_key, val_key, y_label, train_legend, val_legend) in enumerate(figures):
  if figure_number > 0:
    plt.clf()   # Start every figure after the first from a cleared canvas
  plt.plot(epochs, training_curves[train_key], 'b', label = train_legend)
  plt.plot(epochs, training_curves[val_key], 'r', label = val_legend)
  plt.xlabel('Epochs')
  plt.ylabel(y_label)
  plt.legend()
  plt.show()

##### Making predictions

In [None]:
# Pick a random test sample. randrange excludes its upper bound, whereas
# randint(0, len(test_data)) could return len(test_data) and raise IndexError.
prediction_index = random.randrange(len(test_data))
prediction_data = test_data[prediction_index]
decoded_prediction_data = decode(prediction_index)

# Info
print('Random prediction index:', prediction_index)
print('Original prediction Data:', prediction_data)
print('Decoded prediction Data:', decoded_prediction_data)
# Labels are one-hot encoded, so argmax recovers the integer class.
print('Expected prediction label:', np.argmax(test_labels[prediction_index]))

# Prediction
# Each row of `predictions` is the softmax output for one sample; argmax picks
# the most probable class.
predictions = model.predict(test_data)
print('Prediction index: ', np.argmax(predictions[prediction_index]))