## Reuters Newswire Dataset 
<br>
A collection of newswire data is assembled for text classification purposes; a full description of the dataset can be found at the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/reuters-21578+text+categorization+collection). The data is loaded into the Jupyter notebook with Keras.

In [None]:
import numpy as np
import pandas as pd
from collections import Counter

#### Load Data

In [None]:
from keras.datasets import reuters 

n = 10000  # vocabulary cap passed as num_words: keep the 10000 most common words

# Load the train and test splits; each split is a (data, label) pair.
(train_data, train_label), (test_data, test_label) = reuters.load_data(num_words=n)
print('Number of training examples: ', train_data.shape[0])
print('Number of test examples: ', test_data.shape[0])

# Show the first raw (still index-encoded) training example and its label.
print('Example training data: ', train_data[0])
print('Example training data label: ', train_label[0])

#### Decode Data to Newswire

In [None]:
def decode_newswire(example):
    """Translate an index-encoded newswire back into readable text.

    Args:
        example: iterable of integer word indices, as stored in the dataset.

    Returns:
        One string holding the decoded words separated by single spaces.
        Any index without a vocabulary entry is rendered as 'UNK'.
    """
    # Invert the word -> index vocabulary so that indices can be looked up.
    vocabulary = {}
    for word, idx in reuters.get_word_index().items():
        vocabulary[idx] = word

    # Stored indices are offset by 3 relative to the vocabulary indices.
    decoded = []
    for token in example:
        decoded.append(vocabulary.get(token - 3, 'UNK'))
    return ' '.join(decoded)

In [None]:
# print one example newswire: decode the first training example back into text
decode_newswire(train_data[0])

In [None]:
# Tally how many training examples carry each topic label.
print('Number of examples for each topic label: ', Counter(train_label))

### 1. Data Preprocessing 

All observations in the training dataset are lists of word indices. 

#### Construct Vectorized Input Data 

In [None]:
def construct_input_vectors(X, N):
    """Vectorize newswire data into multi-hot rows.

    Args:
        X: sequence (NumPy array or plain list) whose items are sequences of
            integer word indices, one item per example.
        N: length of each output vector; every index must be valid for an
            axis of this size.

    Returns:
        Float array of shape (len(X), N). Entry [i, k] is 1 when index k
        occurs in example i and 0 otherwise; repeated indices count once.
    """
    encoded = np.zeros((len(X), N))
    for row, indices in enumerate(X):
        # One indexed assignment marks every word of the example at once.
        # list() keeps the index a plain sequence even if a tuple is passed.
        encoded[row, list(indices)] = 1
    return encoded

In [None]:
# Vectorize both splits over the n-word vocabulary; labels are passed through unchanged.
X_train = construct_input_vectors(train_data, n)
y_train = train_label
X_test = construct_input_vectors(test_data, n)
y_test = test_label

In [None]:
#y_train = pd.get_dummies(train_label).values

### 2. Construct Neural Network 



In [None]:
def relu_activation(X, W, b):
    """Apply an affine map followed by ReLU.

    Args:
        X: input array.
        W: weight array, multiplied with X via np.dot.
        b: bias added to the product (broadcast by NumPy).

    Returns:
        np.dot(X, W) + b with every negative entry replaced by 0.
    """
    pre_activation = np.dot(X, W) + b
    # Element-wise maximum against 0 clips the negative part.
    return np.maximum(pre_activation, 0)

In [None]:
def softmax(A):
    """Compute a row-wise softmax.

    Args:
        A: 2-D array of scores, one row per example.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtracting each row's maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large scores.
    row_max = np.max(A, axis=1, keepdims=True)
    exponentials = np.exp(A - row_max)
    normalizer = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / normalizer

In [None]:
def cross_entropy_loss(model_output, target):
    """Sum the element-wise binary cross-entropy between output and target.

    Args:
        model_output: array of predicted probabilities.
        target: array of target values, broadcastable against model_output.

    Returns:
        The negated sum over all elements of
        target * log(output) + (1 - target) * log(1 - output).
    """
    # No clipping is applied, so outputs of exactly 0 or 1 reach np.log(0).
    hit_term = target * np.log(model_output)
    miss_term = (1 - target) * np.log(1 - model_output)
    return -np.sum(hit_term + miss_term)

In [None]:
def back_propagation(delta, X):
    """Compute X.T . delta and zero the entries selected by the mask X < 0.

    Args:
        delta: array of upstream gradients.
        X: array of layer inputs; must expose .T.

    Returns:
        The product np.dot(X.T, delta) with masked entries set to 0.

    NOTE(review): for 2-D inputs the mask has X's shape while the product
    has shape (X.shape[1], delta.shape[1]); the boolean assignment only
    works when those shapes agree. Confirm the intended mask.
    """
    grad = X.T.dot(delta)
    negative_inputs = X < 0
    grad[negative_inputs] = 0
    return grad

In [None]:
def evaluate_accuracy(y, X, W1, b1, W2, b2):
    """Print the classification accuracy of the two-layer network.

    Args:
        y: true class index for each row of X.
        X: input array, one example per row.
        W1, b1: weights and bias of the hidden (ReLU) layer.
        W2, b2: weights and bias of the output layer.

    Returns:
        None; the accuracy is printed as a percentage with two decimals.
    """
    hidden = relu_activation(X, W1, b1)
    scores = np.dot(hidden, W2) + b2
    # The highest raw score picks the class, so no softmax is needed here.
    predicted = np.argmax(scores, axis=1)
    accuracy = np.mean(predicted == y)
    print('prediction accuracy: %.2f%%' % (100 * accuracy))

In [None]:
# Hyperparameters for the one-hidden-layer network and its mini-batch training.
h = 100 # size of hidden layer
num_classes = 46 # number of classes
batch_size = 100 # examples per mini-batch; use X_train.shape[0] for full-batch descent
# Ceiling division: an extra batch is added only when a partial batch is left
# over, so an evenly divisible dataset does not get a trailing empty batch.
num_batches = (X_train.shape[0] + batch_size - 1) // batch_size
learning_rate = 1
epochs = 100

In [None]:
# initialize parameters 
np.random.seed(0)  # fixed seed so the random initial weights are reproducible

# Small Gaussian weights (scaled by 0.01) and zero biases for both layers.
W1 = 0.01 * np.random.randn(n, h)  # input -> hidden weights, shape (n, h)
b1 = np.zeros((1, h))  # hidden bias as a row vector, shape (1, h)
W2 = 0.01 * np.random.randn(h, num_classes)  # hidden -> output weights, shape (h, num_classes)
b2 = np.zeros((1, num_classes))  # output bias as a row vector, shape (1, num_classes)

In [None]:
# mini-batch gradient descent for the ReLU hidden layer + softmax output network

for i in range(epochs):

    epoch_loss_sum = 0.0  # sum of per-example losses seen during this epoch
    epoch_examples = 0    # number of examples seen during this epoch

    for j in range(num_batches):

        X_batch = X_train[j*batch_size : (j+1)*batch_size, :]
        y_batch = y_train[j*batch_size : (j+1)*batch_size]

        # The last batch can hold fewer than batch_size rows (or none at all),
        # so index and normalize with the actual row count of this batch.
        batch_len = X_batch.shape[0]
        if batch_len == 0:
            continue
        batch_rows = np.arange(batch_len)

        # forward propagation
        A1 = relu_activation(X_batch, W1, b1)
        A2 = np.dot(A1, W2) + b2
        probs = softmax(A2)

        # cross entropy loss for target: -log of the probability of the true class
        target_logprob = -np.log(probs[batch_rows, y_batch])
        loss = np.sum(target_logprob) / batch_len
        epoch_loss_sum += np.sum(target_logprob)
        epoch_examples += batch_len

        # compute the gradient on scores: (probs - one_hot(y)) / batch_len
        # d2 aliases probs, which is not needed again after this point
        d2 = probs
        d2[batch_rows, y_batch] -= 1
        d2 /= batch_len

        # backprop W2 b2
        dW2 = np.dot(A1.T, d2)
        db2 = np.sum(d2, axis=0, keepdims=True)

        # backprop into hidden layer; ReLU passes no gradient where it output 0
        d1 = np.dot(d2, W2.T)
        d1[A1 <= 0] = 0

        # backprop W1 b1
        dW1 = np.dot(X_batch.T, d1)
        db1 = np.sum(d1, axis=0, keepdims=True)

        # update weights
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2

    # Report once per logged epoch (not once per batch): the mean per-example
    # loss over the epoch, followed by the accuracy on the full training set.
    if i % 10 == 0 and epoch_examples > 0:
        print("epoch {0}: loss {1}".format(i, epoch_loss_sum / epoch_examples))
        # evaluate training set accuracy
        evaluate_accuracy(y_train, X_train, W1, b1, W2, b2)

In [None]:
# evaluate test set accuracy: prints the accuracy of the current parameters on the test split
evaluate_accuracy(y_test, X_test, W1, b1, W2, b2)