<a href="https://colab.research.google.com/github/Mrsnellek/MSDS_686_22F8W2/blob/23S8W1/MSDS%20686/Week_3/Reuters_Regularization_and_Dropout_Example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### This example was adapted from Chapters 5 and 7 of *Deep Learning with Python*: Chollet, F. (2021). Deep Learning with Python (2nd ed.). Greenwich, CT, USA: Manning Publications Co.

In [None]:
# Widen the notebook's main container to 95% of the browser width so that wide
# outputs (history tables, plots) are not squeezed into the default column.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

## We will improve our previous Reuters neural net by adjusting the learning rate and batch size, adding dropout, and using early stopping.

In [None]:
from keras.datasets import reuters
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Seed every random number generator this notebook relies on. Seeding NumPy alone
# is not enough for repeatable training: with the TensorFlow backend, Keras weight
# initialization and dropout draw from TensorFlow's generator.
np.random.seed(1)
tf.random.set_seed(1)

# When we import the data we only select the 10,000 most common words in the Reuters dataset.
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)

## Previous Baseline Model

In [None]:
# Remember, we cannot feed raw lists of word indices to a neural network. The data needs
# to be converted to a tensor. This function creates a sparse tensor that is N by 10000:
# N is the number of samples and 10000 is the number of unique words. The tensor is all
# zeroes except for a one wherever that word appears in the document.
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode sequences of word indices as a sparse N x `dimension` tensor.

    Args:
        sequences: Sized collection of documents, each an iterable of integer word
            indices. Repeated indices within a document are collapsed to a single 1.
        dimension: Number of columns of the result (vocabulary size).

    Returns:
        A `tf.sparse.SparseTensor` of shape `[len(sequences), dimension]` with int32
        value 1 at `[i, w]` for every word index `w` present in document `i`, with
        its indices reordered into row-major order.
    """
    # Build the (row, column) coordinates once; the values are derived from their
    # count instead of repeating the same nested iteration a second time.
    indices = [
        (row_idx, word_idx)
        for row_idx, word_indices in enumerate(sequences)
        for word_idx in set(word_indices)
    ]
    word_matrix = tf.sparse.SparseTensor(
        # reshape(-1, 2) keeps the required [n, 2] index shape even when there are
        # no non-zero entries (empty input or only empty documents).
        indices=np.asarray(indices, dtype=np.int64).reshape(-1, 2),
        # Use "1" as the value of each non-zero index (the word is used in the document)
        values=np.ones(len(indices), dtype=np.int32),
        # The overall tensor shape
        dense_shape=[len(sequences), dimension],
    )
    # Optimize by ordering the non-zero indices in ascending row-major order
    word_matrix = tf.sparse.reorder(word_matrix)
    return word_matrix

In [None]:
# Hold out 25% of the loaded training data as a validation set (75/25 split).
# NOTE: this rebinds train_data/train_labels to the 75% portion, so re-running this
# cell without reloading the data splits the already-reduced training set again.
split_parts = train_test_split(
    train_data,
    train_labels,
    test_size=0.25,
    random_state=42,
)
train_data, valid_data, train_labels, valid_labels = split_parts

In [None]:
# Encode the training, validation and test documents with vectorize_sequences,
# in that order, using its default dimension.
x_train, x_valid, x_test = (
    vectorize_sequences(documents)
    for documents in (train_data, valid_data, test_data)
)

In [None]:
# Convert the integer y targets to one-hot (categorical) vectors.
# `to_categorical` is imported from the public `keras.utils` namespace rather than
# the private `keras.utils.np_utils` module, which newer Keras releases do not ship.
from keras.utils import to_categorical

# Pin the one-hot width to the 46 units of the models' output layer. Without
# `num_classes`, the width is inferred separately for each call as max(label) + 1,
# so a split that happens not to contain the highest label would produce targets
# narrower than the model output.
NUM_CLASSES = 46
y_train = to_categorical(train_labels, num_classes=NUM_CLASSES)
y_valid = to_categorical(valid_labels, num_classes=NUM_CLASSES)
y_test = to_categorical(test_labels, num_classes=NUM_CLASSES)

In [None]:
# Import the keras libraries
from keras import models, layers, optimizers
from keras.callbacks import EarlyStopping
from keras import backend

In [None]:
# Build the baseline architecture. The input shape must equal the number of
# columns of the vectorized data (10000 unique words).
# Two hidden layers with 64 nodes each, both using the 'relu' activation.
# Since this is multiclass classification, the 46-unit output layer uses 'softmax'.
# We keep the 'rmsprop' optimizer, loss = 'categorical_crossentropy', and metrics = 'accuracy'.
backend.clear_session()
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(46, activation='softmax'),
])
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the baseline for 50 epochs in batches of 512 samples, evaluating on the
# validation split after each epoch. The returned History object is kept for plotting.
baseline_history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=512,
    validation_data=(x_valid, y_valid),
)

In [None]:
# Turn the per-epoch metrics recorded by fit() into a DataFrame (one row per epoch,
# one column per metric) and show it.
# The name `baseline_history` is rebound from the History object to the DataFrame,
# so the conversion is guarded: re-running this cell must not try to read
# `.history` from a DataFrame that has already been converted.
if not isinstance(baseline_history, pd.DataFrame):
    baseline_history = pd.DataFrame(baseline_history.history)
display(baseline_history)

In [None]:
# Pull the per-epoch loss and accuracy columns out of the baseline history table;
# they are plotted against the epoch number (1-based) in the next cells and reused
# as the reference curves in the later experiments.
baseline_loss_values, baseline_val_loss_values = (
    baseline_history[column] for column in ('loss', 'val_loss')
)
baseline_acc_values, baseline_val_acc_values = (
    baseline_history[column] for column in ('accuracy', 'val_accuracy')
)
epochs = range(1, len(baseline_acc_values) + 1)

In [None]:
# Baseline loss per epoch: training as dots, validation as a solid line.
fig, ax = plt.subplots()
ax.plot(epochs, baseline_loss_values, 'bo', label='Baseline Training loss')
ax.plot(epochs, baseline_val_loss_values, 'b', label='Baseline Validation loss')
ax.set_title('Baseline Training and validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Baseline Loss')
ax.legend()
plt.show()

In [None]:
# Baseline accuracy per epoch: training as dots, validation as a solid line.
fig, ax = plt.subplots()
ax.plot(epochs, baseline_acc_values, 'bo', label='Baseline Training accuracy')
ax.plot(epochs, baseline_val_acc_values, 'b', label='Baseline Validation accuracy')
ax.set_title('Baseline Training and Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Baseline Accuracy')
ax.legend()
plt.show()

## How does learning rate affect accuracy?
RMSprop default learning rate = 0.001. https://keras.io/api/optimizers/rmsprop/ 

In [None]:
# Experiment: same architecture as the baseline, but RMSprop with learning rate 1.
# NOTE(review): batch_size is 500 here while the baseline cell used 512, so this
# run differs from the baseline in batch size as well as learning rate.
backend.clear_session()  # reset Keras' global state before building a fresh model
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(46, activation='softmax'),
])
model.compile(
    optimizer=optimizers.RMSprop(1),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

lr_history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=500,
    validation_data=(x_valid, y_valid),
)

# Per-epoch metrics recorded by fit().
history_dict = lr_history.history
loss_values, val_loss_values, acc_values, val_acc_values = (
    history_dict[key] for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
)
epochs = range(1, len(acc_values) + 1)

# Validation loss: baseline (dots) against this run (line).
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(epochs, baseline_val_loss_values, 'bo', label='Baseline validation loss')
loss_ax.plot(epochs, val_loss_values, 'b', label='Validation loss')
loss_ax.set(title='Baseline and Current Validation Loss', xlabel='Epochs', ylabel='Loss')
loss_ax.legend()
plt.show()

# Validation accuracy: baseline (dots) against this run (line).
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(epochs, baseline_val_acc_values, 'bo', label='Baseline validation accuracy')
acc_ax.plot(epochs, val_acc_values, 'b', label='Validation accuracy')
acc_ax.set(title='Baseline and Current Validation Accuracy', xlabel='Epochs', ylabel='Accuracy')
acc_ax.legend()
plt.show()

# Score the trained model on the test set and print the metric names with their values.
results = model.evaluate(x_test, y_test)
print(model.metrics_names)
print(results)

Not very good. The learning rate is too high, so training cannot converge on a decent accuracy. What happens if we lower the learning rate to 0.0005?

In [None]:
# Experiment: same architecture as the baseline, but RMSprop with learning rate 0.0005.
# NOTE(review): batch_size is 500 here while the baseline cell used 512, so this
# run differs from the baseline in batch size as well as learning rate.
backend.clear_session()  # reset Keras' global state before building a fresh model
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(46, activation='softmax'),
])
model.compile(
    optimizer=optimizers.RMSprop(0.0005),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

lr_history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=500,
    validation_data=(x_valid, y_valid),
)

# Per-epoch metrics recorded by fit().
history_dict = lr_history.history
loss_values, val_loss_values, acc_values, val_acc_values = (
    history_dict[key] for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
)
epochs = range(1, len(acc_values) + 1)

# Validation loss: baseline (dots) against this run (line).
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(epochs, baseline_val_loss_values, 'bo', label='Baseline validation loss')
loss_ax.plot(epochs, val_loss_values, 'b', label='Validation loss')
loss_ax.set(title='Baseline and Current Validation Loss', xlabel='Epochs', ylabel='Loss')
loss_ax.legend()
plt.show()

# Validation accuracy: baseline (dots) against this run (line).
# The dotted series is the baseline *validation* accuracy, so it is labelled as
# such (the legend previously called it "Baseline Training accuracy").
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(epochs, baseline_val_acc_values, 'bo', label='Baseline validation accuracy')
acc_ax.plot(epochs, val_acc_values, 'b', label='Validation accuracy')
acc_ax.set(title='Baseline and Current Validation Accuracy', xlabel='Epochs', ylabel='Accuracy')
acc_ax.legend()
plt.show()

# Score the trained model on the test set and print the metric names with their values.
results = model.evaluate(x_test, y_test)
print(model.metrics_names)
print(results)

Lowering the learning rate to 0.0005 creates a nice loss and accuracy curve that keeps improving.

## Using a learning_rate = 0.0005, let us adjust the batch size

In [None]:
# Experiment: learning rate 0.0005 with the batch size lowered to 128.
backend.clear_session()  # reset Keras' global state before building a fresh model
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(46, activation='softmax'),
])
model.compile(
    optimizer=optimizers.RMSprop(0.0005),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=128,
    validation_data=(x_valid, y_valid),
    verbose=2,
)

# Per-epoch metrics recorded by fit().
history_dict = history.history
loss_values, val_loss_values, acc_values, val_acc_values = (
    history_dict[key] for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
)
epochs = range(1, len(acc_values) + 1)

# Validation loss: baseline (dots) against this run (line).
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(epochs, baseline_val_loss_values, 'bo', label='Baseline validation loss')
loss_ax.plot(epochs, val_loss_values, 'b', label='Validation loss')
loss_ax.set(title='Baseline and Current Validation Loss', xlabel='Epochs', ylabel='Loss')
loss_ax.legend()
plt.show()

# Validation accuracy: baseline (dots) against this run (line).
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(epochs, baseline_val_acc_values, 'bo', label='Baseline validation accuracy')
acc_ax.plot(epochs, val_acc_values, 'b', label='Validation accuracy')
acc_ax.set(title='Baseline and Current Validation Accuracy', xlabel='Epochs', ylabel='Accuracy')
acc_ax.legend()
plt.show()

# Score the trained model on the test set and print the metric names with their values.
results = model.evaluate(x_test, y_test)
print(model.metrics_names)
print(results)

In [None]:
# Experiment: learning rate 0.0005 with the batch size increased to 1024.
backend.clear_session()  # reset Keras' global state before building a fresh model
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(46, activation='softmax'),
])
model.compile(
    optimizer=optimizers.RMSprop(0.0005),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=1024,
    validation_data=(x_valid, y_valid),
)

# Per-epoch metrics recorded by fit().
history_dict = history.history
loss_values, val_loss_values, acc_values, val_acc_values = (
    history_dict[key] for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
)
epochs = range(1, len(acc_values) + 1)

# Validation loss: baseline (dots) against this run (line).
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(epochs, baseline_val_loss_values, 'bo', label='Baseline validation loss')
loss_ax.plot(epochs, val_loss_values, 'b', label='Validation loss')
loss_ax.set(title='Baseline and Current Validation Loss', xlabel='Epochs', ylabel='Loss')
loss_ax.legend()
plt.show()

# Validation accuracy: baseline (dots) against this run (line).
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(epochs, baseline_val_acc_values, 'bo', label='Baseline validation accuracy')
acc_ax.plot(epochs, val_acc_values, 'b', label='Validation accuracy')
acc_ax.set(title='Baseline and Current Validation Accuracy', xlabel='Epochs', ylabel='Accuracy')
acc_ax.legend()
plt.show()

# Score the trained model on the test set and print the metric names with their values.
results = model.evaluate(x_test, y_test)
print(model.metrics_names)
print(results)

## It looks like the large batch size improves the accuracy over the baseline. 

## Keeping the 0.0005 learning rate and 1024 batch size, let's add dropout.

In [None]:
# Experiment: add dropout (rate 0.2) after each hidden layer, keeping the
# 0.0005 learning rate and the 1024 batch size.
backend.clear_session()  # reset Keras' global state before building a fresh model
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(46, activation='softmax'),
])
model.compile(
    optimizer=optimizers.RMSprop(0.0005),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=1024,
    validation_data=(x_valid, y_valid),
    verbose=2,
)

# Per-epoch metrics recorded by fit().
history_dict = history.history
loss_values, val_loss_values, acc_values, val_acc_values = (
    history_dict[key] for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
)
epochs = range(1, len(acc_values) + 1)

# Validation loss: baseline (dots) against this run (line).
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(epochs, baseline_val_loss_values, 'bo', label='Baseline validation loss')
loss_ax.plot(epochs, val_loss_values, 'b', label='Validation loss')
loss_ax.set(title='Baseline and Current Validation Loss', xlabel='Epochs', ylabel='Loss')
loss_ax.legend()
plt.show()

# Validation accuracy: baseline (dots) against this run (line).
# The dotted series is the baseline *validation* accuracy, so it is labelled as
# such (the legend previously called it "Baseline Training accuracy").
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(epochs, baseline_val_acc_values, 'bo', label='Baseline validation accuracy')
acc_ax.plot(epochs, val_acc_values, 'b', label='Validation accuracy')
acc_ax.set(title='Baseline and Current Validation Accuracy', xlabel='Epochs', ylabel='Accuracy')
acc_ax.legend()
plt.show()

# Score the trained model on the test set and print the metric names with their values.
results = model.evaluate(x_test, y_test)
print(model.metrics_names)
print(results)

## We keep improving.



In [None]:
# Experiment: the dropout model again, now trained with early stopping.
backend.clear_session()  # reset Keras' global state before building a fresh model
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(46, activation='softmax'),
])
model.compile(
    optimizer=optimizers.RMSprop(0.0005),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The callback monitors validation accuracy with a patience of 5 epochs and is
# configured to restore the best weights, so training may end before epoch 50.
history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=1024,
    validation_data=(x_valid, y_valid),
    verbose=2,
    callbacks=[
        EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True),
    ],
)

# Per-epoch metrics recorded by fit(); the epoch axis is sized from this run's
# own history, so the baseline curves are not overlaid in this cell.
history_dict = history.history
loss_values, val_loss_values, acc_values, val_acc_values = (
    history_dict[key] for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
)
epochs = range(1, len(acc_values) + 1)

# Validation loss of this run.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(epochs, val_loss_values, 'bo', label='Validation loss')
loss_ax.set(title='Validation loss', xlabel='Epochs', ylabel='Loss')
loss_ax.legend()
plt.show()

# Validation accuracy of this run.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(epochs, val_acc_values, 'bo', label='Validation Accuracy')
acc_ax.set(title='Validation Accuracy', xlabel='Epochs', ylabel='Accuracy')
acc_ax.legend()
plt.show()

# Score the trained model on the test set and print the metric names with their values.
results = model.evaluate(x_test, y_test)
print(model.metrics_names)
print(results)

In [None]:
# Try a larger model: the hidden layers are widened to 256 and 128 units (the
# number of hidden layers is unchanged), with a single dropout layer after the first.
backend.clear_session()  # reset Keras' global state before building a fresh model
model = models.Sequential([
    layers.Dense(256, activation='relu', input_shape=(10000,)),
    layers.Dropout(0.2),
    layers.Dense(128, activation='relu'),
    layers.Dense(46, activation='softmax'),
])
model.compile(
    optimizer=optimizers.RMSprop(0.0005),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Up to 100 epochs, with early stopping on validation accuracy (patience of 3
# epochs) configured to restore the best weights.
history = model.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=1024,
    validation_data=(x_valid, y_valid),
    verbose=2,
    callbacks=[
        EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True),
    ],
)

# Per-epoch metrics recorded by fit(); the epoch axis is sized from this run's
# own history.
history_dict = history.history
loss_values, val_loss_values, acc_values, val_acc_values = (
    history_dict[key] for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
)
epochs = range(1, len(acc_values) + 1)

# Validation loss of this run.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(epochs, val_loss_values, 'bo', label='Validation loss')
loss_ax.set(title='Validation loss', xlabel='Epochs', ylabel='Loss')
loss_ax.legend()
plt.show()

# Validation accuracy of this run.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(epochs, val_acc_values, 'bo', label='Validation Accuracy')
acc_ax.set(title='Validation Accuracy', xlabel='Epochs', ylabel='Accuracy')
acc_ax.legend()
plt.show()

# Score the trained model on the test set and print the metric names with their values.
results = model.evaluate(x_test, y_test)
print(model.metrics_names)
print(results)