# Assignment 1: Neural Networks
Ryan McGuinness  
27 February, 2021

### The IMDB dataset

**Loading the IMDB dataset**

In [None]:
from tensorflow.keras.datasets import imdb

# Load the train/test splits. `num_words=10000` matches the default
# `dimension` used by the multi-hot encoding further down the notebook.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

**Decoding reviews back to text**

In [None]:
# Invert the word -> index mapping so integer ids can be looked up as words.
word_index = imdb.get_word_index()
reverse_word_index = {index: word for word, index in word_index.items()}

# Ids are shifted down by 3 before lookup; any id without an entry is shown
# as "?". NOTE(review): the offset presumably accounts for reserved ids in
# the encoded reviews -- confirm against the Keras IMDB dataset docs.
decoded_review = " ".join(
    reverse_word_index.get(token_id - 3, "?") for token_id in train_data[0]
)

### Preparing the data

**Encoding the integer sequences via multi-hot encoding**

In [3]:
import numpy as np
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode a collection of integer sequences.

    Args:
        sequences: Sized collection (supports ``len``) of iterables of
            integer indices, one iterable per sample.
        dimension: Number of columns in the output; every index must be
            smaller than this value.

    Returns:
        A NumPy array of shape ``(len(sequences), dimension)`` with NumPy's
        default float dtype. Entry ``[i, j]`` is 1.0 when ``j`` occurs in
        ``sequences[i]`` and 0.0 otherwise.

    Raises:
        IndexError: If an index falls outside the column range.
    """
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        # One vectorized assignment per row instead of a Python loop over
        # every index. `list(...)` keeps arbitrary iterables working, and
        # repeated or empty index lists are handled naturally.
        results[i, list(sequence)] = 1.
    return results
# Multi-hot encode the training split first, then the test split.
x_train, x_test = (
    vectorize_sequences(split) for split in (train_data, test_data)
)

In [4]:
# Convert both label collections to float32 NumPy arrays (train, then test).
y_train, y_test = (
    np.asarray(labels).astype("float32")
    for labels in (train_labels, test_labels)
)

In [None]:
# Display the float32 test labels as a quick sanity check.
y_test

### Building the model

**Setting hyper-parameters**

In [6]:
from tensorflow import keras
from tensorflow.keras import layers

# Number of hidden Dense/Dropout pairs and the width of each hidden layer
N_LAYERS = 2
N_UNITS = 32

# Hidden-layer activation for this run (alternative tried: "relu")
ACT_MODE = "tanh"

# Loss for this run (alternative tried: "binary_crossentropy")
LOSS_METRIC = "mean_squared_error"

# Dropout rate applied after each hidden layer
DROPOUT_RATE = 0.0

**Model Definition**

In [7]:
# Create a fresh Dense/Dropout pair for every hidden layer. Multiplying a
# list of layer objects (`[...] * N_LAYERS`) only repeats references to the
# same two instances, so the hidden layers would not be independent.
hidden_layers = []
for _ in range(N_LAYERS):
    hidden_layers.append(layers.Dense(N_UNITS, activation=ACT_MODE))
    hidden_layers.append(layers.Dropout(DROPOUT_RATE))

# A single sigmoid unit on top produces the model output.
model = keras.Sequential(hidden_layers +
                         [layers.Dense(1, activation="sigmoid")])

**Compiling the model**

In [8]:
# Configure training: RMSprop optimizer, the loss chosen in the
# hyper-parameter cell, and accuracy as the reported metric.
model.compile(
    loss=LOSS_METRIC,
    optimizer="rmsprop",
    metrics=["accuracy"],
)

### Validating the approach

**Setting aside a validation set**

In [9]:
# Hold out the first 10,000 samples for validation; the remainder is used
# for training. Features and labels are split at the same position so they
# stay aligned.
x_val, partial_x_train = x_train[:10000], x_train[10000:]
y_val, partial_y_train = y_train[:10000], y_train[10000:]

**Training the model**

In [None]:
# Train on the reduced training set and evaluate on the held-out validation
# set after each epoch; the returned History object feeds the plots below.
history = model.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=15,
    validation_data=(x_val, y_val),
)

**Plotting the training and validation loss**

In [None]:
import matplotlib.pyplot as plt
# Per-epoch metrics recorded during training.
history_dict = history.history
loss_values = history_dict["loss"]
val_loss_values = history_dict["val_loss"]

# Number the epochs from 1 on the x-axis.
epochs = range(1, 1 + len(loss_values))

# "bo" draws blue dots (training); "b" draws a solid blue line (validation).
plt.plot(epochs, loss_values, "bo", label="Training loss")
plt.plot(epochs, val_loss_values, "b", label="Validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training and validation loss")
plt.legend()
plt.show()

**Plotting the training and validation accuracy**

In [None]:
# Start from an empty figure so nothing from the previous plot carries over.
plt.clf()

# Per-epoch accuracy on the training and validation sets.
acc = history_dict["accuracy"]
val_acc = history_dict["val_accuracy"]

# "bo" draws blue dots (training); "b" draws a solid blue line (validation).
plt.plot(epochs, acc, "bo", label="Training acc")
plt.plot(epochs, val_acc, "b", label="Validation acc")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("Training and validation accuracy")
plt.legend()
plt.show()

### Retraining a model from scratch

**Resetting model**

In [13]:
# Rebuild the network with brand-new layers so no trained weights carry
# over. Each hidden Dense/Dropout pair is created inside the loop because
# multiplying a list of layer objects (`[...] * N_LAYERS`) only repeats
# references to the same two instances.
hidden_layers = []
for _ in range(N_LAYERS):
    hidden_layers.append(layers.Dense(N_UNITS, activation=ACT_MODE))
    hidden_layers.append(layers.Dropout(DROPOUT_RATE))

# A single sigmoid unit on top produces the model output.
model = keras.Sequential(hidden_layers +
                         [layers.Dense(1, activation="sigmoid")])

# Same optimizer, loss and metric as the first training run.
model.compile(optimizer="rmsprop",
              loss=LOSS_METRIC,
              metrics=["accuracy"])

**Training for optimal number of epochs**

In [None]:
# Train the freshly built model on the full training set for 5 epochs,
# then measure loss and accuracy on the test set.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=512,
    epochs=5,
)
results = model.evaluate(x=x_test, y=y_test)

### Using a trained model to generate predictions on new data

In [None]:
# Run the trained model on the test features; the sigmoid output layer
# yields one value between 0 and 1 per sample.
predicted = model.predict(x=x_test)

# Leave the array as the cell's last expression so it is displayed.
predicted