In [21]:
from typing import Tuple, Dict
import os
from typing import List
import tensorflow
from tensorflow import keras
import h5py
import numpy as np

In [None]:
!pip install tensorflow==2.9.1

# Supporting Custom Functions

In [23]:
def load_hdf5(path):
    with h5py.File(path, 'r') as f:
        train = f["train"]
        train_out = np.array(train["output"])
        train_in = np.array(train["input"])
        test = f["test"]
        test_out = np.array(test["output"])
        test_in = np.array(test["input"])
    return train_in, train_out, test_in, test_out

def loss_name(model):
    if isinstance(model.loss, str):
        loss = getattr(tensorflow.keras.losses, model.loss)
    else:
        loss = model.loss
    return loss.__name__.lower()

def hidden_activations(model):
    return [layer.activation
            for layer in model.layers[:-1] if hasattr(layer, "activation")]

def output_activation(model):
    return model.layers[-1].activation

def root_mean_squared_error(system: np.ndarray, human: np.ndarray):
    return ((system - human) ** 2).mean(axis=0) ** 0.5

def multi_class_accuracy(system: np.ndarray, human: np.ndarray):
    return np.mean(np.argmax(system, axis=1) == np.argmax(human, axis=1))

def binary_accuracy(system: np.ndarray, human: np.ndarray):
    return np.mean(np.round(system) == human)

In [24]:
from google.colab import drive
drive.mount('/content/gdrive')
%cd /content/gdrive/MyDrive/Colab Notebooks

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/MyDrive/Colab Notebooks


# Deep vs Wide Neural Networks

In [25]:
def create_auto_mpg_deep_and_wide_networks(
        n_inputs: int, n_outputs: int) -> Tuple[tensorflow.keras.models.Model,
                                                tensorflow.keras.models.Model]:
    """Creates one deep neural network and one wide neural network.
    The networks should have the same (or very close to the same) number of
    parameters and the same activation functions.

    The neural networks will be asked to predict the number of miles per gallon
    that different cars get. They will be trained and tested on the Auto MPG
    dataset from:
    https://archive.ics.uci.edu/ml/datasets/auto+mpg

    :param n_inputs: The number of inputs to the models.
    :param n_outputs: The number of outputs from the models.
    :return: A tuple of (deep neural network, wide neural network)
    """
    unit_controller = 5

    deep_neural_network = keras.Sequential([

          keras.layers.Dense(unit_controller*2**1, input_shape=(n_inputs,), activation='relu'),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dense(unit_controller*2**3, activation='relu'),
          keras.layers.Dense(unit_controller*2**4, activation='relu'),
          keras.layers.Dense(unit_controller*2**4, activation='relu'),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dense(unit_controller*2**1, activation='relu'),

          keras.layers.Dense(n_outputs, activation='linear')
    ])

    deep_neural_network.compile(optimizer='adam',
                    loss='MeanSquaredError',
                    metrics=['accuracy'])

    wide_neural_network = keras.Sequential([

          keras.layers.Dense(unit_controller*3**2, input_shape=(n_inputs,), activation='relu'),
          keras.layers.Dense(unit_controller*3**3, activation='relu'),
          keras.layers.Dense(unit_controller*3**2, activation='relu'),

          keras.layers.Dense(n_outputs, activation='linear')
    ])

    wide_neural_network.compile(optimizer='adam',
                    loss='MeanSquaredError',
                    metrics=['accuracy'])

    return (deep_neural_network,wide_neural_network)

In [26]:
train_in, train_out, test_in, test_out = load_hdf5("data/auto-mpg.hdf5")

deep, wide = create_auto_mpg_deep_and_wide_networks(train_in.shape[-1], train_out.shape[-1])
deep.summary(), wide.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 10)                80        
                                                                 
 dense_13 (Dense)            (None, 20)                220       
                                                                 
 dense_14 (Dense)            (None, 40)                840       
                                                                 
 dense_15 (Dense)            (None, 80)                3280      
                                                                 
 dense_16 (Dense)            (None, 80)                6480      
                                                                 
 dense_17 (Dense)            (None, 20)                1620      
                                                                 
 dense_18 (Dense)            (None, 10)               

(None, None)

In [27]:
len(deep.layers) > len(wide.layers)

True

In [28]:
params1 = deep.count_params()
params2 = wide.count_params()
abs(params1 - params2) / (params1 + params2) < 0.05

True

In [29]:
# train both networks
deep.fit(train_in, train_out, verbose=0, epochs=100)
wide.fit(train_in, train_out, verbose=0, epochs=100)

# check that error level is acceptable
mean_predict = np.full(shape=test_out.shape, fill_value=np.mean(train_out))
[baseline_rmse] = root_mean_squared_error(mean_predict, test_out)
[deep_rmse] = root_mean_squared_error(deep.predict(test_in), test_out)
[wide_rmse] = root_mean_squared_error(wide.predict(test_in), test_out)

rmse_format = "{1:.1f} RMSE for {0} on Auto MPG".format
print()
print(rmse_format("baseline", baseline_rmse))
print(rmse_format("deep", deep_rmse))
print(rmse_format("wide", wide_rmse))


8.2 RMSE for baseline on Auto MPG
4.9 RMSE for deep on Auto MPG
4.7 RMSE for wide on Auto MPG


# Neural Networks with different activations ("relu" vs "tanh")

In [30]:
def create_delicious_relu_vs_tanh_networks(
        n_inputs: int, n_outputs: int) -> Tuple[tensorflow.keras.models.Model,
                                                tensorflow.keras.models.Model]:
    """Creates one neural network where all hidden layers have ReLU activations,
    and one where all hidden layers have tanh activations. The networks should
    be identical other than the difference in activation functions.

    The neural networks will be asked to predict the 0 or more tags associated
    with a del.icio.us bookmark. They will be trained and tested on the
    del.icio.us dataset from:
    https://github.com/dhruvramani/Multilabel-Classification-Datasets
    which is a slightly simplified version of:
    https://archive.ics.uci.edu/ml/datasets/DeliciousMIL%3A+A+Data+Set+for+Multi-Label+Multi-Instance+Learning+with+Instance+Labels

    :param n_inputs: The number of inputs to the models.
    :param n_outputs: The number of outputs from the models.
    :return: A tuple of (ReLU neural network, tanh neural network)
    """
    unit_controller = 50

    relu_neural_network = keras.Sequential([

          keras.layers.Dense(unit_controller*2**1, input_shape=(n_inputs,), activation='relu'),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dense(unit_controller*2**4, activation='relu'),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dense(unit_controller*2**1, activation='relu'),

          keras.layers.Dense(n_outputs, activation='sigmoid')
    ])

    relu_neural_network.compile(optimizer='Adam',
                    loss='BinaryCrossentropy',
                    metrics=['Accuracy'])

    tanh_neural_network = keras.Sequential([

          keras.layers.Dense(unit_controller*2**1, input_shape=(n_inputs,), activation='tanh'),
          keras.layers.Dense(unit_controller*2**2, activation='tanh'),
          keras.layers.Dense(unit_controller*2**4, activation='tanh'),
          keras.layers.Dense(unit_controller*2**2, activation='tanh'),
          keras.layers.Dense(unit_controller*2**1, activation='tanh'),

          keras.layers.Dense(n_outputs, activation='sigmoid')
    ])

    tanh_neural_network.compile(optimizer='Adam',
                    loss='BinaryCrossentropy',
                    metrics=['Accuracy'])

    return (relu_neural_network,tanh_neural_network)

In [31]:
train_in, train_out, test_in, test_out = load_hdf5("data/delicious.hdf5")

# keep only every 10th training example
train_out = train_out[::10, :]
train_in = train_in[::10, :]
# keep only tags that occur at least 400 times
(tags,) = np.nonzero(np.sum(train_out, axis=0) >= 400)
train_out = train_out[:, tags]
test_out = test_out[:, tags]

relu, tanh = create_delicious_relu_vs_tanh_networks(train_in.shape[-1], train_out.shape[-1])
relu.summary(), tanh.summary()
relu_accuracy = binary_accuracy(relu.predict(test_in), test_out)
tanh_accuracy = binary_accuracy(tanh.predict(test_in), test_out)
all0_accuracy = np.sum(test_out == 0) / test_out.size

accuracy_format = "{1:.1%} accuracy for {0} on del.icio.us".format
print()
print(accuracy_format("baseline", all0_accuracy))
print(accuracy_format("relu", relu_accuracy))
print(accuracy_format("tanh", tanh_accuracy))

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_24 (Dense)            (None, 100)               50100     
                                                                 
 dense_25 (Dense)            (None, 200)               20200     
                                                                 
 dense_26 (Dense)            (None, 800)               160800    
                                                                 
 dense_27 (Dense)            (None, 200)               160200    
                                                                 
 dense_28 (Dense)            (None, 100)               20100     
                                                                 
 dense_29 (Dense)            (None, 5)                 505       
                                                                 
Total params: 411,905
Trainable params: 411,905
Non-tr




65.0% accuracy for baseline on del.icio.us
45.3% accuracy for relu on del.icio.us
48.4% accuracy for tanh on del.icio.us


# Dropout vs No-Dropout

In [32]:
def create_activity_dropout_and_nodropout_networks(
        n_inputs: int, n_outputs: int) -> Tuple[tensorflow.keras.models.Model,
                                                tensorflow.keras.models.Model]:
    """Creates one neural network with dropout applied after each layer, and
    one neural network without dropout. The networks should be identical other
    than the presence or absence of dropout.

    The neural networks will be asked to predict which one of six activity types
    a smartphone user was performing. They will be trained and tested on the
    UCI-HAR dataset from:
    https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones

    :param n_inputs: The number of inputs to the models.
    :param n_outputs: The number of outputs from the models.
    :return: A tuple of (dropout neural network, no-dropout neural network)
    """
    unit_controller = 50
    dropout_fraction = 0.2

    drop_neural_network = keras.Sequential([

          keras.layers.Dense(unit_controller*2**1, input_shape=(n_inputs,), activation='relu'),
          keras.layers.Dropout(dropout_fraction),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dropout(dropout_fraction),
          keras.layers.Dense(unit_controller*2**4, activation='relu'),
          keras.layers.Dropout(dropout_fraction),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dropout(dropout_fraction),
          keras.layers.Dense(unit_controller*2**1, activation='relu'),
          keras.layers.Dropout(dropout_fraction),

          keras.layers.Dense(n_outputs, activation='softmax')
    ])

    drop_neural_network.compile(optimizer='Adam',
                    loss='CategoricalCrossentropy',
                    metrics=['Accuracy'])

    no_drop_neural_network = keras.Sequential([

          keras.layers.Dense(unit_controller*2**1, input_shape=(n_inputs,), activation='relu'),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dense(unit_controller*2**4, activation='relu'),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dense(unit_controller*2**1, activation='relu'),

          keras.layers.Dense(n_outputs, activation='softmax')
    ])

    no_drop_neural_network.compile(optimizer='Adam',
                    loss='CategoricalCrossentropy',
                    metrics=['Accuracy'])

    return (drop_neural_network,no_drop_neural_network)

In [33]:
train_in, train_out, test_in, test_out = load_hdf5("data/uci-har.hdf5")

# keep only every 10th training example
train_out = train_out[::10, :]
train_in = train_in[::10, :]

drop, no_drop = create_activity_dropout_and_nodropout_networks(train_in.shape[-1], train_out.shape[-1])

In [34]:
# train both networks
drop.fit(train_in, train_out, verbose=0, epochs=10)
no_drop.fit(train_in, train_out, verbose=0, epochs=10)

# check that accuracy level is acceptable
baseline_prediction = np.zeros_like(test_out)
baseline_prediction[:, np.argmax(np.sum(train_out, axis=0), axis=0)] = 1
baseline_accuracy = multi_class_accuracy(baseline_prediction, test_out)
dropout_accuracy = multi_class_accuracy(drop.predict(test_in), test_out)
no_dropout_accuracy = multi_class_accuracy(
no_drop.predict(test_in), test_out)

accuracy_format = "{1:.1%} accuracy for {0} on UCI-HAR".format
print()
print(accuracy_format("baseline", baseline_accuracy))
print(accuracy_format("dropout", dropout_accuracy))
print(accuracy_format("no dropout", no_dropout_accuracy))


18.2% accuracy for baseline on UCI-HAR
89.0% accuracy for dropout on UCI-HAR
88.3% accuracy for no dropout on UCI-HAR


# Early stopping vs Late stopping

In [35]:
def create_income_earlystopping_and_noearlystopping_networks(
        n_inputs: int, n_outputs: int) -> Tuple[tensorflow.keras.models.Model,
                                                Dict,
                                                tensorflow.keras.models.Model,
                                                Dict]:
    """Creates one neural network that uses early stopping during training, and
    one that does not. The networks should be identical other than the presence
    or absence of early stopping.

    The neural networks will be asked to predict whether a person makes more
    than $50K per year. They will be trained and tested on the "adult" dataset
    from:
    https://archive.ics.uci.edu/ml/datasets/adult

    :param n_inputs: The number of inputs to the models.
    :param n_outputs: The number of outputs from the models.
    :return: A tuple of (
        early-stopping neural network,
        early-stopping parameters that should be passed to Model.fit,
        no-early-stopping neural network,
        no-early-stopping parameters that should be passed to Model.fit
    )
    """
    unit_controller = 50

    early_neural_network = keras.Sequential([

          keras.layers.Dense(unit_controller*2**1, input_shape=(n_inputs,), activation='relu'),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dense(unit_controller*2**4, activation='relu'),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dense(unit_controller*2**1, activation='relu'),

          keras.layers.Dense(n_outputs, activation='sigmoid')
    ])

    early_neural_network.compile(optimizer='Adam',
                    loss='BinaryCrossentropy',
                    metrics=['Accuracy'])
    early_callbacks = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    early_fit_kwargs = {'callbacks': early_callbacks}

    late_neural_network = keras.Sequential([

          keras.layers.Dense(unit_controller*2**1, input_shape=(n_inputs,), activation='relu'),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dense(unit_controller*2**4, activation='relu'),
          keras.layers.Dense(unit_controller*2**2, activation='relu'),
          keras.layers.Dense(unit_controller*2**1, activation='relu'),

          keras.layers.Dense(n_outputs, activation='sigmoid')
    ])

    late_neural_network.compile(optimizer='Adam',
                    loss='BinaryCrossentropy',
                    metrics=['Accuracy'])
    late_callbacks = keras.callbacks.EarlyStopping(monitor='loss', patience=50)
    late_fit_kwargs = {'callbacks': late_callbacks}

    return (early_neural_network,early_fit_kwargs,late_neural_network,late_fit_kwargs)

In [36]:
early, early_fit_kwargs, late, late_fit_kwargs = \
create_income_earlystopping_and_noearlystopping_networks(train_in.shape[-1], train_out.shape[-1])

In [37]:
# train both networks
late_fit_kwargs.update(verbose=0, epochs=50)
late_hist = late.fit(train_in, train_out, **late_fit_kwargs)
early_fit_kwargs.update(verbose=0, epochs=50,
                            validation_data=(test_in, test_out))
early_hist = early.fit(train_in, train_out, **early_fit_kwargs)

# check that accuracy levels are acceptable
all1_accuracy = np.sum(test_out == 1) / test_out.size
early_accuracy = binary_accuracy(early.predict(test_in), test_out)
late_accuracy = binary_accuracy(late.predict(test_in), test_out)

accuracy_format = "{1:.1%} accuracy for {0} on census income".format
print()
print(accuracy_format("baseline", all1_accuracy))
print(accuracy_format("early", early_accuracy))
print(accuracy_format("late", late_accuracy))


14.3% accuracy for baseline on census income
96.1% accuracy for early on census income
97.7% accuracy for late on census income
