In [73]:
# Mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [74]:
# %load nn.py
"""nn

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1Vhinm4pCPDNgnCQeFl9feNblwzgjH2IK
"""

"""
The main code for the feedforward networks assignment.
See README.md for details.
"""
from typing import Tuple, Dict
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

def create_auto_mpg_deep_and_wide_networks(
        n_inputs: int, n_outputs: int) -> Tuple[tf.keras.models.Model, tf.keras.models.Model]:
    """Creates one deep neural network and one wide neural network.
    The networks should have the same (or very close to the same) number of
    parameters and the same activation functions.

    The neural networks will be asked to predict the number of miles per gallon
    that different cars get. They will be trained and tested on the Auto MPG
    dataset from:
    https://archive.ics.uci.edu/ml/datasets/auto+mpg

    The deep network has three ReLU hidden layers (64, 32, 16 units). The wide
    network has a single ReLU hidden layer whose width is derived from
    ``n_inputs`` and ``n_outputs`` so that its parameter count matches the deep
    network's as closely as possible. Both models have a linear output layer
    and are returned already compiled with the same optimizer and a
    mean-squared-error loss.

    :param n_inputs: The number of inputs to the models.
    :param n_outputs: The number of outputs from the models.
    :return: A tuple of (deep neural network, wide neural network)
    """
    deep_hidden_units = (64, 32, 16)

    # A Dense layer has (fan_in + 1) * fan_out parameters (the +1 is the bias),
    # so the deep network's size is the sum over consecutive layer pairs.
    layer_sizes = (n_inputs, *deep_hidden_units, n_outputs)
    deep_param_count = sum(
        (fan_in + 1) * fan_out
        for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:]))

    # A network with one hidden layer of h units has
    #     h * (n_inputs + 1) + (h + 1) * n_outputs
    # parameters; solve for the h that best matches the deep network.
    wide_units = max(1, round(
        (deep_param_count - n_outputs) / (n_inputs + n_outputs + 1)))

    # Deep Neural Network
    deep_model = models.Sequential(
        [layers.Input(shape=(n_inputs,))]
        + [layers.Dense(units, activation='relu') for units in deep_hidden_units]
        + [layers.Dense(n_outputs)])

    # Wide Neural Network
    wide_model = models.Sequential([
        layers.Input(shape=(n_inputs,)),
        layers.Dense(wide_units, activation='relu'),
        layers.Dense(n_outputs)
    ])

    # Compile both identically so they can be trained and compared directly.
    for model in (deep_model, wide_model):
        model.compile(optimizer='adam', loss='mean_squared_error')

    return deep_model, wide_model

def create_activity_dropout_and_nodropout_networks(
        n_inputs: int, n_outputs: int) -> Tuple[tf.keras.models.Model, tf.keras.models.Model]:
    """Creates one neural network with dropout applied after each layer, and
    one neural network without dropout. The networks should be identical other
    than the presence or absence of dropout.

    The neural networks will be asked to predict which one of six activity types
    a smartphone user was performing. They will be trained and tested on the
    UCI-HAR dataset from:
    https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones

    Both models are built by the same helper, so the only difference is the
    Dropout layer inserted after each hidden layer. Both are returned already
    compiled with the same optimizer and a categorical cross-entropy loss.

    :param n_inputs: The number of inputs to the models.
    :param n_outputs: The number of outputs from the models.
    :return: A tuple of (dropout neural network, no-dropout neural network)
    """
    hidden_units = (64, 32)
    dropout_rate = 0.5

    def build(with_dropout: bool) -> tf.keras.models.Model:
        # Single source of truth for the architecture; dropout is the only toggle.
        stack = [layers.Input(shape=(n_inputs,))]
        for units in hidden_units:
            stack.append(layers.Dense(units, activation='relu'))
            if with_dropout:
                stack.append(layers.Dropout(dropout_rate))
        stack.append(layers.Dense(n_outputs, activation='softmax'))

        model = models.Sequential(stack)
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    # Dropout Neural Network
    dropout_model = build(with_dropout=True)

    # No-Dropout Neural Network
    no_dropout_model = build(with_dropout=False)

    return dropout_model, no_dropout_model

def create_income_earlystopping_and_noearlystopping_networks(
        n_inputs: int, n_outputs: int) -> Tuple[tf.keras.models.Model, Dict, tf.keras.models.Model, Dict]:
    """Creates one neural network that uses early stopping during training, and
    one that does not. The networks should be identical other than the presence
    or absence of early stopping.

    The neural networks will be asked to predict whether a person makes more
    than $50K per year. They will be trained and tested on the "adult" dataset
    from:
    https://archive.ics.uci.edu/ml/datasets/adult

    Both models come from the same helper and are returned already compiled
    with the same optimizer and a binary cross-entropy loss. The only
    difference is the EarlyStopping callback in the first fit-parameter dict.

    :param n_inputs: The number of inputs to the models.
    :param n_outputs: The number of outputs from the models.
    :return: A tuple of (
        early-stopping neural network,
        early-stopping parameters that should be passed to Model.fit,
        no-early-stopping neural network,
        no-early-stopping parameters that should be passed to Model.fit
    )
    """
    def build() -> tf.keras.models.Model:
        # Single source of truth for the architecture shared by both models.
        model = models.Sequential([
            layers.Input(shape=(n_inputs,)),
            layers.Dense(64, activation='relu'),
            layers.Dense(32, activation='relu'),
            layers.Dense(n_outputs, activation='sigmoid')
        ])
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        return model

    # Early Stopping Neural Network
    early_stopping_model = build()

    # Stop once the validation loss has not improved for 3 consecutive epochs.
    early_stopping_params = {
        'callbacks': [callbacks.EarlyStopping(monitor='val_loss', patience=3)],
        'validation_split': 0.2
    }

    # No Early Stopping Neural Network
    no_early_stopping_model = build()

    no_early_stopping_params = {
        'validation_split': 0.2
    }

    return early_stopping_model, early_stopping_params, no_early_stopping_model, no_early_stopping_params

In [75]:
# Read the text of the nn.py file found at the relative path 'nn.py' into
# `code`; the file handle is closed when the `with` block exits.
with open('nn.py', 'r') as file:
    code = file.read()

In [76]:
# Show the text currently held in `code`.
print(code)

# -*- coding: utf-8 -*-
"""nn

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1Vhinm4pCPDNgnCQeFl9feNblwzgjH2IK
"""

"""
The main code for the feedforward networks assignment.
See README.md for details.
"""
from typing import Tuple, Dict
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras import optimizers
from tensorflow.keras import callbacks
from tensorflow.keras.regularizers import l2  # Import L2 regularization

def create_auto_mpg_deep_and_wide_networks(n_inputs: int, n_outputs: int):
    """Creates a deep and a wide neural network with similar number of parameters."""

    deep = models.Sequential([
        layers.Input(shape=(n_inputs,)),
        layers.Dense(64, activation='relu'),
        layers.Dense(48, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(n_outputs)
    ])

    # Further reduced wide model layer size

In [77]:
# %load test_nn.py
"""test_nn

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1nVEyMWWFM0iToa5w6bjJc7B0ujKfr3ho
"""

import os
import pandas as pd
import numpy as np
import h5py
import requests
from io import StringIO

url = "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"

# Create the 'data/' directory if it doesn't exist
data_dir = 'data'
os.makedirs(data_dir, exist_ok=True)

# Download data using requests. The timeout stops a stalled connection from
# hanging the run indefinitely.
response = requests.get(url, timeout=60)
if response.status_code == 200:
    print("Data downloaded successfully.")

    # The file is whitespace-delimited with "?" marking missing values.
    # A raw string is used for the separator so "\s" is a regex class rather
    # than an (invalid) string escape.
    data_content = response.text
    df = pd.read_csv(StringIO(data_content), header=None, sep=r"\s+", na_values="?", names=[
        "mpg", "cylinders", "displacement", "horsepower", "weight",
        "acceleration", "model year", "origin", "carname"])

    # Drop incomplete rows and the free-text car name; "mpg" is the target.
    df = df.dropna().drop("carname", axis=1)
    input_df = df.drop("mpg", axis=1)
    output_df = df[["mpg"]]

    # Roughly 80/20 train/test split. The generator is seeded so the same rows
    # land in each split on every run.
    mask = np.random.default_rng(42).random(len(df)) < 0.8
    train_input = input_df[mask].values
    train_output = output_df[mask].values
    test_input = input_df[~mask].values
    test_output = output_df[~mask].values

    # Store the splits as train/{input,output} and test/{input,output}.
    with h5py.File(os.path.join(data_dir, 'auto-mpg.hdf5'), 'w') as f:
        train = f.create_group("train")
        train.create_dataset("input", compression="gzip", data=train_input)
        train.create_dataset("output", compression="gzip", data=train_output)

        test = f.create_group("test")
        test.create_dataset("input", compression="gzip", data=test_input)
        test.create_dataset("output", compression="gzip", data=test_output)

    print("Data saved to 'auto-mpg.hdf5'.")
else:
    print(f"Failed to download data. HTTP Status Code: {response.status_code}")

import os
import io
import zipfile
import urllib.request
import numpy as np
import h5py
from keras.utils import to_categorical

# Specify the URL for the UCI HAR Dataset.zip file
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/00240/UCI%20HAR%20Dataset.zip"


def _load_har_matrix(archive, member_name):
    """Parse one whitespace-delimited text member of the archive into an array.

    The member is read directly from the open ZipFile, so nothing is extracted
    onto disk.
    """
    with archive.open(member_name) as raw, \
            io.TextIOWrapper(raw, encoding="utf-8") as text:
        return np.loadtxt(text)


# Download the archive into memory, closing the HTTP response afterwards
with urllib.request.urlopen(url) as response:
    archive_bytes = response.read()

# Read the feature matrices and one-hot encode the activity labels
with zipfile.ZipFile(io.BytesIO(archive_bytes), 'r') as archive:
    train_input = _load_har_matrix(archive, "UCI HAR Dataset/train/X_train.txt")
    train_output = to_categorical(_load_har_matrix(archive, "UCI HAR Dataset/train/y_train.txt"))
    test_input = _load_har_matrix(archive, "UCI HAR Dataset/test/X_test.txt")
    test_output = to_categorical(_load_har_matrix(archive, "UCI HAR Dataset/test/y_test.txt"))

# Create an HDF5 file to store the data (making sure its directory exists,
# so this step does not depend on an earlier one having created it)
hdf5_path = 'data/uci-har.hdf5'
os.makedirs(os.path.dirname(hdf5_path), exist_ok=True)
with h5py.File(hdf5_path, 'w') as f:
    train = f.create_group("train")
    train.create_dataset("input", compression="gzip", data=train_input, dtype=np.dtype("f2"))
    train.create_dataset("output", compression="gzip", data=train_output, dtype=np.dtype("i1"))
    test = f.create_group("test")
    test.create_dataset("input", compression="gzip", data=test_input, dtype=np.dtype("f2"))
    test.create_dataset("output", compression="gzip", data=test_output, dtype=np.dtype("i1"))

# Print a message indicating that the data has been downloaded and saved
print("Data downloaded successfully.")
print("Data saved to:", hdf5_path)

import pandas as pd
import numpy as np
import h5py
from urllib.error import HTTPError
import os

# Create the 'data' directory if it doesn't exist (no separate existence
# check, so there is no window between checking and creating)
os.makedirs('data', exist_ok=True)

url = "http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = [
    "age", "workclass", "fnlwgt", "education", "education_num", "marital_status",
    "occupation", "relationship", "race", "sex", "capital_gain", "capital_loss",
    "hours_per_week", "native_country", "income"
]

try:
    # Load the dataset
    df = pd.read_csv(url, header=None, sep=", ", na_values="?", engine="python", names=column_names)
    df = df.dropna()  # Drop rows with missing values
    df = pd.get_dummies(df)  # Convert categorical variables to one-hot encoding
    df = df.drop("income_<=50K", axis=1)  # Drop one of the target columns to avoid redundancy
    input_df = df.drop("income_>50K", axis=1)  # Input features
    output_df = df[["income_>50K"]]  # Target column

    # Split the data into training and testing sets (roughly 80/20). The
    # generator is seeded so the same rows land in each split on every run.
    mask = np.random.default_rng(42).random(len(df)) < 0.8
    train_input = input_df[mask].values.astype('float32')  # Convert to float32
    train_output = output_df[mask].values.astype('int32')  # Convert to int32
    test_input = input_df[~mask].values.astype('float32')  # Convert to float32
    test_output = output_df[~mask].values.astype('int32')  # Convert to int32

    # Save the data to an HDF5 file
    with h5py.File('data/income.hdf5', 'w') as f:
        train = f.create_group("train")
        train.create_dataset("input", compression="gzip", data=train_input, dtype='float32')
        train.create_dataset("output", compression="gzip", data=train_output, dtype='int32')

        test = f.create_group("test")
        test.create_dataset("input", compression="gzip", data=test_input, dtype='float32')
        test.create_dataset("output", compression="gzip", data=test_output, dtype='int32')

    print("Data downloaded successfully.")
    print("Data saved to 'data/income.hdf5'.")
except HTTPError as e:
    print(f"An error occurred: {e}")

import os
from typing import List

import h5py
import numpy as np
import pytest
import tensorflow

import nn


@pytest.fixture(autouse=True)
def set_seeds():
    """Autouse fixture: fix TensorFlow's seed and threading before each test.

    Sets the TF_DETERMINISTIC_OPS environment variable, seeds TensorFlow's
    random generator with 42, and limits both the intra-op and inter-op
    thread pools to a single thread.
    """
    os.environ.update(TF_DETERMINISTIC_OPS="1")
    tensorflow.random.set_seed(42)

    thread_config = tensorflow.config.threading
    thread_config.set_intra_op_parallelism_threads(1)
    thread_config.set_inter_op_parallelism_threads(1)


def test_deep_vs_wide(capsys):
    """Check the deep and wide Auto MPG regression networks.

    Verifies the structural requirements (depth, near-equal parameter count,
    matching compile settings, activations and loss), then trains both
    networks for 100 epochs and asserts that each one's test RMSE beats a
    baseline that always predicts the mean of the training outputs.

    :param capsys: pytest capture fixture, used here only to temporarily
        disable capturing so the RMSE report is printed.
    """
    train_in, train_out, test_in, test_out = load_hdf5("data/auto-mpg.hdf5")

    # the last axis of each array gives the number of input/output columns
    deep, wide = nn.create_auto_mpg_deep_and_wide_networks(
        train_in.shape[-1], train_out.shape[-1])

    # check that the deep neural network is indeed deeper
    assert len(deep.layers) > len(wide.layers)

    # check that the 2 networks have (nearly) the same number of parameters
    # (the difference must be under 5% of the combined parameter count)
    params1 = deep.count_params()
    params2 = wide.count_params()
    assert abs(params1 - params2) / (params1 + params2) < 0.05

    # check that the 2 networks have the same compile parameters
    assert_compile_parameters_equal(deep, wide)

    # check that the 2 networks have the same activation functions
    assert set(hidden_activations(deep)) == set(hidden_activations(wide))

    # check that output type and loss are appropriate for regression
    assert all("mean" in loss_name(model) for model in [deep, wide])
    assert loss_name(deep) == loss_name(wide)
    assert output_activation(deep) == output_activation(wide) == \
        tensorflow.keras.activations.linear

    # train both networks
    deep.fit(train_in, train_out, verbose=0, epochs=100)
    wide.fit(train_in, train_out, verbose=0, epochs=100)

    # check that error level is acceptable
    # (baseline: predict the training-set mean for every test example)
    mean_predict = np.full(shape=test_out.shape, fill_value=np.mean(train_out))
    [baseline_rmse] = root_mean_squared_error(mean_predict, test_out)
    [deep_rmse] = root_mean_squared_error(deep.predict(test_in), test_out)
    [wide_rmse] = root_mean_squared_error(wide.predict(test_in), test_out)
    # report the scores with output capturing switched off
    with capsys.disabled():
        rmse_format = "{1:.1f} RMSE for {0} on Auto MPG".format
        print()
        print(rmse_format("baseline", baseline_rmse))
        print(rmse_format("deep", deep_rmse))
        print(rmse_format("wide", wide_rmse))

    assert deep_rmse < baseline_rmse
    assert wide_rmse < baseline_rmse




def test_dropout(capsys):
    """Check the dropout and no-dropout UCI-HAR classification networks.

    Verifies that only one network contains Dropout layers and that the two
    are otherwise identical (layers, parameter count, compile settings, loss,
    softmax output), then trains both for 10 epochs on every 10th training
    example and asserts each reaches at least 75% test accuracy.

    :param capsys: pytest capture fixture, used here only to temporarily
        disable capturing so the accuracy report is printed.
    """

    train_in, train_out, test_in, test_out = load_hdf5("data/uci-har.hdf5")

    # keep only every 10th training example
    train_out = train_out[::10, :]
    train_in = train_in[::10, :]

    drop, no_drop = nn.create_activity_dropout_and_nodropout_networks(
        train_in.shape[-1], train_out.shape[-1])

    # check that the dropout network has Dropout and the other doesn't
    assert any(isinstance(layer, tensorflow.keras.layers.Dropout)
               for layer in drop.layers)
    assert all(not isinstance(layer, tensorflow.keras.layers.Dropout)
               for layer in no_drop.layers)

    # check that the 2 networks have the same number of parameters
    assert drop.count_params() == no_drop.count_params()

    # check that the two networks are identical other than dropout
    # (compare after filtering the Dropout layers out of the dropout network)
    dropped_dropout = [l for l in drop.layers
                       if not isinstance(l, tensorflow.keras.layers.Dropout)]
    assert_layers_equal(dropped_dropout, no_drop.layers)

    # check that the 2 networks have the same compile parameters
    assert_compile_parameters_equal(drop, no_drop)

    # check that output type and loss are appropriate for multi-class
    assert all("categorical" in loss_name(model)
               for model in [drop, no_drop])
    assert loss_name(drop) == loss_name(no_drop)
    assert output_activation(drop) == output_activation(no_drop) == \
        tensorflow.keras.activations.softmax

    # train both networks
    drop.fit(train_in, train_out, verbose=0, epochs=10)
    no_drop.fit(train_in, train_out, verbose=0, epochs=10)

    # check that accuracy level is acceptable
    # (baseline: always predict the column with the largest sum in train_out)
    baseline_prediction = np.zeros_like(test_out)
    baseline_prediction[:, np.argmax(np.sum(train_out, axis=0), axis=0)] = 1
    baseline_accuracy = multi_class_accuracy(baseline_prediction, test_out)
    dropout_accuracy = multi_class_accuracy(drop.predict(test_in), test_out)
    no_dropout_accuracy = multi_class_accuracy(
        no_drop.predict(test_in), test_out)
    # report the scores with output capturing switched off
    with capsys.disabled():
        accuracy_format = "{1:.1%} accuracy for {0} on UCI-HAR".format
        print()
        print(accuracy_format("baseline", baseline_accuracy))
        print(accuracy_format("dropout", dropout_accuracy))
        print(accuracy_format("no dropout", no_dropout_accuracy))
    assert dropout_accuracy >= 0.75
    assert no_dropout_accuracy >= 0.75


def test_early_stopping(capsys):
    """Check the early-stopping and no-early-stopping income networks.

    Verifies that the two networks are identical (layers, parameter count,
    compile settings, binary-classification loss, sigmoid output), trains both
    with up to 50 epochs on every 10th training example using the fit
    parameters returned alongside each model, and asserts that both exceed
    75% test accuracy, both beat the always-predict-1 baseline, and the
    early-stopping run recorded fewer epochs than the other run.

    :param capsys: pytest capture fixture, used here only to temporarily
        disable capturing so the accuracy report is printed.
    """

    train_in, train_out, test_in, test_out = load_hdf5("data/income.hdf5")

    # keep only every 10th training example
    train_out = train_out[::10, :]
    train_in = train_in[::10, :]

    early, early_fit_kwargs, late, late_fit_kwargs = \
        nn.create_income_earlystopping_and_noearlystopping_networks(
            train_in.shape[-1], train_out.shape[-1])

    # check that the two networks have the same number of parameters
    assert early.count_params() == late.count_params()

    # check that the two networks have identical layers
    assert_layers_equal(early.layers, late.layers)

    # check that the 2 networks have the same compile parameters
    assert_compile_parameters_equal(early, late)

    # check that output type and loss are appropriate for binary classification
    assert all(any(x in loss_name(model) for x in {"crossentropy", "hinge"})
               and "categorical" not in loss_name(model)
               for model in [early, late])
    assert loss_name(early) == loss_name(late)
    assert output_activation(early) == output_activation(late) == \
        tensorflow.keras.activations.sigmoid

    # train both networks
    # (the returned kwargs dicts are updated in place; only the early-stopping
    # run is additionally given the test split as validation_data)
    late_fit_kwargs.update(verbose=0, epochs=50)
    late_hist = late.fit(train_in, train_out, **late_fit_kwargs)
    early_fit_kwargs.update(verbose=0, epochs=50,
                            validation_data=(test_in, test_out))
    early_hist = early.fit(train_in, train_out, **early_fit_kwargs)

    # check that accuracy levels are acceptable
    # (baseline: the fraction of test outputs equal to 1)
    all1_accuracy = np.sum(test_out == 1) / test_out.size
    early_accuracy = binary_accuracy(early.predict(test_in), test_out)
    late_accuracy = binary_accuracy(late.predict(test_in), test_out)
    assert early_accuracy > 0.75
    assert late_accuracy > 0.75
    # report the scores with output capturing switched off
    with capsys.disabled():
        accuracy_format = "{1:.1%} accuracy for {0} on census income".format
        print()
        print(accuracy_format("baseline", all1_accuracy))
        print(accuracy_format("early", early_accuracy))
        print(accuracy_format("late", late_accuracy))
    assert early_accuracy > all1_accuracy
    assert late_accuracy > all1_accuracy

    # check that the first network stopped early (fewer epochs)
    assert len(early_hist.history["loss"]) < len(late_hist.history["loss"])



def load_hdf5(path):
    """Load the train/test arrays stored in an HDF5 file.

    The file is expected to contain "train" and "test" groups, each holding an
    "input" and an "output" dataset. All four datasets are copied into NumPy
    arrays before the file is closed.

    :param path: Path of the HDF5 file to read.
    :return: A tuple of (train inputs, train outputs, test inputs, test outputs)
    """
    with h5py.File(path, 'r') as hdf:
        # Materialise every dataset while the file is still open.
        train_in, train_out, test_in, test_out = [
            np.array(hdf[split][field])
            for split in ("train", "test")
            for field in ("input", "output")
        ]
    return train_in, train_out, test_in, test_out


def assert_layers_equal(layers1: List[tensorflow.keras.layers.Layer],
                        layers2: List[tensorflow.keras.layers.Layer]):
    """Assert that two layer sequences describe the same architecture.

    Each layer is summarised by its class, its ``units`` attribute and its
    ``activation`` attribute (``None`` when a layer lacks the attribute); the
    two lists of summaries must be equal.

    :param layers1: The first sequence of layers.
    :param layers2: The second sequence of layers.
    """
    def summarise(layer_list):
        summaries = []
        for layer in layer_list:
            units = getattr(layer, "units", None)
            activation = getattr(layer, "activation", None)
            summaries.append((layer.__class__, units, activation))
        return summaries

    assert summarise(layers1) == summarise(layers2)


def assert_compile_parameters_equal(model1: "tensorflow.keras.models.Model",
                                    model2: "tensorflow.keras.models.Model"):
    """Assert that two compiled models use equivalently configured optimizers.

    Two optimizers are considered equivalent when they are instances of the
    same class and their ``get_config()`` dictionaries match once the ``name``
    entry is removed (names may be generated per instance, so they can differ
    between otherwise identical optimizers).

    :param model1: The first compiled model.
    :param model2: The second compiled model.
    :raises AssertionError: If either model has no optimizer (i.e. it was not
        compiled), or if the optimizer classes or configurations differ.
    """
    def describe(optimizer):
        assert optimizer is not None, \
            "model has no optimizer; was compile() called?"
        # Copy so the optimizer's own config mapping is never mutated.
        config = dict(optimizer.get_config())
        config.pop("name", None)
        return optimizer.__class__.__name__, config

    assert describe(model1.optimizer) == describe(model2.optimizer)


def loss_name(model):
    """Return a lower-cased name for the loss a model was compiled with.

    Handles the three forms ``model.loss`` can take here:

    * a string, which is looked up as an attribute of
      ``tensorflow.keras.losses``;
    * a function, whose ``__name__`` is used;
    * any other object without ``__name__`` (e.g. a loss instance), for which
      a string ``name`` attribute is used if present, otherwise the class name.

    :param model: An object with a ``loss`` attribute.
    :return: The loss name, lower-cased.
    """
    loss = model.loss
    if isinstance(loss, str):
        loss = getattr(tensorflow.keras.losses, loss)

    name = getattr(loss, "__name__", None)
    if name is None:
        # Instances have no __name__; fall back instead of raising AttributeError.
        instance_name = getattr(loss, "name", None)
        name = instance_name if isinstance(instance_name, str) \
            else loss.__class__.__name__
    return name.lower()


def hidden_activations(model):
    """Collect the activations of every layer except the last one.

    Layers without an ``activation`` attribute are skipped.

    :param model: An object with a ``layers`` sequence.
    :return: A list of the activations, in layer order.
    """
    found = []
    for hidden_layer in model.layers[:-1]:
        if hasattr(hidden_layer, "activation"):
            found.append(hidden_layer.activation)
    return found


def output_activation(model):
    """Return the activation of the model's final layer.

    :param model: An object with a non-empty ``layers`` sequence.
    :return: The ``activation`` attribute of the last layer.
    """
    final_layer = model.layers[-1]
    return final_layer.activation


def root_mean_squared_error(system: np.ndarray, human: np.ndarray):
    """Compute the root mean squared error along the first axis.

    :param system: Predicted values.
    :param human: Reference values, broadcast-compatible with ``system``.
    :return: The square root of the mean (over axis 0) of the squared
        differences between ``system`` and ``human``.
    """
    residuals = system - human
    mean_squared = (residuals ** 2).mean(axis=0)
    return mean_squared ** 0.5


def multi_class_accuracy(system: np.ndarray, human: np.ndarray):
    """Compute the fraction of rows whose arg-max column matches.

    :param system: Predicted scores, one row per example.
    :param human: Reference scores or one-hot labels, one row per example.
    :return: The mean of the row-wise comparison between the arg-max column
        of ``system`` and that of ``human``.
    """
    predicted_classes = np.argmax(system, axis=1)
    reference_classes = np.argmax(human, axis=1)
    matches = predicted_classes == reference_classes
    return np.mean(matches)


def binary_accuracy(system: np.ndarray, human: np.ndarray):
    """Compute the fraction of rounded predictions equal to the reference.

    :param system: Predicted values; each is rounded with ``np.round``.
    :param human: Reference values.
    :return: The mean of the element-wise comparison between the rounded
        predictions and ``human``.
    """
    rounded = np.round(system)
    matches = rounded == human
    return np.mean(matches)

Data downloaded successfully.
Data saved to 'auto-mpg.hdf5'.
Data downloaded successfully.
Data saved to: data/uci-har.hdf5
Data downloaded successfully.
Data saved to 'data/income.hdf5'.


In [78]:
!pytest

platform linux -- Python 3.11.11, pytest-8.3.4, pluggy-1.5.0
rootdir: /content
plugins: langsmith-0.3.8, typeguard-4.4.1, anyio-3.7.1
collected 3 items                                                                                  [0m

test_nn.py 
7.5 RMSE for baseline on Auto MPG
6.1 RMSE for deep on Auto MPG
6.4 RMSE for wide on Auto MPG
[32m.[0m
18.2% accuracy for baseline on UCI-HAR
86.3% accuracy for dropout on UCI-HAR
84.4% accuracy for no dropout on UCI-HAR
[32m.[0m
24.9% accuracy for baseline on census income
78.8% accuracy for early on census income
79.2% accuracy for late on census income
[32m.[0m[32m                                                                               [100%][0m

