# In [15]:
"""
The main code for the feedforward networks assignment.
See README.md for details.
"""
from typing import Tuple, Dict
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras import optimizers
from tensorflow.keras import callbacks
from tensorflow.keras.regularizers import l2  # Import L2 regularization

def create_auto_mpg_deep_and_wide_networks(n_inputs: int, n_outputs: int):
    """Build and compile a deeper and a wider feedforward network.

    The deep network stacks three ReLU hidden layers (64, 48 and 32 units).
    The wide network uses two ReLU hidden layers (128 and 32 units), each with
    L2 kernel regularization. Both finish with a ``Dense(n_outputs)`` layer
    that has no activation, and both are compiled with the Adam optimizer and
    mean-squared-error loss.

    The hidden sizes are meant to give the two networks a similar number of
    parameters; the actual counts are printed so that can be checked for the
    given ``n_inputs``.

    :param n_inputs: Number of input features.
    :param n_outputs: Number of output units.
    :return: A ``(deep, wide)`` pair of compiled models.
    """
    # Deep variant: more hidden layers, each fairly narrow.
    deep_stack = [layers.Input(shape=(n_inputs,))]
    deep_stack.extend(
        layers.Dense(units, activation='relu') for units in (64, 48, 32)
    )
    deep_stack.append(layers.Dense(n_outputs))
    deep = models.Sequential(deep_stack)

    # Wide variant: fewer hidden layers, a larger first layer, L2 on kernels.
    wide_stack = [layers.Input(shape=(n_inputs,))]
    wide_stack.extend(
        layers.Dense(units, activation='relu', kernel_regularizer=l2(0.001))
        for units in (128, 32)
    )
    wide_stack.append(layers.Dense(n_outputs))
    wide = models.Sequential(wide_stack)

    # Same optimizer and loss for both so only the architecture differs.
    for network in (deep, wide):
        network.compile(optimizer="adam", loss="mse")

    # Report the parameter counts so the "similar size" goal can be verified.
    n_deep = deep.count_params()
    n_wide = wide.count_params()
    print(f"Deep network params: {n_deep}, Wide network params: {n_wide}")

    return deep, wide

def create_activity_dropout_and_nodropout_networks(n_inputs: int, n_outputs: int):
    """Build two compiled classifiers that differ only by one Dropout layer.

    Each network has two ReLU hidden layers (64 and 32 units) with L2 kernel
    regularization and a softmax output layer of ``n_outputs`` units. The
    dropout variant inserts ``Dropout(0.5)`` between the last hidden layer and
    the output layer. Dropout has no trainable weights, so both networks
    report the same parameter count; the counts are printed for confirmation.

    Both models are compiled with Adam (learning rate 0.0005), categorical
    cross-entropy loss and an accuracy metric.

    :param n_inputs: Number of input features.
    :param n_outputs: Number of output classes.
    :return: A ``(dropout_model, no_dropout_model)`` pair of compiled models.
    """

    def assemble(with_dropout):
        # Shared front of the network: input plus the two hidden layers.
        stack = [
            layers.Input(shape=(n_inputs,)),
            layers.Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
            layers.Dense(32, activation='relu', kernel_regularizer=l2(0.001)),
        ]

        # The only structural difference between the two variants.
        if with_dropout:
            stack.append(layers.Dropout(0.5))

        # Softmax output to go with the categorical cross-entropy loss.
        stack.append(layers.Dense(n_outputs, activation='softmax'))

        net = models.Sequential(stack)
        net.compile(
            optimizer=optimizers.Adam(learning_rate=0.0005),
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )
        return net

    dropout_model = assemble(True)
    no_dropout_model = assemble(False)

    # Report the parameter counts of both variants.
    print(f"Dropout model params: {dropout_model.count_params()}, No-dropout model params: {no_dropout_model.count_params()}")

    return dropout_model, no_dropout_model


def create_income_earlystopping_and_noearlystopping_networks(n_inputs: int, n_outputs: int):
    """Build two identical classifiers plus ``fit`` kwargs with and without early stopping.

    Both networks have two ReLU hidden layers (128 and 64 units) with L2
    kernel regularization and a sigmoid output layer of ``n_outputs`` units.
    They are compiled with Adam (learning rate 0.0005), binary cross-entropy
    loss and an accuracy metric.

    The returned keyword dictionaries are meant to be unpacked into
    ``model.fit``. The early-stopping set trains for up to 50 epochs with an
    ``EarlyStopping`` callback (monitoring ``val_loss``, patience 5, restoring
    the best weights). The other set trains for 100 epochs with no callback.
    Both use a batch size of 32.

    NOTE(review): the callback monitors ``val_loss``, so the caller is
    presumably expected to pass validation data to ``fit`` -- confirm at the
    call site.

    :param n_inputs: Number of input features.
    :param n_outputs: Number of output units.
    :return: ``(early_model, early_fit_kwargs, late_model, late_fit_kwargs)``.
    """

    def new_classifier():
        net = models.Sequential([
            layers.Input(shape=(n_inputs,)),
            layers.Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
            layers.Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
            # Sigmoid outputs to go with the binary cross-entropy loss.
            layers.Dense(n_outputs, activation='sigmoid'),
        ])
        net.compile(
            optimizer=optimizers.Adam(learning_rate=0.0005),
            loss="binary_crossentropy",
            metrics=["accuracy"],
        )
        return net

    # Same architecture twice; only the training configuration differs.
    early_model = new_classifier()
    late_model = new_classifier()

    # Stop once val_loss has not improved for 5 epochs and roll back to the
    # best weights seen.
    stopper = callbacks.EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True,
    )

    early_fit_kwargs = dict(batch_size=32, epochs=50, callbacks=[stopper])
    # No callback here, and a longer epoch budget.
    late_fit_kwargs = dict(batch_size=32, epochs=100)

    return early_model, early_fit_kwargs, late_model, late_fit_kwargs