In [21]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

In [22]:
def polynomial(x):
    y1 = x[:, 0] * x[:, 1] * x[:, 2] + 1.2 * x[:, 0] * x[:, 4] - 0.1 * x[:, 5] * x[:, 6] * x[:, 7] - 2 * x[:,
                                                                                                         0] ** 2 * x[:,
                                                                                                                   7] + x[
                                                                                                                        :,
                                                                                                                        4]
    y2 = x[:, 0] * x[:, 4] * x[:, 5] - x[:, 2] * x[:, 3] - 3 * x[:, 1] * x[:, 2] + 2 * x[:, 1] ** 2 * x[:, 3] - 2 * x[:,
                                                                                                                    6] * x[
                                                                                                                         :,
                                                                                                                         7] - 1
    y3 = x[:, 2] ** 2 - x[:, 4] * x[:, 6] - 3 * x[:, 0] * x[:, 3] * x[:, 5] - x[:, 0] ** 2 * x[:, 2] * x[:, 4] - 1
    y4 = -x[:, 5] ** 3 + 2 * x[:, 0] * x[:, 2] * x[:, 7] - x[:, 0] * x[:, 3] * x[:, 6] - 2 * x[:, 4] ** 2 * x[:, 1] * x[
                                                                                                                      :,
                                                                                                                      3] - x[
                                                                                                                           :,
                                                                                                                           7]
    y5 = x[:, 0] ** 2 * x[:, 4] - 3 * x[:, 2] * x[:, 3] * x[:, 7] + x[:, 0] * x[:, 1] * x[:, 3] - 3 * x[:, 5] + x[:,
                                                                                                                0] ** 2 * x[
                                                                                                                          :,
                                                                                                                          6] + 2
    y6 = x[:, 0] ** 2 * x[:, 2] * x[:, 5] - x[:, 2] * x[:, 4] * x[:, 6] + x[:, 2] * x[:, 3] + 2.2 * x[:, 1] * x[:,
                                                                                                              2] ** 2 - 1.1
    return np.stack([y1, y2, y3, y4, y5, y6], axis=-1)

In [23]:
def mse_loss(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_pred - y_true))

In [24]:
np.random.seed(0)
Nt = 1000
Nv = 100
noise_sigma = 0.001

print(tf.config.list_physical_devices('GPU'))

# Generate the input data
x_train = np.random.uniform(low=-1, high=1, size=(Nt, 8))
x_val = np.random.uniform(low=-1, high=1, size=(Nv, 8))

# Generate the output data with noise
y_train = polynomial(x_train) + np.random.normal(scale=noise_sigma)
y_val = polynomial(x_val)

[]


In [25]:
def build_model(hidden_layer_nodes, activation_fn):
    model = models.Sequential()
    model.add(layers.Dense(hidden_layer_nodes, activation=activation_fn))
    model.add(layers.Dense(hidden_layer_nodes, activation=activation_fn))
    model.add(layers.Dense(hidden_layer_nodes, activation=activation_fn))
    model.add(layers.Dense(6))
    return model


def train_model(model, x_train, y_train, x_val, y_val, learning_rate, epochs):
    optimizer = tf.keras.optimizers.SGD(learning_rate)
    model.compile(optimizer=optimizer, loss=mse_loss, metrics=['mae'])
    history = model.fit(x_train, y_train, batch_size=32, epochs=epochs, validation_data=(x_val, y_val), verbose=0)
    return history


# Train the model with various configurations
activations = ['relu', 'tanh', 'sigmoid']
learning_rates = [0.1, 0.01, 0.001]
epochs = [1000, 1500, 2250]

best_params = None
best_val_loss = 1.0  # set initial loss

for activation in activations:
    for learning_rate in learning_rates:
        for epoch in epochs:
            model = build_model(6, activation)
            history = train_model(model, x_train, y_train, x_val, y_val, learning_rate, epoch)
            print(
                f'\n - Activation:{activation}, Learning Rate:{learning_rate}, Epoch:{epoch} \n\tTraining and validation errors: {history.history["loss"][-1]:.4f}, {history.history["val_loss"][-1]:.4f}')

            val_loss = history.history["val_loss"][-1]
            if val_loss < best_val_loss:
                best_params = {'mdoel': model, 'activation': activation, 'learning_rate': learning_rate, 'epoch': epoch}
                best_val_loss = val_loss

print(f'\nBest parameters: {best_params}')


 - Activation:relu, Learning Rate:0.1, Epoch:1000 
	Training and validation errors: 0.4189, 0.4102

 - Activation:relu, Learning Rate:0.1, Epoch:1500 
	Training and validation errors: 0.4109, 0.4110

 - Activation:relu, Learning Rate:0.1, Epoch:2250 
	Training and validation errors: 0.4475, 0.4262

 - Activation:relu, Learning Rate:0.01, Epoch:1000 
	Training and validation errors: 0.4116, 0.4408

 - Activation:relu, Learning Rate:0.01, Epoch:1500 
	Training and validation errors: 0.4276, 0.4470

 - Activation:relu, Learning Rate:0.01, Epoch:2250 
	Training and validation errors: 0.4123, 0.4272

 - Activation:relu, Learning Rate:0.001, Epoch:1000 
	Training and validation errors: 0.6761, 0.5218

 - Activation:relu, Learning Rate:0.001, Epoch:1500 
	Training and validation errors: 0.6906, 0.5304

 - Activation:relu, Learning Rate:0.001, Epoch:2250 
	Training and validation errors: 0.6286, 0.4886

 - Activation:tanh, Learning Rate:0.1, Epoch:1000 
	Training and validation errors: 0.3740

### STEP 8

In [1]:
def add_nodes_to_hidden_layers(model, x_train, y_train, x_val, y_val, learning_rate, epochs, nodes_to_add, num_hidden_layers):
    for i in range(num_hidden_layers):
        for j in range(nodes_to_add):
            # model.add(layers.Dense(num_hidden_layers + 1, activation=activation)) # compare to below two rows
            model.layers[i+1].add(layers.Dense(6 + j + 1, activation=activation))
            model.layers[i+1].add(layers.Dense(6 + j + 1))
            
            history = train_model(model, x_train, y_train, x_val, y_val, learning_rate, epochs)
            print(f'Training and validation errors with {num_hidden_layers + 1} nodes in layer {i + 1}: {history.history["loss"][-1]:.4f}, {history.history["val_loss"][-1]:.4f}')
            return history

In [ ]:
def plot_curve(result):
    
    num_samples = len(result['loss'])
    train_loss = np.array(result['loss'])
    bias = np.mean(train_loss[:num_samples//2])
    variance = np.var(train_loss[num_samples//2:])
    
    # Plot the bias-variance curve
    plt.plot([0, num_samples], [bias, bias], 'k--', label='Bias')
    plt.plot([0, num_samples], [variance, variance], 'k:', label='Variance')
    plt.plot(result['loss'], '-o', label='Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Bias-Variance Curve')
    plt.legend()
    plt.show()

In [ ]:
result1 = add_nodes_to_hidden_layers(best_params.model, x_train, y_train, x_val, y_val, best_params.learning_rate, epochs, 2, 1).history
result2 = add_nodes_to_hidden_layers(best_params.model, x_train, y_train, x_val, y_val, best_params.learning_rate, epochs, 2, 2).history
result3 = add_nodes_to_hidden_layers(best_params.model, x_train, y_train, x_val, y_val, best_params.learning_rate, epochs, 2, 3).history

plot_curve(result1)
plot_curve(result2)
plot_curve(result3)


### Step 9

In [26]:
def add_nodes_with_training(model, x_train, y_train, x_val, y_val, learning_rate, epochs, nodes_to_add,
                            hidden_layer_nodes):
    for i in range(3):
        for j in range(nodes_to_add):
            model.add(layers.Dense(hidden_layer_nodes + 1, activation=activation))
            history = train_model(model, x_train, y_train, x_val, y_val, learning_rate, epochs)
            print(
                f'Training and validation errors with {hidden_layer_nodes + 1} nodes in layer {i + 1}: {history.history["loss"][-1]:.4f}, {history.history["val_loss"][-1]:.4f}')

In [27]:
Nt = 1100
Nv = 110
x_train = np.random.uniform(low=-1, high=1, size=(Nt, 8))
y_train = polynomial(x_train) + np.random.normal(scale=noise_sigma, size=y_train.shape)
x_val = np.random.uniform(low=-1, high=1, size=(Nv, 8))
y_val = polynomial(x_val)

add_nodes_with_training(best_params.model, x_train, y_train, x_val, y_val, best_params.learning_rate, epochs, 2)

ValueError: operands could not be broadcast together with shapes (1100,6) (1000,6) 