# Neural network to approximate Runge's function

In [1]:
# Imports
import autograd.numpy as np  # We need to use this numpy wrapper to make automatic differentiation work later
from autograd import grad, elementwise_grad
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import (
    PolynomialFeatures,
)
import json

from functions import runge, OLS_parameters, MSE
from functions import ReLU, ReLU_der, sigmoid, sigmoid_der, softmax, softmax_vec, mse_der
from functions import MSE, identity, identity_der, R2
from nn_class import NeuralNetwork

In [2]:
# Build the noisy Runge data set (data set from project 1), split it, and
# prepare standardised degree-10 polynomial design matrices.
np.random.seed(42)  # fixes the global RNG state used for the noise below
n = 1000
x = np.linspace(-1, 1, n).reshape(-1, 1)  # n evenly spaced points on [-1, 1], as a column
y = runge(x) + 0.1 * np.random.normal(0, 1, x.shape)  # Gaussian noise scaled by 0.1

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
y_offset = y_train.mean()  # mean of the training targets (not used within this cell)

poly = PolynomialFeatures(degree=10)
X_train = poly.fit_transform(x_train)
# Fit on the training split only; the test split is merely transformed.
X_test = poly.transform(x_test)

# Scaling: statistics come from the training design matrix and are then
# applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)

In [3]:
# Two layer, 50 neurons, sigmoid activation function, plain gradient descent, rmsprop and adam
# The last entry of each list below describes the single-output identity layer.
layer_output_sizes = [50, 50, 1]
activation_funcs = [sigmoid, sigmoid, identity]
activation_ders = [sigmoid_der, sigmoid_der, identity_der]

# The network is fed the raw 1-D input rather than the polynomial features.
# NOTE: this re-fits the shared `scaler` on x_train, so afterwards it no longer
# matches the polynomial design matrices (X_train / X_test) it was fitted on earlier.
scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)
inputs = x_train_s

for option in ['RMSProp', 'ADAM']:
    # A fresh network per optimiser, always trained on the training split.
    NN = NeuralNetwork(
        x_train_s,
        y_train,
        layer_output_sizes,
        activation_funcs,
        activation_ders,
        MSE,
        mse_der,
    )

    # max_iter is only an upper bound here; the recorded runs of this cell
    # stopped early on the cost-change criterion.
    NN.train_network_plain_gd(max_iter=1_000_000_000, lr_method=option)

    # Evaluate on the held-out split; each metric is computed once and reused
    # for both the printed report and the JSON file.
    targets = y_test
    predictions = NN.predict(x_test_s)
    mse = MSE(predictions, targets)
    r2 = R2(predictions, targets)
    print(f'NN mse ({option}):', mse)
    print(f'NN R2 ({option}):', r2)

    # Persist the metrics, one file per optimiser.
    metrics = {"mse": float(mse), "r2": float(r2)}
    with open(f"nn_50_sigmoid_{option.lower()}_metrics.json", "w") as f:
        json.dump(metrics, f, indent=2)

Early stopping at iteration 61967, cost change 4.82e-11 <= 1e-10
NN mse (RMSProp): 0.02260516220854157
NN R2 (RMSProp): 0.7620794216007972
Early stopping at iteration 5558, cost change 8.66e-11 <= 1e-10
NN mse (ADAM): 0.009370275269385123
NN R2 (ADAM): 0.8858239098736672


In [3]:
# Two layer, 50 neurons, sigmoid activation function, plain gradient descent, rmsprop and adam, l2
# The last entry of each list below describes the single-output identity layer.
layer_output_sizes = [50, 50, 1]
activation_funcs = [sigmoid, sigmoid, identity]
activation_ders = [sigmoid_der, sigmoid_der, identity_der]

# The network is fed the raw 1-D input rather than the polynomial features.
# NOTE: this re-fits the shared `scaler` on x_train, so afterwards it no longer
# matches the polynomial design matrices (X_train / X_test) it was fitted on earlier.
scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)
inputs = x_train_s

for option in ['RMSProp', 'ADAM']:
    # A fresh network per optimiser, always trained on the training split.
    NN = NeuralNetwork(
        x_train_s,
        y_train,
        layer_output_sizes,
        activation_funcs,
        activation_ders,
        MSE,
        mse_der,
        L2=True,
        lmbda=0.001,  # presumably the L2 penalty strength; confirm in nn_class
    )

    # max_iter is only an upper bound here; the recorded runs of this cell
    # stopped early on the cost-change criterion.
    NN.train_network_plain_gd(max_iter=1_000_000_000, lr_method=option)

    # Evaluate on the held-out split; each metric is computed once and reused
    # for both the printed report and the JSON file.
    targets = y_test
    predictions = NN.predict(x_test_s)
    mse = MSE(predictions, targets)
    r2 = R2(predictions, targets)
    print(f'NN mse ({option}):', mse)
    print(f'NN R2 ({option}):', r2)

    # Persist the metrics, one file per optimiser.
    metrics = {"mse": float(mse), "r2": float(r2)}
    with open(f"nn_50_sigmoid_{option.lower()}_l2_lmbda_0_001_metrics.json", "w") as f:
        json.dump(metrics, f, indent=2)

Early stopping at iteration 296093, cost change 1.56e-11 <= 1e-10
NN mse (RMSProp): 0.02177745024279322
NN R2 (RMSProp): 0.7689501348574588
Early stopping at iteration 39300, cost change 6.20e-12 <= 1e-10
NN mse (ADAM): 0.009564848875913155
NN R2 (ADAM): 0.8834470025593197


In [None]:
# Two layer, 50 neurons, sigmoid activation function, plain gradient descent, rmsprop and adam, l1
# The last entry of each list below describes the single-output identity layer.
layer_output_sizes = [50, 50, 1]
activation_funcs = [sigmoid, sigmoid, identity]
activation_ders = [sigmoid_der, sigmoid_der, identity_der]

# The network is fed the raw 1-D input rather than the polynomial features.
# NOTE: this re-fits the shared `scaler` on x_train, so afterwards it no longer
# matches the polynomial design matrices (X_train / X_test) it was fitted on earlier.
scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)
inputs = x_train_s

for option in ['RMSProp', 'ADAM']:
    # A fresh network per optimiser, always trained on the training split.
    NN = NeuralNetwork(
        x_train_s,
        y_train,
        layer_output_sizes,
        activation_funcs,
        activation_ders,
        MSE,
        mse_der,
        L1=True,
        lmbda=0.001,  # presumably the L1 penalty strength; confirm in nn_class
    )

    # max_iter is a very large upper bound on the number of iterations.
    NN.train_network_plain_gd(max_iter=1_000_000_000, lr_method=option)

    # Evaluate on the held-out split; each metric is computed once and reused
    # for both the printed report and the JSON file.
    targets = y_test
    predictions = NN.predict(x_test_s)
    mse = MSE(predictions, targets)
    r2 = R2(predictions, targets)
    print(f'NN mse ({option}):', mse)
    print(f'NN R2 ({option}):', r2)

    # Persist the metrics, one file per optimiser.
    metrics = {"mse": float(mse), "r2": float(r2)}
    with open(f"nn_50_sigmoid_{option.lower()}_l1_lmbda_0_001_metrics.json", "w") as f:
        json.dump(metrics, f, indent=2)

Iteration 1000, cost: 0.034124
Iteration 2000, cost: 0.030017
Iteration 3000, cost: 0.029199
Iteration 4000, cost: 0.028682
Iteration 5000, cost: 0.028467
Iteration 6000, cost: 0.028314
Iteration 7000, cost: 0.028192
Iteration 8000, cost: 0.028087
Iteration 9000, cost: 0.027994
Iteration 10000, cost: 0.027908
Iteration 11000, cost: 0.027836
Iteration 12000, cost: 0.027762
Iteration 13000, cost: 0.027703
Iteration 14000, cost: 0.027646
Iteration 15000, cost: 0.027595
Iteration 16000, cost: 0.027545
Iteration 17000, cost: 0.026260
Iteration 18000, cost: 0.026176
Iteration 19000, cost: 0.026125
Iteration 20000, cost: 0.026078
Iteration 21000, cost: 0.026011
Iteration 22000, cost: 0.024787
Iteration 23000, cost: 0.024747
Iteration 24000, cost: 0.024715
Iteration 25000, cost: 0.024692
Iteration 26000, cost: 0.024668
Iteration 27000, cost: 0.024646
Iteration 28000, cost: 0.024628
Iteration 29000, cost: 0.024608
Iteration 30000, cost: 0.024586
Iteration 31000, cost: 0.024555
Iteration 32000, 