# Neural network to approximate Runge's function

In [1]:
# Imports
import autograd.numpy as np  # We need to use this numpy wrapper to make automatic differentiation work later
from autograd import grad, elementwise_grad
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import (
    PolynomialFeatures,
)
import json

from functions import runge, OLS_parameters, MSE
from functions import ReLU, ReLU_der, sigmoid, sigmoid_der, softmax, softmax_vec, mse_der
from functions import MSE, identity, identity_der, R2
from nn_class import NeuralNetwork

In [2]:
import os

with open("data/xy.json", "r") as f:
    data = json.load(f)

# reconstruct arrays with same shape as originally saved
x = np.array(data["x"]).reshape(-1, 1)
y = np.array(data["y"]).reshape(-1, 1)

# recuperate dimensions
n_samples, n_features = x.shape
print("Loaded shapes -> x:", x.shape, "y:", y.shape)
print("n_samples:", n_samples, "n_features:", n_features)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
y_offset = y_train.mean()

poly = PolynomialFeatures(degree=10)
X_train = poly.fit_transform(x_train)
X_test = poly.fit_transform(x_test)
# scaling
scaler = StandardScaler()
scaler.fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)

Loaded shapes -> x: (1000, 1) y: (1000, 1)
n_samples: 1000 n_features: 1


In [3]:
# One layer, 50 neurons, sigmoid activation function, stochastic gradient descent, no optimizations
layer_output_sizes = [50, 1]
activation_funcs = [sigmoid, identity]
activation_ders = [sigmoid_der, identity_der]

scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)
inputs = x_train_s
targets = y_train

NN = NeuralNetwork(
    x_train_s,
    targets,
    layer_output_sizes,
    activation_funcs,
    activation_ders,
    MSE,
    mse_der,
)

NN.train_network_stochastic_gd(epochs=10000)

targets = y_test

predictions = NN.predict(x_test_s)
print('NN mse:', MSE(predictions, targets))
print('NN R2:', R2(predictions, targets))

mse_NN_50_sigmoid_sgd_no = MSE(predictions, targets)
R2_NN_50_sigmoid_sgd_no = R2(predictions, targets)

metrics = {"mse": float(mse_NN_50_sigmoid_sgd_no), "r2": float(R2_NN_50_sigmoid_sgd_no)}
with open("data/nn_50_sigmoid_sgd_l2_0_01.json", "w") as f:
    json.dump(metrics, f, indent=2)

NN mse: 0.009887540369247511
NN R2: 0.8795319041638417


In [4]:
# One layer, 100 neurons, sigmoid activation function, stochastic gradient descent, no optimizations
layer_output_sizes = [100, 1]
activation_funcs = [sigmoid, identity]
activation_ders = [sigmoid_der, identity_der]

scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)
inputs = x_train_s
targets = y_train

NN = NeuralNetwork(
    x_train_s,
    targets,
    layer_output_sizes,
    activation_funcs,
    activation_ders,
    MSE,
    mse_der,
    L2=True,
    lmbda=0.01
)

NN.train_network_stochastic_gd(epochs=10000)

targets = y_test

predictions = NN.predict(x_test_s)
print('NN mse:', MSE(predictions, targets))
print('NN R2:', R2(predictions, targets))

mse_NN_100_sigmoid_sgd_no = MSE(predictions, targets)
R2_NN_100_sigmoid_sgd_no = R2(predictions, targets)

metrics = {"mse": float(mse_NN_100_sigmoid_sgd_no), "r2": float(R2_NN_100_sigmoid_sgd_no)}
with open("data/nn_100_sigmoid_sgd_l2_0_01.json", "w") as f:
    json.dump(metrics, f, indent=2)

NN mse: 0.011049828568476965
NN R2: 0.8661610462439043


In [5]:
# 2 layer, 50 neurons, sigmoid activation function, stochastic gradient descent, no optimizations
layer_output_sizes = [50, 50, 1]
activation_funcs = [sigmoid, sigmoid, identity]
activation_ders = [sigmoid_der, sigmoid_der, identity_der]

scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)
inputs = x_train_s
targets = y_train

NN = NeuralNetwork(
    x_train_s,
    targets,
    layer_output_sizes,
    activation_funcs,
    activation_ders,
    MSE,
    mse_der,
    L2=True,
    lmbda=0.01
)

NN.train_network_stochastic_gd(epochs=10000)

targets = y_test

predictions = NN.predict(x_test_s)
print('NN mse:', MSE(predictions, targets))
print('NN R2:', R2(predictions, targets))

mse_NN_50_50_sigmoid_sgd_no = MSE(predictions, targets)
R2_NN_50_50_sigmoid_sgd_no = R2(predictions, targets)

metrics = {"mse": float(mse_NN_50_50_sigmoid_sgd_no), "r2": float(R2_NN_50_50_sigmoid_sgd_no)}
with open("data/nn_50_50_sigmoid_sgd_l2_0_01.json", "w") as f:
    json.dump(metrics, f, indent=2)

NN mse: 0.009861487143472484
NN R2: 0.8802308670839053


In [6]:
# 2 layer, 100 neurons, sigmoid activation function, stochastic gradient descent, no optimizations
layer_output_sizes = [100, 100, 1]
activation_funcs = [sigmoid, sigmoid, identity]
activation_ders = [sigmoid_der, sigmoid_der, identity_der]

scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)
inputs = x_train_s
targets = y_train

NN = NeuralNetwork(
    x_train_s,
    targets,
    layer_output_sizes,
    activation_funcs,
    activation_ders,
    MSE,
    mse_der,
    L2=True,
    lmbda=0.01,
)

NN.train_network_stochastic_gd(epochs=10000)

targets = y_test

predictions = NN.predict(x_test_s)
print('NN mse:', MSE(predictions, targets))
print('NN R2:', R2(predictions, targets))

mse_NN_100_100_sigmoid_sgd_no = MSE(predictions, targets)
R2_NN_100_100_sigmoid_sgd_no = R2(predictions, targets)

metrics = {"mse": float(mse_NN_100_100_sigmoid_sgd_no), "r2": float(R2_NN_100_100_sigmoid_sgd_no)}
with open("data/nn_100_100_sigmoid_sgd_l2_0_01.json", "w") as f:
    json.dump(metrics, f, indent=2)

NN mse: 0.010136033668166495
NN R2: 0.8772918718443664


In [3]:
# One layer, 50 neurons, sigmoid activation function, sgd, rmsprop and adam
layer_output_sizes = [50, 1]
activation_funcs = [sigmoid, identity]
activation_ders = [sigmoid_der, identity_der]

scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)
inputs = x_train_s
targets = y_train

for option in ['RMSProp', 'ADAM']:
    targets = y_train
    NN = NeuralNetwork(
        x_train_s,
        targets,
        layer_output_sizes,
        activation_funcs,
        activation_ders,
        MSE,
        mse_der,
        L2=True,
        lmbda=0.01,
    )

    NN.train_network_stochastic_gd(epochs=10000, lr_method=option)

    targets = y_test

    predictions = NN.predict(x_test_s)
    print(f'NN mse ({option}):', MSE(predictions, targets))
    print(f'NN R2 ({option}):', R2(predictions, targets))

    mse = MSE(predictions, targets)
    r2 = R2(predictions, targets)

    metrics = {"mse": float(mse), "r2": float(r2)}
    with open(f"data/nn_50_sigmoid_sgd_{option.lower()}_l2_0_01.json", "w") as f:
        json.dump(metrics, f, indent=2)

NN mse (RMSProp): 0.00980201823611613
NN R2 (RMSProp): 0.8807568118902891
NN mse (ADAM): 0.010070934921866379
NN R2 (ADAM): 0.8774305883304946


In [4]:
# Two layer, 50 neurons, sigmoid activation function, sgd, rmsprop and adam
layer_output_sizes = [50, 50, 1]
activation_funcs = [sigmoid, sigmoid, identity]
activation_ders = [sigmoid_der, sigmoid_der, identity_der]

scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)
inputs = x_train_s
targets = y_train

for option in ['RMSProp', 'ADAM']:
    targets = y_train
    NN = NeuralNetwork(
        x_train_s,
        targets,
        layer_output_sizes,
        activation_funcs,
        activation_ders,
        MSE,
        mse_der,
        L2=True,
        lmbda=0.01,
    )

    NN.train_network_stochastic_gd(epochs=10000, lr_method=option)

    targets = y_test

    predictions = NN.predict(x_test_s)
    print(f'NN mse ({option}):', MSE(predictions, targets))
    print(f'NN R2 ({option}):', R2(predictions, targets))

    mse = MSE(predictions, targets)
    r2 = R2(predictions, targets)

    metrics = {"mse": float(mse), "r2": float(r2)}
    with open(f"data/nn_50_50_sigmoid_sgd_{option.lower()}_l2_0_01.json", "w") as f:
        json.dump(metrics, f, indent=2)

NN mse (RMSProp): 0.013957845634732373
NN R2 (RMSProp): 0.8380390367257018
NN mse (ADAM): 0.01183202499932961
NN R2 (ADAM): 0.8562150985094625


In [5]:
# One layer, 100 neurons, sigmoid activation function, sgd, rmsprop and adam
layer_output_sizes = [100, 1]
activation_funcs = [sigmoid, identity]
activation_ders = [sigmoid_der, identity_der]

scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)
inputs = x_train_s
targets = y_train

for option in ['RMSProp', 'ADAM']:
    targets = y_train
    NN = NeuralNetwork(
        x_train_s,
        targets,
        layer_output_sizes,
        activation_funcs,
        activation_ders,
        MSE,
        mse_der,
        L2=True,
        lmbda=0.01,
    )

    NN.train_network_stochastic_gd(epochs=10000, lr_method=option)

    targets = y_test

    predictions = NN.predict(x_test_s)
    print(f'NN mse ({option}):', MSE(predictions, targets))
    print(f'NN R2 ({option}):', R2(predictions, targets))

    mse = MSE(predictions, targets)
    r2 = R2(predictions, targets)

    metrics = {"mse": float(mse), "r2": float(r2)}
    with open(f"data/nn_100_sigmoid_sgd_{option.lower()}_l2_0_01.json", "w") as f:
        json.dump(metrics, f, indent=2)

NN mse (RMSProp): 0.011225272915837565
NN R2 (RMSProp): 0.8633282806494224
NN mse (ADAM): 0.011136116672581626
NN R2 (ADAM): 0.8666133139653888


In [6]:
# Two layer, 100 neurons, sigmoid activation function, sgd, rmsprop and adam
layer_output_sizes = [100, 100, 1]
activation_funcs = [sigmoid, sigmoid, identity]
activation_ders = [sigmoid_der, sigmoid_der, identity_der]

scaler.fit(x_train)
x_train_s = scaler.transform(x_train)
x_test_s = scaler.transform(x_test)
inputs = x_train_s
targets = y_train

for option in ['RMSProp', 'ADAM']:
    targets = y_train
    NN = NeuralNetwork(
        x_train_s,
        targets,
        layer_output_sizes,
        activation_funcs,
        activation_ders,
        MSE,
        mse_der,
        L2=True,
        lmbda=0.01,
    )

    NN.train_network_stochastic_gd(epochs=10000, lr_method=option)

    targets = y_test

    predictions = NN.predict(x_test_s)
    print(f'NN mse ({option}):', MSE(predictions, targets))
    print(f'NN R2 ({option}):', R2(predictions, targets))

    mse = MSE(predictions, targets)
    r2 = R2(predictions, targets)

    metrics = {"mse": float(mse), "r2": float(r2)}
    with open(f"data/nn_100_100_sigmoid_sgd_{option.lower()}_l2_0_01.json", "w") as f:
        json.dump(metrics, f, indent=2)

NN mse (RMSProp): 0.034235245010798165
NN R2 (RMSProp): 0.6785122539259052
NN mse (ADAM): 0.009560599695058961
NN R2 (ADAM): 0.883496959194617
