In [None]:
import sys, os
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures

# Put the parent of the current working directory on the import path so the
# star-import of `functions` below can be resolved from this notebook.
project_root = str(Path.cwd().parent)
sys.path.append(project_root)

from functions import *

# Enable IPython's autoreload extension. Mode 2 reloads imported modules
# before each cell executes, so edits to the project's source files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import matplotlib.style as mplstyle

# Global look of every figure: ggplot theme plus matplotlib's "fast" style.
mplstyle.use(["ggplot", "fast"])

# Font sizes, set one rc group at a time; each keyword maps to the
# rcParams key "<group>.<keyword>".
plt.rc("font", size=16)
plt.rc("axes", titlesize=18, labelsize=16)
plt.rc("xtick", labelsize=16)
plt.rc("ytick", labelsize=16)
plt.rc("legend", fontsize=14)
plt.rc("figure", titlesize=20)

# Directory that saved figures are written to (created on first run).
out_dir = Path("..", "..", "Figures")
out_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Data set: n noisy samples of the Runge function on [-1, 1].
n = 1500
x = np.linspace(-1, 1, n)
x = x.reshape(-1, 1)  # column vector, shape (n, 1)

# Seed NumPy's global generator before drawing the noise so that a
# Restart & Run All reproduces the same data (the split below was already
# seeded, the noise was not). Same seed value as the split, for simplicity.
# NOTE(review): any later code that draws from np.random (possibly the
# network weight initialisation in `functions`) becomes deterministic too.
np.random.seed(2018)
y = runge_function(x) + np.random.normal(0, 0.1, (n,1))

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2018)
# Force column-vector shapes (a no-op if the split already returns (k, 1) arrays).
X_train, X_test = X_train.reshape(-1,1), X_test.reshape(-1,1)
y_train, y_test = y_train.reshape(-1,1), y_test.reshape(-1,1)

# Centre the inputs using the training mean only, so that no information
# from the test set leaks into the preprocessing.
X_mean = X_train.mean(axis=0)
X_train = (X_train - X_mean)
X_test  = (X_test - X_mean)


In [None]:
# Reference model: ordinary least squares on a degree-8 polynomial basis.
poly = PolynomialFeatures(degree=8)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Least-squares coefficients theta = X^+ y. The pseudo-inverse is taken of
# the design matrix itself rather than of X^T X: forming the Gram matrix
# squares the condition number, which costs accuracy for polynomial bases.
theta = np.linalg.pinv(X_train_poly) @ y_train
y_pred_OLS = X_test_poly @ theta

# Order the test points by x so predictions can be drawn as continuous curves.
sort_idx = np.argsort(X_test[:,0])
X_sort = X_test[sort_idx]
y_sort = y_test[sort_idx]
y_pred_OLS_sort = y_pred_OLS[sort_idx]

In [None]:
# Two regression networks with one input feature, sigmoid hidden layers and
# an identity output, both using MSE as cost.
# Hidden layout: one layer of 50 nodes.
reg_nn_gd_1layer = NeuralNetwork(
    1,
    [50, 1],
    [sigmoid, identity],
    [derivate(sigmoid), derivate(identity)],
    mse,
    mse_der,
)
# Hidden layout: 50 nodes followed by 100 nodes.
reg_nn_gd_2layer = NeuralNetwork(
    1,
    [50, 100, 1],
    [sigmoid, sigmoid, identity],
    [derivate(sigmoid), derivate(sigmoid), derivate(identity)],
    mse,
    mse_der,
)

In [None]:
# Gradient-descent training of the one-hidden-layer network: five successive
# `train` calls of `epochs` epochs each on the same network object.
# NOTE(review): presumably each call continues from the current weights (the
# disabled plot label below counts cumulative epochs) - confirm in `functions`.
epochs = 20_000

# Disabled plotting code, kept for reference:
# fig, ax = plt.subplots(figsize=(14,10))
# ax.plot(X_sort, y_pred_OLS_sort, label="OLS Prediction")

for i in range(5):
    reg_nn_gd_1layer.train(
        X_train,
        y_train,
        epochs=epochs,
        learning_rate=0.01,
    )
    # Test-set prediction after this round, ordered by x.
    test_output = reg_nn_gd_1layer._feed_forward(X_test)
    y_pred_nn_gd = test_output[sort_idx]
    # ax.plot(X_sort, y_pred_nn_gd, label=f"NN Pred: {(i+1)*epochs} epochs", linewidth=2)

# ax.scatter(X_test, y_test, label="Test data", alpha = 0.4, s=15)
# fig.suptitle("Comparison of OLS and Neural Network Regression with 50 neurons in hidden layer")
# ax.set_xlabel("x")
# ax.set_ylabel("y")
# ax.legend()

In [None]:
# Gradient-descent training of the two-hidden-layer network: five successive
# `train` calls of `epochs` epochs each on the same network object.
# NOTE(review): presumably each call continues from the current weights (the
# disabled plot label below counts cumulative epochs) - confirm in `functions`.
epochs = 20_000

# Disabled plotting code, kept for reference:
# fig, ax = plt.subplots(figsize=(14,10))
# ax.plot(X_sort, y_pred_OLS_sort, label="OLS Prediction")

for i in range(5):
    reg_nn_gd_2layer.train(
        X_train,
        y_train,
        epochs=epochs,
        learning_rate=0.01,
    )
    # Test-set prediction after this round, ordered by x.
    test_output = reg_nn_gd_2layer._feed_forward(X_test)
    y_pred_nn_gd = test_output[sort_idx]
    # ax.plot(X_sort, y_pred_nn_gd, label=f"NN Pred: {(i+1)*epochs} epochs", linewidth=2)

# ax.scatter(X_test, y_test, label="Test data", alpha = 0.4, s=15)
# fig.suptitle("Comparison of OLS and Neural Network Regression with 50 neurons in hidden layer")
# ax.set_xlabel("x")
# ax.set_ylabel("y")
# ax.legend()


In [None]:
# Training-cost curves of both gradient-descent networks on a log y-axis,
# each annotated with its final cost value.
cost_1layer = reg_nn_gd_1layer.get_info()['Cost_history']
cost_2layer = reg_nn_gd_2layer.get_info()['Cost_history']

fig, ax = plt.subplots(figsize=(10,6))
ax.plot(cost_1layer, label="1 layer NN")
ax.plot(cost_2layer, label="2 layer NN")

# The annotation text is shifted by hand-picked offsets (60000 entries to the
# left of the last point, and 0.75 / 0.55 upwards) to keep it readable.
ax.annotate(
    f"1 Layer: Final training cost: {cost_1layer[-1]:.4f}",
    xy=(len(cost_1layer) - 1, cost_1layer[-1]),
    xytext=(len(cost_1layer) - 60000, cost_1layer[-1] + 0.75),
    arrowprops=dict(facecolor='black', arrowstyle='->'),
)
# Unlike the first one, this annotation is drawn without an arrow.
ax.annotate(
    f"2 Layer: Final training cost: {cost_2layer[-1]:.4f}",
    xy=(len(cost_2layer) - 1, cost_2layer[-1]),
    xytext=(len(cost_2layer) - 60000, cost_2layer[-1] + 0.55),
)

ax.set_xlabel("Epochs")
ax.set_ylabel("MSE Cost")
ax.set_yscale("log")
fig.suptitle("Cost history for 1 and 2 layer Neural Networks.")
ax.legend()




In [None]:
# Test-set predictions of the two gradient-descent networks.
y_pred_nn_gd_1layer = reg_nn_gd_1layer._feed_forward(X_test)
y_pred_nn_gd_2layer = reg_nn_gd_2layer._feed_forward(X_test)

# Disabled comparison figure, kept for reference:
# fig, ax = plt.subplots(figsize=(14,10))
# ax.plot(X_sort, y_pred_nn_gd_1layer[sort_idx], label=f"One-layer NN Pred", linewidth=5)
# ax.plot(X_sort, y_pred_nn_gd_2layer[sort_idx], label=f"Two-layer NN Pred", linewidth=5)
# ax.plot(X_sort,y_pred_OLS_sort,  label="OLS Prediction", linewidth=5)
# ax.scatter(X_sort, y_sort,label="Test data", color="black", s=20)
# fig.suptitle("Predictions from OLS, one-layer and two-layer Neural Networks")
# ax.set_xlabel("x")
# ax.set_ylabel("y")
# ax.legend()

# Test MSE printed in the order: one-layer NN, two-layer NN, OLS.
print(
    *(
        mse(y_test, prediction)
        for prediction in (y_pred_nn_gd_1layer, y_pred_nn_gd_2layer, y_pred_OLS)
    )
)

In [None]:
# Learning-rate sweep for the one-hidden-layer network: a fresh network is
# trained for 10000 epochs per rate, recording the test MSE and the last
# entry of the training cost history.
lrs = [0.001, 0.01, 0.1, 0.2, 0.3,0.4, 0.5, 1.0, 1.5, 2.0]
mses_1_test = []
mses_1_train = []

# Disabled plotting code, kept for reference:
# fig, ax = plt.subplots(figsize=(13, 10))
# fig.suptitle("Change in NN Prediction with different Learning Rates\n and cost history of training")
for i in lrs:
    reg_1layer_lr = NeuralNetwork(
        1,
        [50, 1],
        [sigmoid, identity],
        [derivate(sigmoid), derivate(identity)],
        mse,
        mse_der,
    )
    reg_1layer_lr.train(X_train, y_train, epochs=10000, learning_rate=i)

    y_pred_nn_gd = reg_1layer_lr._feed_forward(X_test)
    cost = reg_1layer_lr.get_info()['Cost_history']

    mses_1_test.append(mse(y_pred_nn_gd, y_test))
    mses_1_train.append(cost[-1])
#     ax.plot(cost, label=f"LR: {i}")

# ax.set_xlabel("Epochs")
# ax.set_ylabel("Cost")
# ax.legend()
# plt.tight_layout()
# plt.show()
   

In [None]:
# Learning-rate sweep for the two-hidden-layer network over the same `lrs`
# grid: a fresh network is trained for 10000 epochs per rate, recording the
# test MSE and the last entry of the training cost history.
mses_2_test = []
mses_2_train = []

# Disabled plotting code, kept for reference:
# fig, ax = plt.subplots(figsize=(13, 10))
# fig.suptitle("Change in NN Prediction with different Learning Rates\n and cost history of training")
for i in lrs:
    reg_2layer_lr = NeuralNetwork(
        1,
        [50, 100, 1],
        [sigmoid, sigmoid, identity],
        [derivate(sigmoid), derivate(sigmoid), derivate(identity)],
        mse,
        mse_der,
    )
    reg_2layer_lr.train(X_train, y_train, epochs=10000, learning_rate=i)

    y_pred_nn_gd = reg_2layer_lr._feed_forward(X_test)
    cost = reg_2layer_lr.get_info()['Cost_history']

    mses_2_test.append(mse(y_pred_nn_gd, y_test))
    mses_2_train.append(cost[-1])
#     ax.plot(cost, label=f"LR: {i}")

# ax.set_xlabel("Epochs")
# ax.set_ylabel("Cost")
# ax.legend()
# plt.tight_layout()
# plt.show()

In [None]:
# Train and test MSE against learning rate for both architectures, on
# log-log axes; the figure is written to out_dir as MSE_lr.pdf.
fig, ax = plt.subplots(figsize = (10,8))

# (values, legend label, extra scatter options), drawn in this order so the
# colour cycle assigns the same colours to the first three series.
scatter_series = (
    (mses_2_test, "2 layer: test", {}),
    (mses_2_train, "2 layer: train", {}),
    (mses_1_test, "1 layer: test", {}),
    (mses_1_train, "1 layer: train", {"color": "black"}),
)
for values, series_label, extra in scatter_series:
    ax.scatter(lrs, values, label=series_label, s=75, **extra)

ax.legend()
ax.set_yscale("log")
ax.set_xscale("log")
ax.set_xlabel("Learning Rate")
ax.set_ylabel("MSE")
fig.suptitle("MSE for different learning rates and network architectures")
fig.savefig(out_dir / "MSE_lr.pdf")

In [None]:
# Optimizer / learning-rate sweep for the one-hidden-layer network trained
# with train_SGD (300 epochs, batch size 20). One figure of cost histories is
# shown per optimizer; the test MSE and the last cost-history entry of every
# run are collected in mse_test / mse_train (optimizer-major order).
l = ["gd", "RMSProp","Adam"]
lr = [0.001, 0.01, 0.1, 0.2]
mse_test = []
mse_train = []

for optimizer_name in l:
    # Each successive learning rate is drawn a little more transparent.
    line_alpha = 1
    for learning_rate in lr:
        reg_ffnn = NeuralNetwork(
            1,
            [50, 1],
            [sigmoid, identity],
            [derivate(sigmoid), derivate(identity)],
            mse,
            mse_der,
        )
        reg_ffnn.train_SGD(
            X_train,
            y_train,
            epochs=300,
            learning_rate=learning_rate,
            batch_size=20,
            optimizer=optimizer_name,
        )
        mse_test.append(mse(y_test, reg_ffnn._feed_forward(X_test)))
        cost_history = reg_ffnn.get_info()['Cost_history']
        mse_train.append(cost_history[-1])
        plt.plot(
            cost_history,
            label=f"{optimizer_name} with LR={learning_rate}",
            alpha=line_alpha,
        )
        line_alpha -= 0.15

    plt.legend()
    plt.xlabel("Epochs")
    plt.ylabel("MSE Cost")
    plt.title(f"Cost history using {optimizer_name} optimizer with different learning rates")
    # Only the RMSProp figure uses a logarithmic cost axis.
    if optimizer_name == "RMSProp":
        plt.yscale("log")
    plt.show()


In [None]:
# Print the collected test MSEs, then the collected training costs, one
# value per line with four decimals.
for heading, collected in (("test \n", mse_test), ("\ntrain\n", mse_train)):
    print(heading)
    for value in collected:
        print(format(value, ".4f"))

In [None]:
# Optimizer / learning-rate sweep for the two-hidden-layer network trained
# with train_SGD (300 epochs, batch size 20). One figure of cost histories is
# saved and shown per optimizer; the test MSE and the last cost-history entry
# of every run are collected in mse_test / mse_train (optimizer-major order).
l = ["gd", "RMSProp","Adam"]
lr = [0.001, 0.01, 0.1, 0.2]
mse_test = []
mse_train = []

for optimizer_name in l:
    # Each successive learning rate is drawn a little more transparent.
    line_alpha = 1
    for learning_rate in lr:
        reg_ffnn = NeuralNetwork(
            1,
            [50, 100, 1],
            [sigmoid, sigmoid, identity],
            [derivate(sigmoid), derivate(sigmoid), derivate(identity)],
            mse,
            mse_der,
        )
        reg_ffnn.train_SGD(
            X_train,
            y_train,
            epochs=300,
            learning_rate=learning_rate,
            batch_size=20,
            optimizer=optimizer_name,
        )
        mse_test.append(mse(y_test, reg_ffnn._feed_forward(X_test)))
        cost_history = reg_ffnn.get_info()['Cost_history']
        mse_train.append(cost_history[-1])
        plt.plot(
            cost_history,
            label=f"{optimizer_name} with LR={learning_rate}",
            alpha=line_alpha,
        )
        line_alpha -= 0.15

    plt.legend()
    plt.xlabel("Epochs")
    plt.ylabel("MSE Cost")
    plt.title(f"Cost history using {optimizer_name} optimizer with different learning rates")
    # Only the RMSProp figure uses a logarithmic cost axis.
    if optimizer_name == "RMSProp":
        plt.yscale("log")
    # Save into the shared figure directory created in the setup cell. The
    # previous hard-coded "Figures/" path pointed somewhere else and raised
    # FileNotFoundError when that folder did not exist.
    plt.savefig(out_dir / f"{optimizer_name}_ch_2layer.pdf")
    plt.show()

In [None]:
# Print the collected test MSEs, then the collected training costs, one
# value per line with four decimals.
for heading, collected in (("test \n", mse_test), ("\ntrain\n", mse_train)):
    print(heading)
    for value in collected:
        print(format(value, ".4f"))

In [None]:
# Final models: one- and two-hidden-layer networks trained with train_SGD
# using Adam (300 epochs, learning rate 0.01, batch size 20).
reg_ffnn_one = NeuralNetwork(
    1,
    [50, 1],
    [sigmoid, identity],
    [derivate(sigmoid), derivate(identity)],
    mse,
    mse_der,
)
reg_ffnn_two = NeuralNetwork(
    1,
    [50, 100, 1],
    [sigmoid, sigmoid, identity],
    [derivate(sigmoid), derivate(sigmoid), derivate(identity)],
    mse,
    mse_der,
)

# Both networks are built first and then trained in the same order as before.
for network in (reg_ffnn_one, reg_ffnn_two):
    network.train_SGD(
        X_train,
        y_train,
        epochs=300,
        learning_rate=0.01,
        batch_size=20,
        optimizer="Adam",
    )

# Test-set predictions used by the comparison figure.
y_pred_one = reg_ffnn_one._feed_forward(X_test)
y_pred_two = reg_ffnn_two._feed_forward(X_test)


In [None]:
# Final comparison: predictions of both Adam-trained networks and the OLS
# baseline drawn over the test data, all ordered by x.
fig, ax = plt.subplots(figsize=(14,10))

prediction_curves = (
    (y_pred_one[sort_idx], "One-layer NN Prediction"),
    (y_pred_two[sort_idx], "Two-layer NN Prediction"),
    (y_pred_OLS_sort, "OLS Prediction"),
)
for curve, curve_label in prediction_curves:
    ax.plot(X_sort, curve, label=curve_label, linewidth=5)

ax.scatter(X_sort, y_sort, label="Test data", color="black", s=20, alpha=0.5)
fig.suptitle("Predictions from OLS, one-layer and two-layer Neural Networks")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
plt.show()