In [4]:
# Importing various packages
from random import random, seed
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import sys
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse
import time
from sklearn.preprocessing import StandardScaler
import random
import seaborn as sns
from activation_functions import *
from optimizer_functions import *
from cost_functions import *
from Layer import Layer
from NeuralNetwork import NeuralNetwork

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import optimizers             
from tensorflow.keras import regularizers           
from tensorflow.keras.utils import to_categorical   
from sklearn.neural_network import MLPRegressor

In [None]:
# Synthetic regression data: y = 1 + 3x + 2x^2 plus Gaussian noise (std 0.1).
# Seeding NumPy's global generator and pinning the split makes the data set and
# the train/test partition identical on every Restart & Run All, so the MSE
# curves and heatmaps below are comparable between runs.
SEED = 2023
np.random.seed(SEED)

n = 1000
x = np.random.rand(n,1)
y = 1+3*x+2*x**2+0.1*np.random.randn(n,1)

# Evenly spaced grid on [0, 1]; not used in this cell.
x_lins = np.linspace(0,1,100)

# Design matrix with columns [1, x, x^2].
X = np.c_[np.ones((n,1)), x, x**2]

# 80/20 train/test split; random_state fixes the shuffle.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=SEED)

In [None]:
# Grid search over hidden-layer width and learning rate.
# MSE[row, col] holds the test MSE for n_neurons_list[row] and eta_list[col].

n_neurons_list = [10, 20, 30, 50, 75]
eta_list = [1e-5, 1e-4, 0.001, 0.01, 0.015]
MSE = np.zeros((len(n_neurons_list), len(eta_list)))

for row, n_neurons in enumerate(n_neurons_list):
    for col, eta in enumerate(eta_list):
        # Fresh network per grid point: one sigmoid hidden layer, linear output.
        nn = NeuralNetwork(3, regr_cost_grad, random_state=1)
        hidden_layer = Layer(n_neurons, sigmoid, sigmoid_grad)
        output_layer = Layer(1, linear, linear_grad)
        for layer in (hidden_layer, output_layer):
            nn.add_layer(layer)

        nn.train(Xtrain, ytrain, initial_learning_rate=eta, epochs=100)

        test_prediction = nn.feed_forward_out(Xtest)
        MSE[row, col] = mse(test_prediction, ytest)

In [None]:
# Heatmap of the test MSE: rows are hidden-layer widths, columns are learning rates.
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(
    MSE,
    xticklabels=eta_list,
    yticklabels=n_neurons_list,
    annot=True,
    annot_kws={"size": 8.5},
    fmt=".3f",
    ax=ax,
)
ax.set_xlabel("Learning rate")
ax.set_ylabel("Number of neurons")
ax.set_title("MSE grid search for learning rate and number of neurons")
# Uncomment to export the figure:
# fig.savefig("Grid search for learning rate and epochs.png")

In [None]:
# Evolution of the training and test error over the epochs.
# The error is sampled every `step` epochs, before that chunk is trained.

n_epochs = 500
eta = 0.00001
step = 10

nn = NeuralNetwork(3, regr_cost_grad, random_state=1)
# Both layers are built before either is attached to the network.
for layer in (Layer(30, sigmoid, sigmoid_grad), Layer(1, linear, linear_grad)):
    nn.add_layer(layer)

MSE_train, MSE_test = [], []

for epoch in range(0, n_epochs, step):
    train_prediction = nn.feed_forward_out(Xtrain)
    MSE_train.append(mse(train_prediction, ytrain))

    test_prediction = nn.feed_forward_out(Xtest)
    MSE_test.append(mse(test_prediction, ytest))

    # Continue training the same network for another `step` epochs.
    nn.train(
        Xtrain,
        ytrain,
        initial_learning_rate=eta,
        epochs=step,
        minibatch_size=50,
    )

In [None]:
# Training vs. test MSE, one point every `step` epochs.
epoch_axis = range(0, n_epochs, step)

fig, ax = plt.subplots(figsize=(10, 8))
for errors, label in ((MSE_train, 'Train'), (MSE_test, 'Test')):
    ax.plot(epoch_axis, errors, label=label)
ax.legend()
ax.set_title("Training and test error vs Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Error")
ax.set_ylim(-0.1, 2)
# Uncomment to export the figure:
# fig.savefig("train_test_error.png")

In [None]:
# Comparison of networks with 1, 2 and 3 hidden layers.
# Test error is recorded every `step` epochs while all three are trained in lockstep.

n_epochs = 100
eta = 0.0001
step = 5

hidden_layer_counts = (1, 2, 3)

# Networks are created first, then every layer, then the layers are attached:
# the same order of construction as the explicit, name-by-name version.
networks = [
    NeuralNetwork(3, regr_cost_grad, random_state=1)
    for _count in hidden_layer_counts
]
layer_stacks = [
    [Layer(30, sigmoid, sigmoid_grad) for _k in range(count)]
    + [Layer(1, linear, linear_grad)]
    for count in hidden_layer_counts
]
for network, stack in zip(networks, layer_stacks):
    for layer in stack:
        network.add_layer(layer)

nn_1, nn_2, nn_3 = networks

MSE_1, MSE_2, MSE_3 = [], [], []
histories = (MSE_1, MSE_2, MSE_3)

for epoch in range(0, n_epochs, step):
    # Evaluate all networks first ...
    for network, history in zip(networks, histories):
        history.append(mse(network.feed_forward_out(Xtest), ytest))
    # ... then train each of them for another `step` epochs.
    for network in networks:
        network.train(
            Xtrain,
            ytrain,
            initial_learning_rate=eta,
            epochs=step,
            minibatch_size=20,
        )

In [None]:
# Test MSE per number of hidden layers, one point every `step` epochs.
epoch_axis = range(0, n_epochs, step)
curves = (
    (MSE_1, '1 hidden layer'),
    (MSE_2, '2 hidden layers'),
    (MSE_3, '3 hidden layers'),
)

fig, ax = plt.subplots(figsize=(10, 8))
for errors, label in curves:
    ax.plot(epoch_axis, errors, label=label)
ax.legend()
ax.set_title("Test error vs Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Error")
ax.set_ylim(-0.1, 2.5)
# Uncomment to export the figure:
# fig.savefig("number_of_hidden_layers.png")

In [None]:
# Gradient descent vs. stochastic gradient descent (varying the minibatch size).
# Test error is recorded every `step` epochs while the four identical networks
# are trained in lockstep.

n_epochs = 400
eta = 0.00001
step = 5

# Full-batch gradient descent means one batch holding the whole training set.
# The previous hard-coded 1000 equals the size of the full data set, not of
# Xtrain (800 rows after the 80/20 split), so it exceeded the number of samples.
# NOTE(review): how NeuralNetwork.train treats an oversized minibatch is not
# visible here; using the real training-set size removes the dependency on it.
full_batch_size = Xtrain.shape[0]

nn_gd = NeuralNetwork(3,regr_cost_grad,random_state=1)
nn_sgd1 = NeuralNetwork(3,regr_cost_grad,random_state=1)
nn_sgd2 = NeuralNetwork(3,regr_cost_grad,random_state=1)
nn_sgd3 = NeuralNetwork(3,regr_cost_grad,random_state=1)
networks = (nn_gd, nn_sgd1, nn_sgd2, nn_sgd3)

# Same architecture everywhere: 30 sigmoid hidden units and a linear output.
# All layers are built before any is attached, as in the original cell.
layer_stacks = [
    (Layer(30,sigmoid,sigmoid_grad), Layer(1,linear,linear_grad))
    for _network in networks
]
for network, stack in zip(networks, layer_stacks):
    for layer in stack:
        network.add_layer(layer)

MSE_gd = []
MSE_sgd1 = []
MSE_sgd2 = []
MSE_sgd3 = []
histories = (MSE_gd, MSE_sgd1, MSE_sgd2, MSE_sgd3)
minibatch_sizes = (full_batch_size, 50, 20, 1)

for i in range(0,n_epochs,step):
    # Evaluate every network first, then train each for another `step` epochs.
    for network, history in zip(networks, histories):
        history.append(mse(network.feed_forward_out(Xtest),ytest))
    for network, batch_size in zip(networks, minibatch_sizes):
        network.train(Xtrain,ytrain,initial_learning_rate=eta,epochs=step,minibatch_size=batch_size)

In [None]:
# Plot settings repeated from the training cell so the x-axis matches its sampling.
n_epochs = 400
eta = 0.00001
step = 5

epoch_axis = range(0, n_epochs, step)
curves = (
    (MSE_gd, 'GD'),
    (MSE_sgd1, 'SGD minibatch_size=50'),
    (MSE_sgd2, 'SGD minibatch_size=20'),
    (MSE_sgd3, 'SGD minibatch_size=1'),
)

fig, ax = plt.subplots(figsize=(10, 8))
for errors, label in curves:
    ax.plot(epoch_axis, errors, label=label)
ax.legend()
ax.set_title("Test error vs Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Error")
ax.set_ylim(-0.1, 2)
fig.savefig("GD vs SGD test error")

In [None]:
# Sigmoid hidden layer:
# grid search over the regularization parameter and the learning rate.
# MSE_sigm[row, col] holds the test MSE for reg_list[row] and eta_list[col].

eta_list = [1e-5, 1e-4, 0.001, 0.01, 0.015]
reg_list = [0, 0.01, 0.1, 1, 10]
MSE_sigm = np.zeros((len(reg_list), len(eta_list)))

# Learning rate is the outer loop, regularization the inner one.
for col, eta in enumerate(eta_list):
    for row, reg in enumerate(reg_list):
        nn = NeuralNetwork(3, regr_cost_grad, random_state=1)
        hidden_layer = Layer(30, sigmoid, sigmoid_grad)
        output_layer = Layer(1, linear, linear_grad)
        for layer in (hidden_layer, output_layer):
            nn.add_layer(layer)

        nn.train(
            Xtrain,
            ytrain,
            initial_learning_rate=eta,
            regularization=reg,
            epochs=200,
        )
        MSE_sigm[row, col] = mse(nn.feed_forward_out(Xtest), ytest)

# Author's note: best results for learning rate 0.0001 and regularization 0.

In [None]:
# ReLU hidden layer:
# grid search over the regularization parameter and the learning rate.
# MSE_relu[row, col] holds the test MSE for reg_list[row] and eta_list[col].

eta_list = [1e-5, 1e-4, 0.001, 0.01, 0.015]
reg_list = [0, 0.01, 0.1, 1, 10]
MSE_relu = np.zeros((len(reg_list), len(eta_list)))

# Learning rate is the outer loop, regularization the inner one.
for col, eta in enumerate(eta_list):
    for row, reg in enumerate(reg_list):
        nn = NeuralNetwork(3, regr_cost_grad, random_state=1)
        hidden_layer = Layer(30, ReLU, ReLU_grad)
        output_layer = Layer(1, linear, linear_grad)
        for layer in (hidden_layer, output_layer):
            nn.add_layer(layer)

        nn.train(
            Xtrain,
            ytrain,
            initial_learning_rate=eta,
            regularization=reg,
            epochs=200,
        )
        MSE_relu[row, col] = mse(nn.feed_forward_out(Xtest), ytest)

# Author's note: best results for learning rate 0.0001 and regularization 0.

In [None]:
# Leaky ReLU hidden layer:
# grid search over the regularization parameter and the learning rate.
# MSE_lrelu[row, col] holds the test MSE for reg_list[row] and eta_list[col].

eta_list = [1e-5, 1e-4, 0.001, 0.01, 0.015]
reg_list = [0, 0.01, 0.1, 1, 10]
MSE_lrelu = np.zeros((len(reg_list), len(eta_list)))

# Learning rate is the outer loop, regularization the inner one.
for col, eta in enumerate(eta_list):
    for row, reg in enumerate(reg_list):
        nn = NeuralNetwork(3, regr_cost_grad, random_state=1)
        hidden_layer = Layer(30, leakyReLU, leakyReLU_grad)
        output_layer = Layer(1, linear, linear_grad)
        for layer in (hidden_layer, output_layer):
            nn.add_layer(layer)

        nn.train(
            Xtrain,
            ytrain,
            initial_learning_rate=eta,
            regularization=reg,
            epochs=200,
        )
        MSE_lrelu[row, col] = mse(nn.feed_forward_out(Xtest), ytest)

In [None]:
# Tanh hidden layer:
# grid search over the regularization parameter and the learning rate.
# MSE_tanh[row, col] holds the test MSE for reg_list[row] and eta_list[col].

eta_list = [1e-5, 1e-4, 0.001, 0.01, 0.015]
reg_list = [0, 0.01, 0.1, 1, 10]
MSE_tanh = np.zeros((len(reg_list), len(eta_list)))

# Learning rate is the outer loop, regularization the inner one.
for col, eta in enumerate(eta_list):
    for row, reg in enumerate(reg_list):
        nn = NeuralNetwork(3, regr_cost_grad, random_state=1)
        hidden_layer = Layer(30, tanh, tanh_grad)
        output_layer = Layer(1, linear, linear_grad)
        for layer in (hidden_layer, output_layer):
            nn.add_layer(layer)

        nn.train(
            Xtrain,
            ytrain,
            initial_learning_rate=eta,
            regularization=reg,
            epochs=200,
        )
        MSE_tanh[row, col] = mse(nn.feed_forward_out(Xtest), ytest)

In [None]:
# Heatmap for the sigmoid grid: rows are regularization values, columns learning rates.
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(
    MSE_sigm,
    xticklabels=eta_list,
    yticklabels=reg_list,
    annot=True,
    annot_kws={"size": 8.5},
    fmt=".3f",
    ax=ax,
)
ax.set_ylabel("Regularization parameter")
ax.set_xlabel("Learning rate")
ax.set_title("MSE grid search for learning rate and regularization parameter")
# Uncomment to export the figure:
# fig.savefig("sigmoid_grid_search.png")

In [None]:
# Heatmap for the ReLU grid: rows are regularization values, columns learning rates.
# The colour scale is pinned to [0.007, 9.405], the same limits used for the
# leaky-ReLU heatmap.
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(
    MSE_relu,
    vmin=0.007,
    vmax=9.405,
    xticklabels=eta_list,
    yticklabels=reg_list,
    annot=True,
    annot_kws={"size": 8.5},
    fmt=".3f",
    ax=ax,
)
ax.set_ylabel("Regularization parameter")
ax.set_xlabel("Learning rate")
ax.set_title("MSE grid search for learning rate and regularization parameter")
# Uncomment to export the figure:
# fig.savefig("RELU_Grid_search.png")

In [None]:
# Heatmap for the leaky-ReLU grid: rows are regularization values, columns learning rates.
# The colour scale is pinned to [0.007, 9.405], the same limits used for the
# ReLU heatmap.
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(
    MSE_lrelu,
    vmin=0.007,
    vmax=9.405,
    xticklabels=eta_list,
    yticklabels=reg_list,
    annot=True,
    annot_kws={"size": 8.5},
    fmt=".3f",
    ax=ax,
)
ax.set_ylabel("Regularization parameter")
ax.set_xlabel("Learning rate")
ax.set_title("MSE grid search for learning rate and regularization parameter")
fig.savefig("lrelu_grid_search.png")

In [None]:
# Heatmap for the tanh grid: rows are regularization values, columns learning rates.
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(
    MSE_tanh,
    xticklabels=eta_list,
    yticklabels=reg_list,
    annot=True,
    annot_kws={"size": 8.5},
    fmt=".3f",
    ax=ax,
)
ax.set_ylabel("Regularization parameter")
ax.set_xlabel("Learning rate")
ax.set_title("MSE grid search for learning rate and regularization parameter")
fig.savefig("tanh_grid_search.png")

In [None]:
# Different learning-rate schedules: a constant rate (nn_1) against a rate that
# decays linearly from init_eta towards final_eta (nn_2).
# Test error is recorded every `step` epochs.

n_epochs = 100
eta_const = 0.0001
init_eta = 0.01
final_eta = 0.0001
step = 5

nn_1 = NeuralNetwork(3,regr_cost_grad,random_state=1)
nn_2 = NeuralNetwork(3,regr_cost_grad,random_state=1)
layer11 = Layer(30,sigmoid,sigmoid_grad)
layer21 = Layer(1,linear,linear_grad)
layer12 = Layer(30,sigmoid,sigmoid_grad)
layer22 = Layer(1,linear,linear_grad)

nn_1.add_layer(layer11)
nn_1.add_layer(layer21)
nn_2.add_layer(layer12)
nn_2.add_layer(layer22)

MSE_1 = []
MSE_2 = []

for i in range(0,n_epochs,step):
    MSE_1.append(mse(nn_1.feed_forward_out(Xtest),ytest))
    MSE_2.append(mse(nn_2.feed_forward_out(Xtest),ytest))

    nn_1.train(Xtrain,ytrain,initial_learning_rate=eta_const,epochs=step,minibatch_size = 20)

    # Linear interpolation: equals init_eta at i = 0 and would reach final_eta
    # at i = n_epochs. This value used to be computed and then ignored (the call
    # below passed a hard-coded 0.01), so the "adapting" network was actually
    # trained with a constant rate.
    # NOTE(review): any decay that NeuralNetwork.train applies internally to
    # initial_learning_rate is not visible from this notebook.
    eta = init_eta + (final_eta-init_eta)*i/n_epochs
    nn_2.train(Xtrain,ytrain,initial_learning_rate=eta, epochs=step,minibatch_size = 20)

In [None]:
# Test MSE for the two learning-rate schedules, one point every `step` epochs.
epoch_axis = range(0, n_epochs, step)
curves = (
    (MSE_1, 'Constant learning rate'),
    (MSE_2, 'Adapting learning rate'),
)

fig, ax = plt.subplots(figsize=(10, 8))
for errors, label in curves:
    ax.plot(epoch_axis, errors, label=label)
ax.legend()
ax.set_title("Test error vs Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Error")
ax.set_ylim(-0.1, 1)
# Uncomment to export the figure:
# fig.savefig("constant_adapting_lr.png")

In [None]:
# Comparison of our network with the TensorFlow/Keras and scikit-learn
# implementations. All three use one hidden layer of 30 sigmoid units, a linear
# output, learning rate 0.001 and 500 epochs/iterations.

# --- Keras ---
model = Sequential()
model.add(Dense(30, activation='sigmoid'))
model.add(Dense(1, activation='linear'))

model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001), loss='mean_squared_error')
model.fit(Xtrain,ytrain,epochs=500,verbose=0)

tf_error = model.evaluate(Xtest,ytest)

# --- scikit-learn ---
# MLPRegressor defaults to activation='relu'; 'logistic' is its sigmoid, which
# is what the other two models use, so the comparison is like for like.
dnn = MLPRegressor(hidden_layer_sizes=(30,),activation='logistic',learning_rate_init=0.001, max_iter=500)
# fit expects a 1-D target for single-output regression; ytrain is a column
# vector, so it is flattened here.
dnn.fit(Xtrain,ytrain.ravel())
test_predict = dnn.predict(Xtest)

# Arguments in the documented (y_true, y_pred) order; MSE itself is symmetric.
sklearn_error = mse(ytest,test_predict)

# --- Our implementation ---
nn = NeuralNetwork(3,regr_cost_grad,random_state=1)
layer1 = Layer(30,sigmoid,sigmoid_grad)
layer2 = Layer(1,linear,linear_grad)
nn.add_layer(layer1)
nn.add_layer(layer2)
nn.train(Xtrain,ytrain,initial_learning_rate=0.001,regularization=0, epochs=500)
nn_error = mse(ytest,nn.feed_forward_out(Xtest))

print("Tensorflow error: " + str(tf_error))
print("Scikit-learn error: " + str(sklearn_error))
print("Our model's error: " + str(nn_error))

In [None]:
# Weights with different variances.
# Biases drawn from a normal distribution or kept fixed.
#
# Evolution of the test error over the epochs, sampled every `step` epochs.
#
# NOTE(review): only varn_err is filled here. var1_err is reset to an empty list
# and never appended to, although the following plot cell draws it as
# "Variance 1". How the weight variance is selected is not visible in this
# notebook (presumably inside Layer/NeuralNetwork - confirm).

n_epochs = 100
eta = 0.0001
step = 5

nn = NeuralNetwork(3, regr_cost_grad, random_state=1)
# Both layers are built before either is attached to the network.
for layer in (Layer(30, sigmoid, sigmoid_grad), Layer(1, linear, linear_grad)):
    nn.add_layer(layer)

var1_err = []
varn_err = []

for epoch in range(0, n_epochs, step):
    test_prediction = nn.feed_forward_out(Xtest)
    varn_err.append(mse(test_prediction, ytest))
    nn.train(
        Xtrain,
        ytrain,
        initial_learning_rate=eta,
        epochs=step,
        minibatch_size=20,
    )

In [None]:
# Test MSE for the two weight-variance settings, one point every `step` epochs.
# A series is drawn only when it has exactly one value per sampled epoch:
# plotting an empty or partially filled list against the epoch axis makes
# matplotlib raise "x and y must have same first dimension", which would abort
# the cell (var1_err is left empty by the preceding training cell).
epoch_axis = range(0,n_epochs,step)
curves = (
    (var1_err, 'Variance 1'),
    (varn_err, 'Variance 1/sqrt(n)'),
)

plt.figure(figsize=(10,8))
for errors, label in curves:
    if len(errors) != len(epoch_axis):
        print("Skipping '" + label + "': " + str(len(errors))
              + " recorded values, expected " + str(len(epoch_axis)) + ".")
        continue
    plt.plot(epoch_axis,errors,label=label)
plt.legend()
plt.title("Test error vs Epochs")
plt.xlabel("Epochs")
plt.ylabel("Error")
plt.ylim(-0.1,1)
#plt.savefig("weight_variance.png")

In [None]:
# Different bias initializations.
# Evolution of the test error over the epochs, sampled every `step` epochs.
#
# This cell produces every series the following plot cell draws (b1_err, b2_err,
# b4_err, b5_err). Previously only one run (initial_bias=1) was executed and
# stored in b5_err, which the plot labels "Standard normal bias", while the
# other three names were never defined.
# Training also uses the `eta` defined here rather than `eta_const`, which
# leaked in from the learning-rate-schedule cell.

n_epochs = 100
eta=0.001
step = 5

# One entry per curve: keyword arguments forwarded to both Layer constructors.
bias_settings = [
    {"initial_bias": 0.01},   # -> b1_err
    {"initial_bias": 0.1},    # -> b2_err
    {"initial_bias": 1},      # -> b4_err
    # NOTE(review): presumably Layer draws the bias from a standard normal when
    # initial_bias is not given - confirm against Layer's implementation.
    {},                       # -> b5_err
]

bias_histories = []
for layer_kwargs in bias_settings:
    nn = NeuralNetwork(3,regr_cost_grad,random_state=1)
    layer1 = Layer(30,sigmoid,sigmoid_grad,**layer_kwargs)
    layer2 = Layer(1,linear,linear_grad,**layer_kwargs)

    nn.add_layer(layer1)
    nn.add_layer(layer2)

    history = []
    for i in range(0,n_epochs,step):
        history.append(mse(nn.feed_forward_out(Xtest),ytest))
        nn.train(Xtrain,ytrain,initial_learning_rate=eta,epochs=step,minibatch_size = 20)
    bias_histories.append(history)

b1_err, b2_err, b4_err, b5_err = bias_histories

In [None]:
# Test MSE for each bias initialization, one point every `step` epochs.
epoch_axis = range(0, n_epochs, step)
curves = (
    (b1_err, 'Initial bias 0.01'),
    (b2_err, 'Initial bias 0.1'),
    (b4_err, 'Initial bias 1'),
    (b5_err, 'Standard normal bias'),
)

fig, ax = plt.subplots(figsize=(10, 8))
for errors, label in curves:
    ax.plot(epoch_axis, errors, label=label)
ax.legend()
ax.set_title("Test error vs Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Error")
ax.set_ylim(-0.1, 1)
# Uncomment to export the figure:
# fig.savefig("bias_init.png")

In [None]:
# Different hidden-layer activation functions.
# Evolution of the test error over the epochs, sampled every `step` epochs.

n_epochs = 400
step = 5

# (activation, gradient) pairs in the order sigmoid, ReLU, leaky ReLU, tanh.
hidden_activations = (
    (sigmoid, sigmoid_grad),
    (ReLU, ReLU_grad),
    (leakyReLU, leakyReLU_grad),
    (tanh, tanh_grad),
)

# Networks are created first, then every layer, then the layers are attached:
# the same order of construction as the explicit, name-by-name version.
networks = [
    NeuralNetwork(3, regr_cost_grad, random_state=1)
    for _pair in hidden_activations
]
layer_stacks = [
    (Layer(30, activation, gradient), Layer(1, linear, linear_grad))
    for activation, gradient in hidden_activations
]
for network, stack in zip(networks, layer_stacks):
    for layer in stack:
        network.add_layer(layer)

nn_sigm, nn_relu, nn_lrelu, nn_tanh = networks

err_sigm, err_relu, err_lrelu, err_tanh = [], [], [], []
histories = (err_sigm, err_relu, err_lrelu, err_tanh)

for epoch in range(0, n_epochs, step):
    # Evaluate all networks first ...
    for network, history in zip(networks, histories):
        history.append(mse(network.feed_forward_out(Xtest), ytest))
    # ... then train each of them for another `step` epochs.
    for network in networks:
        network.train(Xtrain, ytrain, initial_learning_rate=0.00001, epochs=step)

In [None]:
# Test MSE per activation function, one point every `step` epochs.
epoch_axis = range(0, n_epochs, step)
curves = (
    (err_sigm, 'Sigmoid'),
    (err_relu, 'ReLU'),
    (err_lrelu, 'Leaky ReLU'),
    (err_tanh, 'Tanh'),
)

fig, ax = plt.subplots(figsize=(10, 8))
for errors, label in curves:
    ax.plot(epoch_axis, errors, label=label)
ax.legend()
ax.set_title("Test error vs Epochs")
ax.set_xlabel("Epochs")
ax.set_ylabel("Error")
ax.set_ylim(-0.05, 1)
fig.savefig("nn_activation_function.png")