In [None]:
import numpy as np
import seaborn
from matplotlib import pyplot as plt
from mpl_toolkits import mplot3d
import imageio
import glob
from IPython import display

# Apply seaborn's default styling to every matplotlib figure created below.
seaborn.set()
# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

def read_datafile(file):
    """Load a tab-separated numeric text file into a 2-D float64 array.

    Each non-blank line becomes one row and each tab-separated field one column.
    Blank lines (including the empty "line" produced by a trailing newline) are
    skipped, so files that end with a newline no longer break the float conversion.

    Parameters
    ----------
    file : path of the text file to read.

    Returns
    -------
    numpy.ndarray of dtype float64 with shape (rows, columns).

    Raises
    ------
    ValueError if a field cannot be parsed as a float or rows have different lengths.
    """
    with open(file, 'r') as datafile:
        # splitlines() also copes with '\r\n' line endings.
        rows = [line.split('\t') for line in datafile.read().splitlines() if line.strip()]
    return np.array(rows, dtype=np.float64)

In [None]:
# Seed NumPy's global RNG so the randomly initialised network weights are reproducible.
np.random.seed(550)

In [None]:
# Feature and target dimensionality used when reshaping data for the networks.
# NOTE(review): later cells reshape inputs to (n, INPUT_DIM, OUTPUT_DIM); this only
# coincides with an (n, INPUT_DIM, 1) column-vector layout because OUTPUT_DIM == 1.
INPUT_DIM = 1
OUTPUT_DIM = 1

def normalize_data(data, training_mean, training_std):
    """Standardise ``data`` with the given statistics: (data - mean) / std.

    The statistics are passed in explicitly so that test data can be scaled with
    the mean and standard deviation measured on the training data.
    """
    return (data - training_mean) / training_std

# --- Training data: load, split into x/y columns, standardise, plot -------------------
train1_data, train2_data = read_datafile('data/train1'), read_datafile('data/train2')
print(f"train1:{train1_data.shape}, train2:{train2_data.shape}")

train1_x, train1_y = train1_data.T
train2_x, train2_y = train2_data.T

# Statistics are measured on the training sets only; the test sets reuse them below.
mean_train1_x, std_train1_x = np.mean(train1_x), np.std(train1_x)
mean_train1_y, std_train1_y = np.mean(train1_y), np.std(train1_y)
mean_train2_x, std_train2_x = np.mean(train2_x), np.std(train2_x)
mean_train2_y, std_train2_y = np.mean(train2_y), np.std(train2_y)

train1_x = normalize_data(train1_x, mean_train1_x, std_train1_x)
train1_y = normalize_data(train1_y, mean_train1_y, std_train1_y)
train2_x = normalize_data(train2_x, mean_train2_x, std_train2_x)
train2_y = normalize_data(train2_y, mean_train2_y, std_train2_y)

for scatter_x, scatter_y, scatter_title in (
    (train1_x, train1_y, 'train1 data'),
    (train2_x, train2_y, 'train2 data'),
):
    fig = plt.figure()
    fig.set_facecolor('w')
    plt.scatter(scatter_x, scatter_y)
    plt.title(scatter_title)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

train1_nb_examples, train2_nb_examples = train1_x.shape[0], train2_x.shape[0]

# --- Test data: same pipeline, scaled with the matching TRAINING statistics -----------
test1_data, test2_data = read_datafile('data/test1'), read_datafile('data/test2')
print(f"test1:{test1_data.shape}, test2:{test2_data.shape}")

test1_x, test1_y = test1_data.T
test2_x, test2_y = test2_data.T

test1_x = normalize_data(test1_x, mean_train1_x, std_train1_x)
test1_y = normalize_data(test1_y, mean_train1_y, std_train1_y)
test2_x = normalize_data(test2_x, mean_train2_x, std_train2_x)
test2_y = normalize_data(test2_y, mean_train2_y, std_train2_y)

for scatter_x, scatter_y, scatter_title in (
    (test1_x, test1_y, 'test1 data'),
    (test2_x, test2_y, 'test2 data'),
):
    fig = plt.figure()
    fig.set_facecolor('w')
    plt.scatter(scatter_x, scatter_y)
    plt.title(scatter_title)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

test1_nb_examples, test2_nb_examples = test1_x.shape[0], test2_x.shape[0]

In [None]:
# Evenly spaced x grids spanning each (normalised) training range, with as many points
# as there are training examples; used to draw the fitted curves.
min_train1_x, max_train1_x = train1_x.min(), train1_x.max()
train1_uniform_x_samples = np.linspace(min_train1_x, max_train1_x, train1_nb_examples)

min_train2_x, max_train2_x = train2_x.min(), train2_x.max()
train2_uniform_x_samples = np.linspace(min_train2_x, max_train2_x, train2_nb_examples)

In [None]:
class LinearRegressorANN:
    """Single linear layer ``w1 @ x + b1`` trained by gradient descent on squared error.

    Inputs are batches of column vectors shaped (batch, input_dim, 1). ``loss`` and
    ``backward`` reshape predictions/gradients to (batch, 1), so training only
    supports ``output_dim == 1``.

    The object is stateful: ``forward`` caches the inputs and predictions, ``loss``
    caches the residuals, and ``backward`` consumes both. Always call them in the
    order forward -> loss -> backward on the same batch.
    """

    def __init__(self, input_dim=1, output_dim=1):
        """Draw ``w1`` from a standard normal and set every bias to 0.01."""
        self.w1 = np.random.normal(size=(output_dim, input_dim))
        self.b1 = np.full((output_dim, 1), 0.01)

    def set_weights(self, new_weights):
        """Replace the weight matrix ``w1`` (the bias is left unchanged)."""
        self.w1 = new_weights

    def forward(self, inputs):
        """Return ``w1 @ inputs + b1`` and cache inputs/outputs for loss/backward."""
        self.x = inputs
        self.nb_of_input = self.x.shape[0]

        self.wpb = np.matmul(self.w1, self.x) + self.b1
        return self.wpb

    def loss(self, yt):
        """Return the per-example squared error, shape (batch, 1), for targets ``yt``.

        Uses the predictions cached by the most recent ``forward`` call.
        """
        self.err = yt.reshape((self.nb_of_input, 1)) - self.wpb.reshape((self.nb_of_input, 1))
        self.errsqr = self.err**2
        return self.errsqr

    def backward(self, learning_rate):
        """Apply one gradient-descent step using the batch-averaged gradients."""
        # err = target - prediction, so d(err**2)/d(prediction) = -2 * err.
        self.dwpb = -2 * self.err
        per_example = self.dwpb.reshape((self.nb_of_input, 1, 1))
        # Per-example outer product of the output gradient with the input row vector.
        dw1 = np.matmul(per_example, np.transpose(self.x, axes=(0, 2, 1)))

        self.w1 = self.w1 - learning_rate * np.mean(dw1, axis=0)
        self.b1 = self.b1 - learning_rate * np.mean(self.dwpb, axis=0)

In [None]:
class TwoLayerANN:
    """One-hidden-layer network ``w2 @ activation(w1 @ x + b1) + b2`` trained by SGD.

    Inputs are batches of column vectors shaped (batch, input_dim, 1). ``loss``
    reshapes predictions to (batch, 1), so training only supports ``output_dim == 1``.

    The object is stateful: ``forward`` caches activations, ``loss`` caches the
    residuals, and ``backward`` consumes both. Always call them in the order
    forward -> loss -> backward on the same batch.

    Parameters
    ----------
    units : number of hidden units.
    input_dim, output_dim : sizes of the input and output vectors.
    activation_function : "relu", "sigmoid" or "lrelu" (leaky ReLU, slope 0.01).
    loss_function : "mse" (squared error) or "mae" (absolute error).
    use_momentum : if True, updates use a velocity term scaled by ``momentum_factor``.
    momentum_factor : fraction of the previous velocity kept on each step.

    Raises
    ------
    ValueError if ``activation_function`` or ``loss_function`` is not supported.
    """

    ACTIVATIONS = ("relu", "sigmoid", "lrelu")
    LOSSES = ("mse", "mae")

    def __init__(self, units, input_dim=1, output_dim=1,
                 activation_function="relu",
                 loss_function="mse",
                 use_momentum=False, momentum_factor=0.9):
        if activation_function not in self.ACTIVATIONS:
            raise ValueError(f"Unknown activation function: {activation_function!r}")
        if loss_function not in self.LOSSES:
            raise ValueError(f"Unknown loss function: {loss_function!r}")

        # Normal weights scaled by sqrt(2 / fan_in); biases start at 0.01.
        self.w1 = np.random.normal(size=(units, input_dim)) * np.sqrt(2.0/input_dim)
        self.b1 = np.full((units, 1), 0.01)

        self.w2 = np.random.normal(size=(output_dim, units)) * np.sqrt(2.0/units)
        self.b2 = np.full((output_dim, 1), 0.01)

        self.activation_function = activation_function
        self.loss_function = loss_function
        self.use_momentum = use_momentum
        self.momentum_factor = momentum_factor
        # Velocities for w1, b1, w2, b2 (scalars broadcast on the first update).
        self.v1, self.v2, self.v3, self.v4 = 0, 0, 0, 0

    def set_weights_1(self, new_weights):
        """Replace the hidden-layer weight matrix ``w1``."""
        self.w1 = new_weights

    def set_weights_2(self, new_weights):
        """Replace the output-layer weight matrix ``w2``."""
        self.w2 = new_weights

    def forward(self, inputs):
        """Return the network output for ``inputs`` and cache every intermediate."""
        self.x = inputs
        self.nb_of_input = self.x.shape[0]

        self.wmx = np.matmul(self.w1, self.x)
        self.wpb = self.wmx + self.b1
        self.act = self.hidden_activation()
        self.wmr = np.matmul(self.w2, self.act)
        self.wpb2 = self.wmr + self.b2
        return self.wpb2

    def hidden_activation(self):
        """Apply the configured activation to the cached pre-activation ``wpb``.

        The result is also stored on the attribute named after the activation
        (``relu``, ``sigmoid`` or ``lrelu``); the other two are reset to None.
        """
        if self.activation_function == "relu":
            self.sigmoid = None
            self.lrelu = None
            self.relu = np.maximum(self.wpb, np.zeros_like(self.wpb))
            return self.relu
        elif self.activation_function == "sigmoid":
            self.relu = None
            self.lrelu = None
            self.sigmoid = 1.0 / (1 + np.exp(-self.wpb))
            return self.sigmoid
        elif self.activation_function == "lrelu":
            self.relu = None
            self.sigmoid = None
            self.lrelu_cons = 0.01
            self.lrelu = np.where(self.wpb > 0, self.wpb, self.lrelu_cons * self.wpb)
            return self.lrelu
        raise ValueError(f"Unknown activation function: {self.activation_function!r}")

    def loss(self, yt):
        """Return the per-example loss, shape (batch, 1), for targets ``yt``.

        Uses the predictions cached by the most recent ``forward`` call and caches
        the residual ``err = target - prediction`` for ``backward``.
        """
        if self.loss_function == "mse":
            self.abserr = None
            self.err = yt.reshape((self.nb_of_input, 1)) - self.wpb2.reshape((self.nb_of_input, 1))
            self.errsqr = self.err**2
            return self.errsqr
        elif self.loss_function == "mae":
            self.errsqr = None
            self.err = yt.reshape((self.nb_of_input, 1)) - self.wpb2.reshape((self.nb_of_input, 1))
            self.abserr = np.abs(self.err)
            return self.abserr
        raise ValueError(f"Unknown loss function: {self.loss_function!r}")

    def backward_loss(self):
        """Return d(loss)/d(prediction), shape (batch, 1), from the cached residual."""
        if self.loss_function == "mse":
            # err = target - prediction, so d(err**2)/d(prediction) = -2 * err.
            self.dwpb2 = -2 * self.err
            return self.dwpb2
        elif self.loss_function == "mae":
            # d|err|/d(err) = sign(err); sign(0) == 0, so an exact prediction
            # contributes no gradient instead of being pushed in one direction.
            self.dwpb2 = -1 * np.sign(self.err)
            return self.dwpb2
        raise ValueError(f"Unknown loss function: {self.loss_function!r}")

    def backward_hidden_activation(self):
        """Propagate the cached ``dact`` through the activation derivative."""
        if self.activation_function == "relu":
            self.dwpb = np.where(self.wpb > 0, 1 * self.dact, 0)
            return self.dwpb
        elif self.activation_function == "sigmoid":
            self.dwpb = ((1 - self.sigmoid) * self.sigmoid) * self.dact
            return self.dwpb
        elif self.activation_function == "lrelu":
            self.dwpb = np.where(self.wpb > 0, 1 * self.dact, self.lrelu_cons * self.dact)
            return self.dwpb
        raise ValueError(f"Unknown activation function: {self.activation_function!r}")

    def backward(self, learning_rate):
        """Back-propagate the cached loss and update all parameters in place.

        Gradients are averaged over the batch. With momentum enabled each parameter
        keeps a velocity ``v = momentum_factor * v - learning_rate * grad`` that is
        added to the parameter; otherwise plain gradient descent is applied.
        """
        self.dwpb2 = self.backward_loss()

        # Output layer: per-example outer product with the hidden activations.
        dwmr = self.dwpb2.reshape((self.nb_of_input, 1, 1))
        db2 = self.dwpb2
        dw2 = np.matmul(dwmr, np.transpose(self.act, axes=(0, 2, 1)))
        self.dact = np.matmul(np.transpose(self.w2), dwmr)

        # Hidden layer.
        self.dwpb = self.backward_hidden_activation()
        db1 = self.dwpb
        dw1 = np.matmul(self.dwpb, np.transpose(self.x, axes=(0, 2, 1)))

        if self.use_momentum:
            self.v1 = self.momentum_factor * self.v1 - learning_rate * np.mean(dw1, axis=0)
            self.w1 = self.w1 + self.v1
            self.v2 = self.momentum_factor * self.v2 - learning_rate * np.mean(db1, axis=0)
            self.b1 = self.b1 + self.v2
            self.v3 = self.momentum_factor * self.v3 - learning_rate * np.mean(dw2, axis=0)
            self.w2 = self.w2 + self.v3
            self.v4 = self.momentum_factor * self.v4 - learning_rate * np.mean(db2, axis=0)
            self.b2 = self.b2 + self.v4
        else:
            self.w1 = self.w1 - learning_rate * np.mean(dw1, axis=0)
            self.b1 = self.b1 - learning_rate * np.mean(db1, axis=0)
            self.w2 = self.w2 - learning_rate * np.mean(dw2, axis=0)
            self.b2 = self.b2 - learning_rate * np.mean(db2, axis=0)

In [None]:
# Plot the predictions of an untrained (randomly initialised) linear regressor over the
# train1 scatter, evaluated on the evenly spaced x grid.
linear_regressor_ann = LinearRegressorANN()
lra_output = linear_regressor_ann.forward(train1_uniform_x_samples.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
fig = plt.figure()
fig.set_facecolor('w')
plt.scatter(train1_x, train1_y)
plt.plot(train1_uniform_x_samples, lra_output.reshape((train1_nb_examples, 1)), linewidth=3)
plt.title('Linear regressor ANN')
plt.xlabel('x')
plt.ylabel('y')
plt.show()

# Same plot for an untrained two-layer network with 2 hidden units (default ReLU / MSE).
nb_of_hiddenunits = 2
two_layer_ann = TwoLayerANN(nb_of_hiddenunits)
tla_output = two_layer_ann.forward(train1_uniform_x_samples.reshape(train1_nb_examples, INPUT_DIM, OUTPUT_DIM))
fig = plt.figure()
fig.set_facecolor('w')
plt.scatter(train1_x, train1_y)
plt.plot(train1_uniform_x_samples, tla_output.reshape((train1_nb_examples, 1)), linewidth=3)
plt.title('Two layer ANN')
plt.xlabel('x')
plt.ylabel('y')
plt.show()

In [None]:
# Baseline losses of the untrained networks on train1.
# loss() compares the targets with the predictions cached by the LAST forward() call,
# so run forward on train1_x first; otherwise the losses would be computed against
# predictions for whatever inputs an earlier cell happened to feed in.
linear_regressor_ann.forward(train1_x.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
print(f"Linear regressor ANN loss:{np.mean(linear_regressor_ann.loss(train1_y)):.2f}")
two_layer_ann.forward(train1_x.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
print(f"Two layer ANN loss:{np.mean(two_layer_ann.loss(train1_y)):.2f}")
# Reference point: squared error of always predicting the mean of the targets.
mean_regressor_loss = (train1_y.reshape((train1_nb_examples, 1)) - np.full((train1_nb_examples, 1), np.mean(train1_y)))**2
print(f"Mean regressor loss:{np.mean(mean_regressor_loss):.2f}")

In [None]:
# Sweep the single weight of the linear regressor over the integers -10..10 (the bias
# keeps its initial value) and plot the mean train1 loss for each setting.
linear_regressor_ann = LinearRegressorANN() # reset network
min_lra_loss = np.inf
random_weights = np.arange(-10,11)  # despite the name, a fixed integer grid
fig = plt.figure()
fig.set_facecolor('w')
for i in random_weights:
    linear_regressor_ann.set_weights(i.reshape((OUTPUT_DIM, INPUT_DIM)))
    # forward() must precede loss(): loss() reads the predictions cached by forward().
    lra_output = linear_regressor_ann.forward(train1_x.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    lra_loss = np.mean(linear_regressor_ann.loss(train1_y))
    if lra_loss < min_lra_loss:
        min_lra_loss = lra_loss
    plt.scatter(i, lra_loss, color="blue")
plt.title('Loss for Linear regressor ANN')
plt.xlabel('weights')
plt.ylabel('loss')
print(f"Minimum loss:{min_lra_loss:.2f}")
plt.show()

In [None]:
# Loss surface of a 2-unit network when only the hidden-layer weights w1 are swept over
# the integer grid -10..10; every other parameter keeps its random initial value.
nb_of_hiddenunits = 2
two_layer_ann = TwoLayerANN(nb_of_hiddenunits) # reset network
random_weights = np.arange(-10,11)  # despite the name, a fixed integer grid
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(111, projection='3d')
for i in random_weights:
    for j in random_weights:
        two_layer_ann.set_weights_1(np.array([i, j]).reshape((nb_of_hiddenunits, INPUT_DIM)))
        # forward() must precede loss(): loss() reads the predictions cached by forward().
        tla_output = two_layer_ann.forward(train1_x.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
        tla_loss = np.mean(two_layer_ann.loss(train1_y))
        ax.scatter(i, j, tla_loss, color="blue")
plt.title('Loss for Two Layer ANN')
# Label each horizontal axis with the weight component it actually varies.
ax.set_xlabel('weights_1[0]')
ax.set_ylabel('weights_1[1]')
ax.set_zlabel('loss')
plt.show()

# Same sweep for the output-layer weights w2 on a freshly initialised network.
two_layer_ann = TwoLayerANN(nb_of_hiddenunits) # reset network
random_weights = np.arange(-10,11)
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(111, projection='3d')
for i in random_weights:
    for j in random_weights:
        two_layer_ann.set_weights_2(np.array([i, j]).reshape((OUTPUT_DIM, nb_of_hiddenunits)))
        tla_output = two_layer_ann.forward(train1_x.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
        tla_loss = np.mean(two_layer_ann.loss(train1_y))
        ax.scatter(i, j, tla_loss, color="blue")
plt.title('Loss for Two Layer ANN')
ax.set_xlabel('weights_2[0]')
ax.set_ylabel('weights_2[1]')
ax.set_zlabel('loss')
plt.show()

In [None]:
# Re-create both networks with fresh random weights, discarding the weights that the
# sweep cells above overwrote. nb_of_hiddenunits still holds the value set earlier.
linear_regressor_ann = LinearRegressorANN()
two_layer_ann = TwoLayerANN(nb_of_hiddenunits)

In [None]:
# Train the linear regressor on train1 with mini-batch gradient descent, saving one PNG
# frame per epoch and assembling the frames into an animated GIF afterwards.
np.random.seed(550)
linear_regressor_ann = LinearRegressorANN() # reset network
learning_rate = 6e-2
nb_of_epochs = 20
batch_size = 30

fig = plt.figure()
min_lra_loss = np.inf
frame_files = []  # frames written by THIS run, in epoch order
for epoch in range(nb_of_epochs):
    # One pass over the data in consecutive batches; a final partial batch is skipped.
    for i in range(train1_nb_examples//batch_size):
        linear_regressor_ann.forward(train1_x[i*batch_size:i*batch_size+batch_size].reshape((batch_size, INPUT_DIM, OUTPUT_DIM)))
        linear_regressor_ann.loss(train1_y[i*batch_size:i*batch_size+batch_size])
        linear_regressor_ann.backward(learning_rate)
    # Full-training-set loss after this epoch.
    lra_output = linear_regressor_ann.forward(train1_x.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    lra_loss = np.mean(linear_regressor_ann.loss(train1_y))
    print(f"Epoch:{epoch+1}, Linear regressor ANN loss:{lra_loss:.4f}")
    plt.scatter(train1_x, train1_y)
    lra_output = linear_regressor_ann.forward(train1_uniform_x_samples.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    plt.plot(train1_uniform_x_samples, lra_output.reshape((train1_nb_examples, 1)), linewidth=3)
    plt.title(f'Linear regressor ANN, Epoch:{epoch+1}, Training Set, Loss:{lra_loss:.4f}')
    plt.xlabel('x')
    plt.ylabel('y')
    frame_file = f'gif/lra/lra_{epoch+1:03d}.png'
    plt.savefig(frame_file)
    plt.close()
    frame_files.append(frame_file)
    # Early stopping: continue only while the loss improves by more than 1e-5.
    if min_lra_loss - lra_loss > 1e-5:
        min_lra_loss = lra_loss
    else:
        print("Stopped training")
        plt.scatter(train1_x, train1_y)
        lra_output = linear_regressor_ann.forward(train1_uniform_x_samples.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
        plt.plot(train1_uniform_x_samples, lra_output.reshape((train1_nb_examples, 1)), linewidth=3)
        plt.title(f'Linear regressor ANN, Epoch:{epoch+1}, Training Set, Loss:{lra_loss:.4f}')
        plt.xlabel('x')
        plt.ylabel('y')
        plt.savefig('output/lra_train.png')
        plt.close()
        break

anim_file = 'gif/lra_training.gif'

# Build the GIF only from frames produced above, so stale PNGs left in the folder by an
# earlier (longer) run cannot leak into the animation.
frames = [imageio.imread(frame_file) for frame_file in frame_files]
if frames:
    # Hold the final frame for 10 extra ticks without re-reading it from disk.
    frames.extend([frames[-1]] * 10)
    imageio.mimsave(anim_file, frames, 'GIF', fps=8)
else:
    print(f"No frames were written; skipping {anim_file}")

In [None]:
# Show the training animation written by the previous cell.
display.Image(filename='gif/lra_training.gif')

In [None]:
# Evaluate the trained linear regressor on the train1 training set.
# forward() must precede loss(): loss() reads the predictions cached by forward().
lra_output = linear_regressor_ann.forward(train1_x.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
lra_loss = np.mean(linear_regressor_ann.loss(train1_y))
lra_loss_std = np.std(linear_regressor_ann.loss(train1_y))
print(f"Linear regressor ANN, training set loss:{lra_loss:.4f}, std:{lra_loss_std:.4f}")

# Fitted line over the training scatter, drawn on the evenly spaced x grid.
fig = plt.figure()
fig.set_facecolor('w')
plt.scatter(train1_x, train1_y)
lra_output = linear_regressor_ann.forward(train1_uniform_x_samples.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
plt.plot(train1_uniform_x_samples, lra_output.reshape((train1_nb_examples, 1)), linewidth=3)
plt.title(f'Linear regressor ANN, Training Set, Loss:{np.mean(lra_loss):.4f}')
plt.xlabel('x')
plt.ylabel('y')
plt.savefig('output/lra_train_curve.png')
plt.show()

# Same evaluation on the test1 set (already scaled with the training statistics).
lra_output = linear_regressor_ann.forward(test1_x.reshape((test1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
lra_loss = np.mean(linear_regressor_ann.loss(test1_y))
lra_loss_std = np.std(linear_regressor_ann.loss(test1_y))
print(f"Linear regressor ANN, test set loss:{lra_loss:.4f}, std:{lra_loss_std:.4f}")

# The fitted line is still drawn over the TRAINING x range, on top of the test scatter.
fig = plt.figure()
fig.set_facecolor('w')
plt.scatter(test1_x, test1_y)
lra_output = linear_regressor_ann.forward(train1_uniform_x_samples.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
plt.plot(train1_uniform_x_samples, lra_output.reshape((train1_nb_examples, 1)), linewidth=3)
plt.title(f'Linear regressor ANN, Test Set, Loss:{np.mean(lra_loss):.4f}')
plt.xlabel('x')
plt.ylabel('y')
plt.savefig('output/lra_test_curve.png')
plt.show()

In [None]:
# Per-network hyper-parameters for train1, keyed by the number of hidden units.
lr_config = {2: 8e-3, 4: 1e-2, 8: 1e-2, 16: 5e-3}
epoch_config = {2: 5500, 4: 8000, 8: 7500, 16: 9000}
batchsize_config = {2: 2, 4: 2, 8: 2, 16: 3}
activation_config = {2: "sigmoid", 4: "sigmoid", 8: "sigmoid", 16: "sigmoid"}
loss_config = {2: "mse", 4: "mse", 8: "mse", 16: "mse"}
momentum_config = {2: 0.75, 4: 0.75, 8: 0.9, 16: 0.6} # use 0 for no momentum
# Training stops as soon as the full-training-set loss drops below this value.
stop_loss_config = {2: 0.05795, 4: 0.02025, 8: 0.02045, 16: 0.02065}
plot_color = {2: "red", 4: "cyan", 8: "magenta", 16: "black"}

trained_nets = []  # trained networks in the order 2, 4, 8, 16 hidden units

for nb_of_hiddenunits in (2, 4, 8, 16):
    # Re-seed so every network size starts from a reproducible initialisation.
    np.random.seed(550)
    learning_rate = lr_config[nb_of_hiddenunits]
    nb_of_epochs = epoch_config[nb_of_hiddenunits]
    batch_size = batchsize_config[nb_of_hiddenunits]

    two_layer_ann = TwoLayerANN(nb_of_hiddenunits, 
                                activation_function=activation_config[nb_of_hiddenunits], 
                                loss_function=loss_config[nb_of_hiddenunits], 
                                use_momentum=True, momentum_factor=momentum_config[nb_of_hiddenunits]) # reset network


    fig = plt.figure()
    frame_files = []  # frames written by THIS run, in epoch order
    print(f"Training two layer ANN with {nb_of_hiddenunits} units")
    for epoch in range(nb_of_epochs):
        # One pass over the data in consecutive batches; a final partial batch is skipped.
        for i in range(train1_nb_examples//batch_size):
            two_layer_ann.forward(train1_x[i*batch_size:i*batch_size+batch_size].reshape((batch_size, INPUT_DIM, OUTPUT_DIM)))
            two_layer_ann.loss(train1_y[i*batch_size:i*batch_size+batch_size])
            two_layer_ann.backward(learning_rate)
        # Full-training-set loss after this epoch.
        tla_output = two_layer_ann.forward(train1_x.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
        tla_loss = np.mean(two_layer_ann.loss(train1_y))
        # Log and save an animation frame on the first epoch and every 500th epoch.
        if epoch == 0 or (epoch+1) % 500 == 0:
            print(f"Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")
            plt.scatter(train1_x, train1_y)
            tla_output = two_layer_ann.forward(train1_uniform_x_samples.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
            plt.plot(train1_uniform_x_samples, tla_output.reshape((train1_nb_examples, 1)), 
                     color=plot_color[nb_of_hiddenunits], linewidth=3)
            plt.title(f'Two layer ANN ({nb_of_hiddenunits} units), Epoch:{epoch+1}, Training Set, Loss:{tla_loss:.4f}')
            plt.xlabel('x')
            plt.ylabel('y')
            frame_file = f'gif/tla_{nb_of_hiddenunits}/tla_{epoch+1:04d}.png'
            plt.savefig(frame_file)
            plt.close()
            frame_files.append(frame_file)
        if tla_loss < stop_loss_config[nb_of_hiddenunits]:
            print(f"Stopped training, Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")
            plt.scatter(train1_x, train1_y)
            tla_output = two_layer_ann.forward(train1_uniform_x_samples.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
            plt.plot(train1_uniform_x_samples, tla_output.reshape((train1_nb_examples, 1)), 
                     color=plot_color[nb_of_hiddenunits], linewidth=3)
            plt.title(f'Two layer ANN ({nb_of_hiddenunits} units), Epoch:{epoch+1}, Training Set, Loss:{tla_loss:.4f}')
            plt.xlabel('x')
            plt.ylabel('y')
            plt.savefig(f'output/tla_{nb_of_hiddenunits}_train.png')
            plt.close()
            break

    anim_file = f'gif/tla_{nb_of_hiddenunits}_training.gif'

    # Build the GIF only from frames produced above, so stale PNGs left in the folder
    # by an earlier (longer) run cannot leak into the animation.
    frames = [imageio.imread(frame_file) for frame_file in frame_files]
    if frames:
        # Hold the final frame for 10 extra ticks without re-reading it from disk.
        frames.extend([frames[-1]] * 10)
        imageio.mimsave(anim_file, frames, 'GIF', fps=4)
    else:
        print(f"No frames were written; skipping {anim_file}")

    trained_nets.append(two_layer_ann)

In [None]:
# Show the train1 training animation of the 2-unit network.
nb_of_hiddenunits = 2
anim_file = f'gif/tla_{nb_of_hiddenunits}_training.gif'
display.Image(filename=anim_file)

In [None]:
# Show the train1 training animation of the 4-unit network.
nb_of_hiddenunits = 4
anim_file = f'gif/tla_{nb_of_hiddenunits}_training.gif'
display.Image(filename=anim_file)

In [None]:
# Show the train1 training animation of the 8-unit network.
nb_of_hiddenunits = 8
anim_file = f'gif/tla_{nb_of_hiddenunits}_training.gif'
display.Image(filename=anim_file)

In [None]:
# Show the train1 training animation of the 16-unit network.
nb_of_hiddenunits = 16
anim_file = f'gif/tla_{nb_of_hiddenunits}_training.gif'
display.Image(filename=anim_file)

In [None]:
# Evaluate every trained two-layer network on the train1 training and test sets.
# trained_nets is indexed in the same order as ann_hidden_units.
ann_hidden_units = [2, 4, 8, 16]
for i in range(4):
    two_layer_ann = trained_nets[i]
    # forward() must precede loss(): loss() reads the predictions cached by forward().
    tla_output = two_layer_ann.forward(train1_x.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    tla_loss = np.mean(two_layer_ann.loss(train1_y))
    tla_loss_std = np.std(two_layer_ann.loss(train1_y))
    print(f"Two layer ANN, {ann_hidden_units[i]} units, training set loss:{tla_loss:.4f}, std:{tla_loss_std:.4f}")

    # Fitted curve over the training scatter, drawn on the evenly spaced x grid.
    fig = plt.figure()
    fig.set_facecolor('w')
    plt.scatter(train1_x, train1_y)
    tla_output = two_layer_ann.forward(train1_uniform_x_samples.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    plt.plot(train1_uniform_x_samples, tla_output.reshape((train1_nb_examples, 1)), 
             color=plot_color[ann_hidden_units[i]], linewidth=3)
    plt.title(f'Two layer ANN, {ann_hidden_units[i]} units, Training Set, Loss:{np.mean(tla_loss):.4f}')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.savefig(f'output/tla_{ann_hidden_units[i]}_train_curve.png')
    plt.show()

    # Same evaluation on the test1 set.
    tla_output = two_layer_ann.forward(test1_x.reshape((test1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    tla_loss = np.mean(two_layer_ann.loss(test1_y))
    tla_loss_std = np.std(two_layer_ann.loss(test1_y))
    print(f"Two layer ANN, {ann_hidden_units[i]} units, test set loss:{tla_loss:.4f}, std:{tla_loss_std:.4f}")
    
    # The fitted curve is still drawn over the TRAINING x range, on top of the test scatter.
    fig = plt.figure()
    fig.set_facecolor('w')
    plt.scatter(test1_x, test1_y)
    tla_output = two_layer_ann.forward(train1_uniform_x_samples.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    plt.plot(train1_uniform_x_samples, tla_output.reshape((train1_nb_examples, 1)), 
             color=plot_color[ann_hidden_units[i]], linewidth=3)
    plt.title(f'Two layer ANN, {ann_hidden_units[i]} units, Test Set, Loss:{np.mean(tla_loss):.4f}')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.savefig(f'output/tla_{ann_hidden_units[i]}_test_curve.png')
    plt.show()

In [None]:
# Overlay the fitted curves of all four trained networks on the train1 scatter.
fig = plt.figure()
fig.set_facecolor('w')
plt.scatter(train1_x, train1_y)

# trained_nets is ordered 2, 4, 8, 16 hidden units. The 4-unit curve is drawn thicker
# (linewidth 6) so it stays visible underneath the curves plotted after it.
for curve_units, curve_width, two_layer_ann in zip((2, 4, 8, 16), (3, 6, 3, 3), trained_nets):
    tla_output = two_layer_ann.forward(train1_uniform_x_samples.reshape((train1_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    plt.plot(train1_uniform_x_samples, tla_output.reshape((train1_nb_examples, 1)), label=f'{curve_units} Units',
             color=plot_color[curve_units], linewidth=curve_width)

leg = plt.legend(loc='upper left')

# Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and later removed the
# old name; prefer the new attribute and fall back for older installations.
legend_handles = getattr(leg, 'legend_handles', None)
if legend_handles is None:
    legend_handles = leg.legendHandles
# Use one line width in the legend regardless of the width used in the plot.
for legobj in legend_handles:
    legobj.set_linewidth(3)

plt.title('Two layer ANNs with different number of hidden units')
plt.xlabel('x')
plt.ylabel('y')
plt.savefig('output/tla_all_curves.png')
plt.show()

In [None]:
# Train the linear regressor on train2 with mini-batch gradient descent, saving one PNG
# frame per epoch and assembling the frames into an animated GIF afterwards.
np.random.seed(550)
linear_regressor_ann = LinearRegressorANN() # reset network
learning_rate = 1.5e-1
nb_of_epochs = 20
batch_size = 229

fig = plt.figure()
min_lra_loss = np.inf
frame_files = []  # frames written by THIS run, in epoch order
for epoch in range(nb_of_epochs):
    # One pass over the data in consecutive batches; a final partial batch is skipped.
    for i in range(train2_nb_examples//batch_size):
        linear_regressor_ann.forward(train2_x[i*batch_size:i*batch_size+batch_size].reshape((batch_size, INPUT_DIM, OUTPUT_DIM)))
        linear_regressor_ann.loss(train2_y[i*batch_size:i*batch_size+batch_size])
        linear_regressor_ann.backward(learning_rate)
    # Full-training-set loss after this epoch.
    lra_output = linear_regressor_ann.forward(train2_x.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    lra_loss = np.mean(linear_regressor_ann.loss(train2_y))
    print(f"Epoch:{epoch+1}, Linear regressor ANN loss:{lra_loss:.4f}")
    plt.scatter(train2_x, train2_y)
    lra_output = linear_regressor_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    plt.plot(train2_uniform_x_samples, lra_output.reshape((train2_nb_examples, 1)), linewidth=3)
    plt.title(f'Linear regressor ANN, Epoch:{epoch+1}, Training Set, Loss:{lra_loss:.4f}')
    plt.xlabel('x')
    plt.ylabel('y')
    frame_file = f'gif/lra_2/lra_{epoch+1:03d}.png'
    plt.savefig(frame_file)
    plt.close()
    frame_files.append(frame_file)
    # Early stopping: continue only while the loss improves by more than 1e-5.
    if min_lra_loss - lra_loss > 1e-5:
        min_lra_loss = lra_loss
    else:
        print("Stopped training")
        plt.scatter(train2_x, train2_y)
        lra_output = linear_regressor_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
        plt.plot(train2_uniform_x_samples, lra_output.reshape((train2_nb_examples, 1)), linewidth=3)
        plt.title(f'Linear regressor ANN, Epoch:{epoch+1}, Training Set, Loss:{lra_loss:.4f}')
        plt.xlabel('x')
        plt.ylabel('y')
        plt.savefig('output/lra_train_2.png')
        plt.close()
        break

anim_file = 'gif/lra_training_2.gif'

# Build the GIF only from frames produced above, so stale PNGs left in the folder by an
# earlier (longer) run cannot leak into the animation.
frames = [imageio.imread(frame_file) for frame_file in frame_files]
if frames:
    # Hold the final frame for 10 extra ticks without re-reading it from disk.
    frames.extend([frames[-1]] * 10)
    imageio.mimsave(anim_file, frames, 'GIF', fps=8)
else:
    print(f"No frames were written; skipping {anim_file}")

In [None]:
# Show the train2 training animation of the linear regressor.
anim_file = 'gif/lra_training_2.gif'
display.Image(filename=anim_file)

In [None]:
# Evaluate the trained linear regressor on the train2 training set.
# forward() must precede loss(): loss() reads the predictions cached by forward().
lra_output = linear_regressor_ann.forward(train2_x.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
lra_loss = np.mean(linear_regressor_ann.loss(train2_y))
lra_loss_std = np.std(linear_regressor_ann.loss(train2_y))
print(f"Linear regressor ANN, training set loss:{lra_loss:.4f}, std:{lra_loss_std:.4f}")

# Fitted line over the training scatter, drawn on the evenly spaced x grid.
fig = plt.figure()
fig.set_facecolor('w')
plt.scatter(train2_x, train2_y)
lra_output = linear_regressor_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
plt.plot(train2_uniform_x_samples, lra_output.reshape((train2_nb_examples, 1)), linewidth=3)
plt.title(f'Linear regressor ANN, Training Set, Loss:{np.mean(lra_loss):.4f}')
plt.xlabel('x')
plt.ylabel('y')
plt.savefig('output/lra_train_curve_2.png')
plt.show()

# Same evaluation on the test2 set (already scaled with the training statistics).
lra_output = linear_regressor_ann.forward(test2_x.reshape((test2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
lra_loss = np.mean(linear_regressor_ann.loss(test2_y))
lra_loss_std = np.std(linear_regressor_ann.loss(test2_y))
print(f"Linear regressor ANN, test set loss:{lra_loss:.4f}, std:{lra_loss_std:.4f}")

# The fitted line is still drawn over the TRAINING x range, on top of the test scatter.
fig = plt.figure()
fig.set_facecolor('w')
plt.scatter(test2_x, test2_y)
lra_output = linear_regressor_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
plt.plot(train2_uniform_x_samples, lra_output.reshape((train2_nb_examples, 1)), linewidth=3)
plt.title(f'Linear regressor ANN, Test Set, Loss:{np.mean(lra_loss):.4f}')
plt.xlabel('x')
plt.ylabel('y')
plt.savefig('output/lra_test_curve_2.png')
plt.show()

In [None]:
# Per-network hyper-parameters for train2, keyed by the number of hidden units.
lr_config = {2: 9e-3, 4: 9e-3, 8: 1e-2, 16: 9e-3}
epoch_config = {2: 7500, 4: 6500, 8: 8000, 16: 30000}
batchsize_config = {2: 3, 4: 3, 8: 2, 16: 2}
activation_config = {2: "sigmoid", 4: "sigmoid", 8: "sigmoid", 16: "sigmoid"}
loss_config = {2: "mse", 4: "mse", 8: "mse", 16: "mse"}
momentum_config = {2: 0.4, 4: 0.4, 8: 0.5, 16: 0.3} # use 0 for no momentum
# Training stops as soon as the full-training-set loss drops below this value.
stop_loss_config = {2: 0.28005, 4: 0.14305, 8: 0.05975, 16: 0.05915}
plot_color = {2: "red", 4: "cyan", 8: "magenta", 16: "black"}

trained_nets_2 = []  # trained networks in the order 2, 4, 8, 16 hidden units

for nb_of_hiddenunits in (2, 4, 8, 16):
    # Re-seed so every network size starts from a reproducible initialisation.
    np.random.seed(550)
    learning_rate = lr_config[nb_of_hiddenunits]
    nb_of_epochs = epoch_config[nb_of_hiddenunits]
    batch_size = batchsize_config[nb_of_hiddenunits]

    two_layer_ann = TwoLayerANN(nb_of_hiddenunits, 
                                activation_function=activation_config[nb_of_hiddenunits], 
                                loss_function=loss_config[nb_of_hiddenunits], 
                                use_momentum=True, momentum_factor=momentum_config[nb_of_hiddenunits]) # reset network
    
    fig = plt.figure()
    frame_files = []  # frames written by THIS run, in epoch order
    print(f"Training two layer ANN with {nb_of_hiddenunits} units")
    for epoch in range(nb_of_epochs):
        # One pass over the data in consecutive batches; a final partial batch is skipped.
        for i in range(train2_nb_examples//batch_size):
            two_layer_ann.forward(train2_x[i*batch_size:i*batch_size+batch_size].reshape((batch_size, INPUT_DIM, OUTPUT_DIM)))
            two_layer_ann.loss(train2_y[i*batch_size:i*batch_size+batch_size])
            two_layer_ann.backward(learning_rate)
        # Full-training-set loss after this epoch.
        tla_output = two_layer_ann.forward(train2_x.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
        tla_loss = np.mean(two_layer_ann.loss(train2_y))
        # Log and save a frame on the first epoch and every 500th epoch
        # (every 1500th for the 16-unit network, which has a larger epoch budget).
        if epoch == 0 or (epoch+1) % (1500 if nb_of_hiddenunits == 16 else 500) == 0:
            print(f"Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")
            plt.scatter(train2_x, train2_y)
            tla_output = two_layer_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
            plt.plot(train2_uniform_x_samples, tla_output.reshape((train2_nb_examples, 1)), 
                     color=plot_color[nb_of_hiddenunits], linewidth=3)
            plt.title(f'Two layer ANN ({nb_of_hiddenunits} units), Epoch:{epoch+1}, Training Set, Loss:{tla_loss:.4f}')
            plt.xlabel('x')
            plt.ylabel('y')
            frame_file = f'gif/tla_{nb_of_hiddenunits}_2/tla_{epoch+1:05d}.png'
            plt.savefig(frame_file)
            plt.close()
            frame_files.append(frame_file)
        if tla_loss < stop_loss_config[nb_of_hiddenunits]:
            print(f"Stopped training, Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")
            plt.scatter(train2_x, train2_y)
            tla_output = two_layer_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
            plt.plot(train2_uniform_x_samples, tla_output.reshape((train2_nb_examples, 1)), 
                     color=plot_color[nb_of_hiddenunits], linewidth=3)
            plt.title(f'Two layer ANN ({nb_of_hiddenunits} units), Epoch:{epoch+1}, Training Set, Loss:{tla_loss:.4f}')
            plt.xlabel('x')
            plt.ylabel('y')
            plt.savefig(f'output/tla2_{nb_of_hiddenunits}_train2.png')
            plt.close()
            break
    anim_file = f'gif/tla_{nb_of_hiddenunits}_training2.gif'

    # Build the GIF only from frames produced above, so stale PNGs left in the folder
    # by an earlier (longer) run cannot leak into the animation.
    frames = [imageio.imread(frame_file) for frame_file in frame_files]
    if frames:
        # Hold the final frame for 10 extra ticks without re-reading it from disk.
        frames.extend([frames[-1]] * 10)
        imageio.mimsave(anim_file, frames, 'GIF', fps=8)
    else:
        print(f"No frames were written; skipping {anim_file}")

    trained_nets_2.append(two_layer_ann)

In [None]:
# Show the train2 training animation of the 2-unit network.
nb_of_hiddenunits = 2
anim_file = f'gif/tla_{nb_of_hiddenunits}_training2.gif'
display.Image(filename=anim_file)

In [None]:
# Show the train2 training animation of the 4-unit network.
nb_of_hiddenunits = 4
anim_file = f'gif/tla_{nb_of_hiddenunits}_training2.gif'
display.Image(filename=anim_file)

In [None]:
# Show the train2 training animation of the 8-unit network.
nb_of_hiddenunits = 8
anim_file = f'gif/tla_{nb_of_hiddenunits}_training2.gif'
display.Image(filename=anim_file)

In [None]:
# Show the train2 training animation of the 16-unit network.
nb_of_hiddenunits = 16
anim_file = f'gif/tla_{nb_of_hiddenunits}_training2.gif'
display.Image(filename=anim_file)

In [None]:
# Evaluate every trained two-layer network on the train2 training and test sets.
# trained_nets_2 is indexed in the same order as ann_hidden_units.
ann_hidden_units = [2, 4, 8, 16]
for i in range(4):
    two_layer_ann = trained_nets_2[i]
    # forward() must precede loss(): loss() reads the predictions cached by forward().
    tla_output = two_layer_ann.forward(train2_x.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    tla_loss = np.mean(two_layer_ann.loss(train2_y))
    tla_loss_std = np.std(two_layer_ann.loss(train2_y))
    print(f"Two layer ANN, {ann_hidden_units[i]} units, training set loss:{tla_loss:.4f}, std:{tla_loss_std:.4f}")

    # Fitted curve over the training scatter, drawn on the evenly spaced x grid.
    fig = plt.figure()
    fig.set_facecolor('w')
    plt.scatter(train2_x, train2_y)
    tla_output = two_layer_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    plt.plot(train2_uniform_x_samples, tla_output.reshape((train2_nb_examples, 1)), 
             color=plot_color[ann_hidden_units[i]], linewidth=3)
    plt.title(f'Two layer ANN, {ann_hidden_units[i]} units, Training Set, Loss:{np.mean(tla_loss):.4f}')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.savefig(f'output/tla_{ann_hidden_units[i]}_train_curve_2.png')
    plt.show()

    # Same evaluation on the test2 set.
    tla_output = two_layer_ann.forward(test2_x.reshape((test2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    tla_loss = np.mean(two_layer_ann.loss(test2_y))
    tla_loss_std = np.std(two_layer_ann.loss(test2_y))
    print(f"Two layer ANN, {ann_hidden_units[i]} units, test set loss:{tla_loss:.4f}, std:{tla_loss_std:.4f}")

    # The fitted curve is still drawn over the TRAINING x range, on top of the test scatter.
    fig = plt.figure()
    fig.set_facecolor('w')
    plt.scatter(test2_x, test2_y)
    tla_output = two_layer_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    plt.plot(train2_uniform_x_samples, tla_output.reshape((train2_nb_examples, 1)), 
             color=plot_color[ann_hidden_units[i]], linewidth=3)
    plt.title(f'Two layer ANN, {ann_hidden_units[i]} units, Test Set, Loss:{np.mean(tla_loss):.4f}')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.savefig(f'output/tla_{ann_hidden_units[i]}_test_curve_2.png')
    plt.show()

In [None]:
# Overlay the curves learned by the 2-, 4-, 8- and 16-unit networks on the train2
# scatter plot and save the comparison figure.
fig = plt.figure()
fig.set_facecolor('w')
plt.scatter(train2_x, train2_y)

# Assumes trained_nets_2 holds the networks in this order of hidden-unit counts.
for net_index, nb_units in enumerate((2, 4, 8, 16)):
    two_layer_ann = trained_nets_2[net_index]
    tla_output = two_layer_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
    plt.plot(train2_uniform_x_samples, tla_output.reshape((train2_nb_examples, 1)), label=f'{nb_units} Units',
             color=plot_color[nb_units], linewidth=3)

leg = plt.legend(loc='lower left')

# Legend.legendHandles was deprecated in Matplotlib 3.7 in favour of legend_handles
# and removed in later releases; use whichever attribute this installation provides.
legend_handles = getattr(leg, 'legend_handles', None)
if legend_handles is None:
    legend_handles = leg.legendHandles
for legobj in legend_handles:
    legobj.set_linewidth(3)

plt.title('Two layer ANNs with different number of hidden units')
plt.xlabel('x')
plt.ylabel('y')
plt.savefig('output/tla_all_curves_2.png')
plt.show()

In [None]:
# Learning-rate experiment on train2: train an 8-unit network once per learning rate
# and plot the fitted curve when the loss drops below stop_loss or the epoch budget
# of that configuration is exhausted.
nb_of_hiddenunits = 8
lr = {0: 1, 1: 1e-1, 2: 1e-2, 3: 1e-3, 4: 1e-4}
nb_epoch = {0: 1, 1: 500, 2: 2500, 3: 25000, 4: 225000}
# Number of epochs between two progress messages, per configuration.
log_every = {0: 1, 1: 500, 2: 500, 3: 10000, 4: 30000}
batch_size = 229
activation = "sigmoid"
loss = "mse"
momentum = 0.99
stop_loss = 0.12
# NOTE: this rebinds the notebook-level plot_color to a plain string, while earlier
# cells index plot_color by hidden-unit count; re-define it before re-running them.
plot_color = "magenta"

for j in lr:
    np.random.seed(550)  # reset the global NumPy seed so every configuration starts from the same random state
    learning_rate = lr[j]
    nb_of_epochs = nb_epoch[j]

    two_layer_ann = TwoLayerANN(nb_of_hiddenunits,
                                activation_function=activation,
                                loss_function=loss,
                                use_momentum=True, momentum_factor=momentum) # reset network

    fig = plt.figure()
    fig.set_facecolor('w')
    print(f"Training two layer ANN with {nb_of_hiddenunits} units, LR:{learning_rate}")
    for epoch in range(nb_of_epochs):
        # One pass over the training set; a trailing partial batch is skipped by the integer division.
        for i in range(train2_nb_examples//batch_size):
            batch = slice(i*batch_size, i*batch_size+batch_size)
            two_layer_ann.forward(train2_x[batch].reshape((batch_size, INPUT_DIM, OUTPUT_DIM)))
            two_layer_ann.loss(train2_y[batch])
            two_layer_ann.backward(learning_rate)

        # Loss over the whole training set after this epoch.
        tla_output = two_layer_ann.forward(train2_x.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
        tla_loss = np.mean(two_layer_ann.loss(train2_y))

        reached_stop_loss = tla_loss < stop_loss
        is_last_epoch = epoch == nb_of_epochs - 1
        is_log_epoch = epoch == 0 or (epoch+1) % log_every[j] == 0

        if is_log_epoch:
            print(f"Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")
        if reached_stop_loss:
            print(f"Stopped training, Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")
        elif is_last_epoch and not is_log_epoch:
            # Report the final epoch only if the periodic message has not just done so.
            print(f"Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")

        if reached_stop_loss or is_last_epoch:
            # Training is over for this configuration: plot and save the fitted curve.
            plt.scatter(train2_x, train2_y)
            tla_output = two_layer_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
            plt.plot(train2_uniform_x_samples, tla_output.reshape((train2_nb_examples, 1)),
                     color=plot_color, linewidth=3)
            plt.title(f'Two layer ANN ({nb_of_hiddenunits} units), Epoch:{epoch+1}, Training Set, Loss:{tla_loss:.4f}')
            plt.xlabel('x')
            plt.ylabel('y')
            plt.savefig(f'output/tla2_{nb_of_hiddenunits}_train2_d_{j}.png')
            plt.show()
            break
        

In [None]:
# Momentum experiment on train2: train an 8-unit network once per momentum factor
# and plot the fitted curve when the loss drops below stop_loss or, failing that,
# after the last epoch.
nb_of_hiddenunits = 8
learning_rate = 1e-2
nb_of_epochs = 300000
batch_size = 229
activation = "sigmoid"
loss = "mse"
mf = {0: 0, 1: 0.99}
# Number of epochs between two progress messages, per configuration.
log_every = {0: 40000, 1: 500}
stop_loss = 0.12
# NOTE: this rebinds the notebook-level plot_color to a plain string, while earlier
# cells index plot_color by hidden-unit count; re-define it before re-running them.
plot_color = "magenta"

for j in mf:
    np.random.seed(550)  # reset the global NumPy seed so every configuration starts from the same random state
    momentum = mf[j]
    two_layer_ann = TwoLayerANN(nb_of_hiddenunits,
                                activation_function=activation,
                                loss_function=loss,
                                use_momentum=True, momentum_factor=momentum) # reset network

    fig = plt.figure()
    fig.set_facecolor('w')
    print(f"Training two layer ANN with {nb_of_hiddenunits} units, MF:{momentum}")
    for epoch in range(nb_of_epochs):
        # One pass over the training set; a trailing partial batch is skipped by the integer division.
        for i in range(train2_nb_examples//batch_size):
            batch = slice(i*batch_size, i*batch_size+batch_size)
            two_layer_ann.forward(train2_x[batch].reshape((batch_size, INPUT_DIM, OUTPUT_DIM)))
            two_layer_ann.loss(train2_y[batch])
            two_layer_ann.backward(learning_rate)

        # Loss over the whole training set after this epoch.
        tla_output = two_layer_ann.forward(train2_x.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
        tla_loss = np.mean(two_layer_ann.loss(train2_y))

        reached_stop_loss = tla_loss < stop_loss
        is_last_epoch = epoch == nb_of_epochs - 1
        is_log_epoch = epoch == 0 or (epoch+1) % log_every[j] == 0

        if is_log_epoch:
            print(f"Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")
        if reached_stop_loss:
            print(f"Stopped training, Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")
        elif is_last_epoch and not is_log_epoch:
            # Report the final epoch only if the periodic message has not just done so.
            print(f"Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")

        if reached_stop_loss or is_last_epoch:
            # Plot on the last epoch too, so a run that never reaches stop_loss does not
            # leave an empty figure and a missing output file.
            plt.scatter(train2_x, train2_y)
            tla_output = two_layer_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
            plt.plot(train2_uniform_x_samples, tla_output.reshape((train2_nb_examples, 1)),
                     color=plot_color, linewidth=3)
            plt.title(f'Two layer ANN ({nb_of_hiddenunits} units), Epoch:{epoch+1}, Training Set, Loss:{tla_loss:.4f}')
            plt.xlabel('x')
            plt.ylabel('y')
            plt.savefig(f'output/tla2_{nb_of_hiddenunits}_train2_e_{j}.png')
            plt.show()
            break

In [None]:
# Batch-size experiment on train2: train an 8-unit network with batch size 1 and
# with batch size 229 (each with its own learning rate, momentum factor and epoch
# budget) and plot the fitted curve when the loss drops below stop_loss or the
# epoch budget is exhausted.
nb_of_hiddenunits = 8
learning_rates = {0: 8e-3, 1: 1.5e-2}
nb_of_epochs_config = {0: 3500, 1: 110000}
batch_sizes = {0: 1, 1: 229}
# Number of epochs between two progress messages, per configuration.
log_every = {0: 500, 1: 20000}
activation = "sigmoid"
loss = "mse"
mf = {0: 0.1, 1: 0.2}
stop_loss = 0.125
# NOTE: this rebinds the notebook-level plot_color to a plain string, while earlier
# cells index plot_color by hidden-unit count; re-define it before re-running them.
plot_color = "magenta"

for j in batch_sizes:
    np.random.seed(550)  # reset the global NumPy seed so every configuration starts from the same random state
    learning_rate = learning_rates[j]
    batch_size = batch_sizes[j]
    momentum = mf[j]
    nb_of_epochs = nb_of_epochs_config[j]
    two_layer_ann = TwoLayerANN(nb_of_hiddenunits,
                                activation_function=activation,
                                loss_function=loss,
                                use_momentum=True, momentum_factor=momentum) # reset network

    fig = plt.figure()
    fig.set_facecolor('w')
    print(f"Training two layer ANN with {nb_of_hiddenunits} units, BS:{batch_size}")
    for epoch in range(nb_of_epochs):
        # One pass over the training set; a trailing partial batch is skipped by the integer division.
        for i in range(train2_nb_examples//batch_size):
            batch = slice(i*batch_size, i*batch_size+batch_size)
            two_layer_ann.forward(train2_x[batch].reshape((batch_size, INPUT_DIM, OUTPUT_DIM)))
            two_layer_ann.loss(train2_y[batch])
            two_layer_ann.backward(learning_rate)

        # Loss over the whole training set after this epoch.
        tla_output = two_layer_ann.forward(train2_x.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
        tla_loss = np.mean(two_layer_ann.loss(train2_y))

        reached_stop_loss = tla_loss < stop_loss
        is_last_epoch = epoch == nb_of_epochs - 1
        is_log_epoch = epoch == 0 or (epoch+1) % log_every[j] == 0

        if is_log_epoch:
            print(f"Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")
        if reached_stop_loss:
            print(f"Stopped training, Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")
        elif is_last_epoch and not is_log_epoch:
            # Report the final epoch only if the periodic message has not just done so.
            print(f"Epoch:{epoch+1}, Two layer ANN loss:{tla_loss:.4f}")

        if reached_stop_loss or is_last_epoch:
            # Training is over for this configuration: plot and save the fitted curve.
            plt.scatter(train2_x, train2_y)
            tla_output = two_layer_ann.forward(train2_uniform_x_samples.reshape((train2_nb_examples, INPUT_DIM, OUTPUT_DIM)))
            plt.plot(train2_uniform_x_samples, tla_output.reshape((train2_nb_examples, 1)),
                     color=plot_color, linewidth=3)
            plt.title(f'Two layer ANN ({nb_of_hiddenunits} units), Epoch:{epoch+1}, Training Set, Loss:{tla_loss:.4f}')
            plt.xlabel('x')
            plt.ylabel('y')
            plt.savefig(f'output/tla2_{nb_of_hiddenunits}_train2_f_{j}.png')
            plt.show()
            break
