# A neural network in Python from scratch

In [3]:
import numpy as np
from typing import Union, Callable
from copy import deepcopy

from forward_propagation import (
    forward_propagation, 
    create_weights, 
    vector_to_weights)

from activation_functions import relu, sigmoid 
import test_functions
from gradient_descent import gradient_descent

# IPython autoreload extension in mode 2, so that edits to the imported local
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Fix NumPy's global random seed so the randomly simulated data is reproducible.
np.random.seed(42)

## Multiple inputs

The input is the vector $(1,2,5,4)$, processed by a layer of 3 neurons.  
`weights` is a list with one matrix per layer; each matrix has dimension $\text{nb\_neurons}\times (\text{nb\_inputs}+1)$ (the +1 is for the biases, stored in the last column of the matrix: -1, 0 and 1 in the first example).

In [4]:
# One data point with 4 features, kept 2-D so that each row is one data point.
inputs = np.array([[1, 2, 5, 4]])

# A single layer of 3 neurons: one row per neuron, holding the 4 input
# weights followed by the bias in the last column.
weights = [
    np.array(
        [
            [1, 0.2, 0.5, 1, -1],
            [2, 1, 3, 5, 0],
            [0.2, 0.1, 0.6, 0.78, 1],
        ]
    ),
]

activation = sigmoid
forward_propagation(inputs, weights, activation)

array([[0.99899323, 1.        , 0.99945816]])

Another example:

In [5]:
# A different data point (4 features) sent through the same 3-neuron layer.
inputs = np.array([[1, 0.2, 0.15, 0.024]])

# One row per neuron: 4 input weights followed by the bias in the last column.
weights = [
    np.array(
        [
            [1, 0.2, 0.5, 1, -1],
            [2, 1, 3, 5, 0],
            [0.2, 0.1, 0.6, 0.78, 1],
        ]
    ),
]

activation = sigmoid
forward_propagation(inputs, weights, activation)

array([[0.53469416, 0.94103299, 0.79062883]])

With the same network, compute the outputs for 2 data points at once, entered as rows of `inputs`:

In [6]:
# Two data points processed together: each row of `inputs` is one data point.
inputs = np.array(
    [
        [1, 2, 5, 4],
        [1, 0.2, 0.15, 0.024],
    ]
)

# Same single layer of 3 neurons: one row per neuron, 4 input weights
# followed by the bias in the last column.
weights = [
    np.array(
        [
            [1, 0.2, 0.5, 1, -1],
            [2, 1, 3, 5, 0],
            [0.2, 0.1, 0.6, 0.78, 1],
        ]
    ),
]

activation = sigmoid
forward_propagation(inputs, weights, activation)

array([[0.99899323, 1.        , 0.99945816],
       [0.53469416, 0.94103299, 0.79062883]])

The following network has 2 layers: the first maps the 4 input components to the 3 internal neurons, and the second maps the outputs of those 3 internal neurons to the 2 network outputs:

In [7]:
# Two data points, one per row, each with 4 features.
inputs = np.array(
    [
        [1, 2, 5, 4],
        [1, 0.2, 0.15, 0.024],
    ]
)

weights = [
    # Layer 1: 3 neurons, each row = 4 input weights + bias (last column).
    np.array(
        [
            [1, 0.2, 0.5, 1, -1],
            [2, 1, 3, 5, 0],
            [0.2, 0.1, 0.6, 0.78, 1],
        ]
    ),
    # Layer 2: 2 output neurons, each row = 3 weights (one per neuron of
    # layer 1) + bias (last column).
    np.array(
        [
            [1, 0.2, 0.5, 1],
            [2, 1, 3, 5],
        ]
    ),
]

activation = sigmoid
forward_propagation(inputs, weights, activation)

array([[0.93695121, 0.99998324],
       [0.89266103, 0.99991581]])

### Creating a data set 
Create a data set of points sampled randomly from a function.

In [None]:
# Simple linear test function
def linear_function(x):
    """Return ``1*x[0] + 2*x[1] + ... + d*x[d-1] + 3`` where ``d = len(x)``.

    ``x`` must be a NumPy array, because its ``dot`` method is used.
    """
    # Coefficient of the i-th component is simply i (counting from 1).
    coefficients = np.array(range(1, len(x) + 1))
    return x.dot(coefficients) + 3

In [None]:
def simulate_data_target(fun: Callable,
                         n_features: int,
                         n_obs: int,
                         LB: Union[float, list[float]] = -5,
                         UB: Union[float, list[float]] = 5) -> dict:
    """Sample random points uniformly in a box and evaluate ``fun`` on each.

    Args:
        fun: Function mapping one observation (a 1-D array of length
            ``n_features``) to its target value.
        n_features: Number of components of each observation.
        n_obs: Number of observations to draw.
        LB: Lower bound of the sampling box: either one scalar applied to
            every feature, or one value per feature.
        UB: Upper bound of the sampling box, same conventions as ``LB``.

    Returns:
        A dict with keys ``"data"`` (array of shape ``(n_obs, n_features)``)
        and ``"target"`` (``fun`` applied to each row of ``"data"``).

    Raises:
        ValueError: If a bound is a sequence that cannot be broadcast to
            ``n_features`` values.
    """
    # Expand each bound to exactly one value per feature. A scalar is repeated;
    # a per-feature sequence is used as is (previously a sequence produced a
    # badly-shaped nested list that failed to broadcast against `size`).
    lower = np.broadcast_to(np.asarray(LB, dtype=float), (n_features,))
    upper = np.broadcast_to(np.asarray(UB, dtype=float), (n_features,))

    entry_data = np.random.uniform(low=lower,
                                   high=upper,
                                   size=(n_obs, n_features))
    # Evaluate the target function row by row (axis 1 = one observation).
    target = np.apply_along_axis(fun, 1, entry_data)

    return {"data": entry_data, "target": target}

In [None]:
# Target function used to generate the data set.
# Alternative kept for experimentation: used_function = test_functions.sphere
used_function = linear_function

# Draw 10 observations with 2 features each, using the default bounds of
# simulate_data_target.
simulated_data = simulate_data_target(
    fun=used_function,
    n_features=2,
    n_obs=10,
)

In [None]:
# Display the generated data set: a dict with the keys "data" and "target".
simulated_data

### Neural Network weight and output

In [None]:
# Network structure [2, 1]: 2 input features feeding 1 layer with 1 node.
# NOTE(review): how create_weights initialises the values is defined in
# forward_propagation.py - confirm there.
weights = create_weights([2, 1])

# Network output for every simulated observation, with a sigmoid activation.
predicted_output = forward_propagation(
    simulated_data["data"],
    weights,
    sigmoid,
)

In [None]:
# Display the weights created for the [2, 1] network.
weights

In [None]:
# Display the target values of the simulated data set.
simulated_data["target"]

In [None]:
# Display the network output flattened to 1-D (the same reshape that
# error_with_parameters applies before computing the cost).
predicted_output.reshape(-1,)

In [None]:
# Convert a flat 3-component vector into the weights of a [2, 1] network
# (2 input features, 1 node).
# NOTE(review): presumably 2 input weights followed by 1 bias - confirm
# against vector_to_weights in forward_propagation.py.
vector_to_weights([0.28677805, -0.07982693,  0.37394315],[2,1])

### Error function

In [None]:
# Mean squared error (MSE)
def cost_function_mse(y_predicted: np.ndarray, y_observed: np.ndarray):
    """Return the mean of the squared differences between the two arrays.

    Args:
        y_predicted: Values predicted by the model.
        y_observed: Observed (target) values.
    """
    residuals = y_predicted - y_observed
    return np.mean(np.square(residuals))

In [None]:
# Binary cross-entropy
def cost_function_entropy(y_predicted: np.ndarray,
                          y_observed: np.ndarray,
                          eps: float = 1e-15):
    """Return the mean binary cross-entropy between predictions and targets.

    Computes ``-(1/n) * sum(y*log(p) + (1-y)*log(1-p))`` with ``n`` the
    number of observations.

    Args:
        y_predicted: Predicted values ``p``, expected to lie in ``[0, 1]``.
        y_observed: Observed values ``y``.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before the
            logarithms are taken.

    Returns:
        The cross-entropy as a finite scalar, even for saturated predictions.
    """
    n = len(y_observed)

    # A saturated activation can output exactly 0 or 1. Without clipping,
    # log(0) = -inf and the product 0 * -inf turns the whole cost into nan.
    clipped = np.clip(y_predicted, eps, 1 - eps)

    term_A = np.multiply(y_observed, np.log(clipped))
    term_B = np.multiply(1 - y_observed, np.log(1 - clipped))

    return -(1 / n) * (np.sum(term_A) + np.sum(term_B))

In [None]:
def error_with_parameters(vector_weights: np.ndarray,
                          activation_function: Callable,
                          data: dict,
                          cost_function: Callable,
                          regularization: float = 0,
                          network_structure: Union[list[int], None] = None) -> float:
    """Return the cost of the network defined by a flat weight vector.

    Args:
        vector_weights: Flat vector holding all the weights of the network.
        activation_function: Activation passed to ``forward_propagation``.
        data: Dict with the keys ``"data"`` (network inputs) and ``"target"``
            (values the flattened network output is compared with).
        cost_function: Called as ``cost_function(predicted, target)``.
        regularization: Factor applied to the sum of the absolute values of
            ``vector_weights``, which is added to the cost.
        network_structure: Structure passed to ``vector_to_weights``. When
            ``None``, the module-level ``used_network_structure`` is used, so
            existing calls keep working unchanged.

    Returns:
        The cost plus the regularization penalty.
    """
    if network_structure is None:
        # Backward-compatible fallback on the notebook-level setting; pass the
        # structure explicitly to avoid depending on that global.
        network_structure = used_network_structure

    weights = vector_to_weights(vector_weights, network_structure)
    predicted_output = forward_propagation(data["data"], weights, activation_function)
    # Flatten so the prediction is 1-D when handed to the cost function.
    predicted_output = predicted_output.reshape(-1,)

    penalty = regularization * np.sum(np.abs(vector_weights))

    return cost_function(predicted_output, data["target"]) + penalty

In [None]:
# Experiment configuration, read through module-level names by
# neural_network_cost (and, for the structure, by error_with_parameters).
used_network_structure = [2, 1]  # 2 inputs features, 1 layer with 1 node
used_activation = relu
used_data = simulated_data
used_cost_function = cost_function_mse


def neural_network_cost(vector_weights):
    """Return the cost of the configured network for a flat weight vector.

    Single-argument wrapper around ``error_with_parameters`` that fills in
    the activation, data and cost function from the configuration above.
    """
    return error_with_parameters(
        vector_weights,
        activation_function=used_activation,
        data=used_data,
        cost_function=used_cost_function,
    )

In [None]:
# Evaluate the cost for one specific 3-component weight vector.
neural_network_cost(np.array([0.28677805, -0.07982693,  0.37394315]))

### Gradient descent

In [None]:
# Minimise the network cost, starting from the weight vector evaluated above,
# with each of the 3 components bounded by -5 and 5.
# NOTE(review): `budget` is presumably the maximum number of iterations or
# function evaluations - confirm in gradient_descent.py.
gradient_descent(
    func=neural_network_cost,
    start_x=np.array([0.28677805, -0.07982693, 0.37394315]),
    LB=[-5] * 3,
    UB=[5] * 3,
    budget=100000,
)