In [1]:
import numpy as np

## Generate training data

<img src="./assets/data_point_regression.png" alt="neural_network" width="300">
$
f(X) =
\begin{cases} 
0 & \text{if } X \leq 0, \\
X & \text{if } 0 < X \leq 1, \\
2 - X & \text{if } 1 < X \leq 2, \\
X - 2 & \text{if } X > 2.
\end{cases}
$

In [2]:
X = np.linspace(0, 3, 50).reshape(-1, 1)

In [3]:
y = np.array([0 ,
0.06122449,
0.12244898,
0.18367347,
0.24489796,
0.30612245,
0.36734694,
0.42857143,
0.48979592,
0.55102041,
0.6122449 ,
0.67346939,
0.73469388,
0.79591837,
0.85714286,
0.91836735,
0.97959184,
1.04081633,
1.10204082,
1.16326531,
1.2244898 ,
1.28571429,
1.34693878,
1.40816327,
1.46938776,
1.53061224,
1.59183673,
1.65306122,
1.71428571,
1.7755102 ,
1.83673469,
1.89795918,
1.95918367,
2.02040816,
2.08163265,
2.14285714,
2.20408163,
2.26530612,
2.32653061,
2.3877551 ,
2.44897959,
2.51020408,
2.57142857,
2.63265306,
2.69387755,
2.75510204,
2.81632653,
2.87755102,
2.93877551,
3]).reshape(-1, 1)

## Set Neural Network 

<img src="./assets/neural_network.png" alt="neural_network" width="300">

In [4]:
# Initialize neural network parameters
input_dim = 1
hidden_dim = 3
output_dim = 1

np.random.seed(42)
# Define hidden layer weights and biases
weights_input_hidden = np.random.randn(input_dim, hidden_dim)
bias_hidden = np.zeros((1, hidden_dim))           

# Define output layer weights and biases
weights_hidden_output = np.random.randn(hidden_dim, output_dim)
bias_output = np.zeros((1, output_dim))

# Define the activation function (ReLU)
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

# Define the loss function (MSE)
def mse_loss(y_true, y_pred):
    return np.mean((y_true - y_pred)**2)

def mse_loss_derivative(y_true, y_pred):
    return 2 * (y_pred - y_true) / y_true.size

## Training

In [5]:
# Hyperparameters
learning_rate = 0.01
epochs = 100

# Training loop
for epoch in range(epochs):
    hidden_input = np.dot(X, weights_input_hidden) + bias_hidden  # Linear transformation
    hidden_output = relu(hidden_input)                           # Apply ReLU
    final_input = np.dot(hidden_output, weights_hidden_output) + bias_output  # Linear transformation
    y_pred = final_input                                         # Output layer (no activation for regression)

    # Compute loss
    loss = mse_loss(y, y_pred)

    # Backpropagation
    loss_gradient = mse_loss_derivative(y, y_pred)              # dL/dY^
    grad_output_weights = np.dot(hidden_output.T, loss_gradient)  # Gradient for weights_hidden_output
    grad_output_bias = np.sum(loss_gradient, axis=0, keepdims=True)  # Gradient for bias_output

    hidden_gradient = np.dot(loss_gradient, weights_hidden_output.T) * relu_derivative(hidden_input)  # Backprop through ReLU
    grad_input_weights = np.dot(X.T, hidden_gradient)           # Gradient for weights_input_hidden
    grad_input_bias = np.sum(hidden_gradient, axis=0, keepdims=True)  # Gradient for bias_hidden

    # Update weights and biases
    weights_hidden_output -= learning_rate * grad_output_weights
    bias_output -= learning_rate * grad_output_bias
    weights_input_hidden -= learning_rate * grad_input_weights
    bias_hidden -= learning_rate * grad_input_bias

    # Print loss every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

Epoch 0, Loss: 0.4732
Epoch 10, Loss: 0.0075
Epoch 20, Loss: 0.0046
Epoch 30, Loss: 0.0035
Epoch 40, Loss: 0.0026
Epoch 50, Loss: 0.0020
Epoch 60, Loss: 0.0015
Epoch 70, Loss: 0.0011
Epoch 80, Loss: 0.0008
Epoch 90, Loss: 0.0006


## Testing

In [6]:
test_input = np.array([[1],[0.5]])
hidden_input = np.dot(test_input, weights_input_hidden) + bias_hidden
hidden_output = relu(hidden_input)
final_output = np.dot(hidden_output, weights_hidden_output) + bias_output

for i in range(test_input.shape[0]): 
    print(f"Prediction for input {test_input[i][0]}: {final_output[i][0]:.4f}")

Prediction for input 1.0: 1.0198
Prediction for input 0.5: 0.5310
