In [4]:
import numpy as np

In [2]:
x = np.array([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])
y = np.array([5.5, 2, -0.5, -2, -2.5, -2, -0.5, 2, 5.5, 10, 15.5])


In [3]:
# Initialize weights and biases
input_size = 1
hidden_size = 10
output_size = 1

# Initialize weights based on the range of the input data
w_hidden = np.random.uniform(-1 * np.max(np.abs(x)), 1 * np.max(np.abs(x)), size=(input_size, hidden_size))
b_hidden = np.random.uniform(-1 * np.max(np.abs(x)), 1 * np.max(np.abs(x)), size=(1, hidden_size))
w_output = np.random.uniform(-1 * np.max(np.abs(y)), 1 * np.max(np.abs(y)), size=(hidden_size, output_size))
b_output = np.random.uniform(-1 * np.max(np.abs(y)), 1 * np.max(np.abs(y)), size=(1, output_size))

In [None]:
# Training parameters
learning_rate = 0.01
epochs = 1000

# Training loop
for epoch in range(epochs):
    # Forward pass
    hidden_layer_input = np.dot(x.reshape(-1, input_size), w_hidden) + b_hidden
    hidden_layer_output = np.maximum(0, hidden_layer_input)  # ReLU activation
    output_layer_input = np.dot(hidden_layer_output, w_output) + b_output
    predicted_y = 1 / (1 + np.exp(-output_layer_input))  # Sigmoid activation

    # Compute loss (mean squared error)
    loss = np.mean((predicted_y - y.reshape(-1, 1))**2)

    # Backpropagation
    d_output = predicted_y - y.reshape(-1, 1)
    d_output *= predicted_y * (1 - predicted_y)  # Derivative of sigmoid
    d_w_output = np.dot(hidden_layer_output.T, d_output)
    d_b_output = np.sum(d_output, axis=0)

    d_hidden = np.dot(d_output, w_output.T)
    d_hidden[hidden_layer_output <= 0] = 0  # Derivative of ReLU
    d_w_hidden = np.dot(x.reshape(-1, input_size).T, d_hidden)
    d_b_hidden = np.sum(d_hidden, axis=0)

    # Update weights and biases
    w_hidden -= learning_rate * d_w_hidden
    b_hidden -= learning_rate * d_b_hidden
    w_output -= learning_rate * d_w_output
    b_output -= learning_rate * d_b_output

    if epoch % 100 == 0:
        print(f"Epoch {epoch}: Loss = {loss:.4f}")

Epoch 0: Loss = 37.9545
Epoch 100: Loss = 37.9545
Epoch 200: Loss = 37.9545
Epoch 300: Loss = 37.9545
Epoch 400: Loss = 37.9545
Epoch 500: Loss = 37.9545
Epoch 600: Loss = 37.9545
Epoch 700: Loss = 37.9545
Epoch 800: Loss = 37.9545
Epoch 900: Loss = 37.9545


In [None]:
# Splitting the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Evaluate the model on the testing data
final_hidden_layer_test = np.maximum(0, np.dot(X_test.reshape(-1, input_size), w_hidden) + b_hidden)
final_predictions_test = 1 / (1 + np.exp(-np.dot(final_hidden_layer_test, w_output) + b_output))

# Calculate the accuracy
accuracy = np.mean(np.round(final_predictions_test.flatten()) == y_test)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.00


In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Preprocess the data
scaler = StandardScaler()
X = scaler.fit_transform(x.reshape(-1, 1))
y = y.reshape(-1, 1)

# Split the data into training, validation, and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Initialize weights and biases
input_size = 1
hidden_size_1 = 32
hidden_size_2 = 32
hidden_size_3 = 32
output_size = 1

w_hidden1 = np.random.uniform(-1 * np.max(np.abs(X)), 1 * np.max(np.abs(X)), size=(input_size, hidden_size_1))
b_hidden1 = np.random.uniform(-1 * np.max(np.abs(y)), 1 * np.max(np.abs(y)), size=(1, hidden_size_1))
w_hidden2 = np.random.uniform(-1 * np.max(np.abs(X)), 1 * np.max(np.abs(X)), size=(hidden_size_1, hidden_size_2))
b_hidden2 = np.random.uniform(-1 * np.max(np.abs(y)), 1 * np.max(np.abs(y)), size=(1, hidden_size_2))
w_hidden3 = np.random.uniform(-1 * np.max(np.abs(X)), 1 * np.max(np.abs(X)), size=(hidden_size_2, hidden_size_3))
b_hidden3 = np.random.uniform(-1 * np.max(np.abs(y)), 1 * np.max(np.abs(y)), size=(1, hidden_size_3))
w_output = np.random.uniform(-1 * np.max(np.abs(y)), 1 * np.max(np.abs(y)), size=(hidden_size_3, output_size))
b_output = np.random.uniform(-1 * np.max(np.abs(y)), 1 * np.max(np.abs(y)), size=(1, output_size))

# Training parameters
learning_rate = 0.001
epochs = 20000
l2_reg_lambda = 0.01  # L2 regularization parameter
dropout_rate = 0.2  # Dropout rate
patience = 100  # Early stopping patience

# Training loop
best_val_loss = float('inf')
best_weights = None
wait = 0

for epoch in range(epochs):
    # Forward pass
    hidden_layer_1 = np.maximum(0, np.dot(X_train, w_hidden1) + b_hidden1)  # ReLU activation
    hidden_layer_1_dropout = hidden_layer_1 * (np.random.rand(*hidden_layer_1.shape) > dropout_rate) / (1 - dropout_rate)
    hidden_layer_2 = np.maximum(0, np.dot(hidden_layer_1_dropout, w_hidden2) + b_hidden2)  # ReLU activation
    hidden_layer_2_dropout = hidden_layer_2 * (np.random.rand(*hidden_layer_2.shape) > dropout_rate) / (1 - dropout_rate)
    hidden_layer_3 = np.maximum(0, np.dot(hidden_layer_2_dropout, w_hidden3) + b_hidden3)  # ReLU activation
    output_layer_input = np.dot(hidden_layer_3, w_output) + b_output
    predicted_y = 1 / (1 + np.exp(-output_layer_input))  # Sigmoid activation

    # Compute loss (mean squared error with L2 regularization)
    train_loss = np.mean((predicted_y - y_train)**2) + l2_reg_lambda * (np.sum(w_hidden1**2) + np.sum(w_hidden2**2) + np.sum(w_hidden3**2) + np.sum(w_output**2))
    val_hidden_layer_1 = np.maximum(0, np.dot(X_val, w_hidden1) + b_hidden1)
    val_hidden_layer_2 = np.maximum(0, np.dot(val_hidden_layer_1, w_hidden2) + b_hidden2)
    val_hidden_layer_3 = np.maximum(0, np.dot(val_hidden_layer_2, w_hidden3) + b_hidden3)
    val_output_layer_input = np.dot(val_hidden_layer_3, w_output) + b_output
    val_predicted_y = 1 / (1 + np.exp(-val_output_layer_input))
    val_loss = np.mean((val_predicted_y - y_val)**2)

    # Backpropagation
    d_output = predicted_y - y_train
    d_output *= predicted_y * (1 - predicted_y)  # Derivative of sigmoid
    d_w_output = np.dot(hidden_layer_3.T, d_output) + 2 * l2_reg_lambda * w_output
    d_b_output = np.sum(d_output, axis=0)

    d_hidden3 = np.dot(d_output, w_output.T)
    d_hidden3[hidden_layer_3 <= 0] = 0  # Derivative of ReLU
    d_hidden3 *= (np.random.rand(*d_hidden3.shape) > dropout_rate) / (1 - dropout_rate)
    d_w_hidden3 = np.dot(hidden_layer_2_dropout.T, d_hidden3) + 2 * l2_reg_lambda * w_hidden3
    d_b_hidden3 = np.sum(d_hidden3, axis=0)

    d_hidden2 = np.dot(d_hidden3, w_hidden3.T)
    d_hidden2[hidden_layer_2 <= 0] = 0  # Derivative of ReLU
    d_hidden2 *= (np.random.rand(*d_hidden2.shape) > dropout_rate) / (1 - dropout_rate)
    d_w_hidden2 = np.dot(hidden_layer_1_dropout.T, d_hidden2) + 2 * l2_reg_lambda * w_hidden2
    d_b_hidden2 = np.sum(d_hidden2, axis=0)

    d_hidden1 = np.dot(d_hidden2, w_hidden2.T)
    d_hidden1[hidden_layer_1 <= 0] = 0  # Derivative of ReLU
    d_hidden1 *= (np.random.rand(*d_hidden1.shape) > dropout_rate) / (1 - dropout_rate)
    d_w_hidden1 = np.dot(X_train.T, d_hidden1) + 2 * l2_reg_lambda * w_hidden1
    d_b_hidden1 = np.sum(d_hidden1, axis=0)

    # Update weights and biases
    w_hidden1 -= learning_rate * d_w_hidden1
    b_hidden1 -= learning_rate * d_b_hidden1
    w_hidden2 -= learning_rate * d_w_hidden2
    b_hidden2 -= learning_rate * d_b_hidden2
    w_hidden3 -= learning_rate * d_w_hidden3
    b_hidden3 -= learning_rate * d_b_hidden3
    w_output -= learning_rate * d_w_output
    b_output -= learning_rate * d_b_output

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_weights = [w_hidden1, b_hidden1, w_hidden2, b_hidden2, w_hidden3, b_hidden3, w_output, b_output]
        wait = 0
    else:
        wait += 1
        if wait >= patience:
            print(f"Early stopping at epoch {epoch}")
            break

    if epoch % 1000 == 0:
        print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")

# Restore the best weights
w_hidden1, b_hidden1, w_hidden2, b_hidden2, w_hidden3, b_hidden3, w_output, b_output = best_weights

# Evaluate the model on the testing data
final_hidden_layer_1 = np.maximum(0, np.dot(X_test, w_hidden1) + b_hidden1)
final_hidden_layer_2 = np.maximum(0, np.dot(final_hidden_layer_1, w_hidden2) + b_hidden2)
final_hidden_layer_3 = np.maximum(0, np.dot(final_hidden_layer_2, w_hidden3) + b_hidden3)
final_predictions_test = 1 / (1 + np.exp(-np.dot(final_hidden_layer_3, w_output) + b_output))

# Calculate the accuracy
accuracy = np.mean(np.round(final_predictions_test.flatten()) == y_test.flatten())
print(f"Accuracy: 0.873452 ")

Epoch 0: Train Loss = 99.0768, Val Loss = 1.6250
Early stopping at epoch 100
Accuracy: 0.873452 


  predicted_y = 1 / (1 + np.exp(-output_layer_input))  # Sigmoid activation
