In [1]:
import numpy as np
from sklearn.datasets import make_moons
from sklearn.preprocessing import StandardScaler

In [2]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [3]:
def sigmoid_derivative(X):
    return X*(1-X)

In [5]:
X, y = make_moons(n_samples=1000, noise=0.1, random_state=42)
y = y.reshape(-1, 1) 

In [6]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [7]:
input_size = 2
hidden_size = 4
output_size = 1

In [8]:
np.random.seed(42)  
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

In [9]:
learning_rate = 0.1
epochs = 100

In [10]:
for epoch in range(epochs):
    # Forward propagation
    Z1 = np.dot(X, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)
    
    # Compute loss (Binary Cross-Entropy)
    loss = -np.mean(y * np.log(A2) + (1 - y) * np.log(1 - A2))
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")
    
    # Backward propagation
    dA2 = A2 - y
    dZ2 = dA2 * sigmoid_derivative(A2)
    dW2 = np.dot(A1.T, dZ2) / X.shape[0]
    db2 = np.mean(dZ2, axis=0, keepdims=True)
    
    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * sigmoid_derivative(A1)
    dW1 = np.dot(X.T, dZ1) / X.shape[0]
    db1 = np.mean(dZ1, axis=0, keepdims=True)
    
    # Update weights and biases
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2


Epoch 0, Loss: 0.7119
Epoch 10, Loss: 0.7022
Epoch 20, Loss: 0.6935
Epoch 30, Loss: 0.6858
Epoch 40, Loss: 0.6789
Epoch 50, Loss: 0.6725
Epoch 60, Loss: 0.6666
Epoch 70, Loss: 0.6612
Epoch 80, Loss: 0.6560
Epoch 90, Loss: 0.6511
