In [1]:
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.W1 = np.random.randn(input_size, hidden_size) / np.sqrt(input_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) / np.sqrt(hidden_size)
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def compute_loss(self, X, y, lambda_):
        m = X.shape[0]
        y_hat = self.forward(X)
        loss = -np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) / m
        
        # Add L2 regularization term (weight decay)
        l2_reg = (lambda_ / (2 * m)) * (np.sum(np.square(self.W1)) + np.sum(np.square(self.W2)))
        return loss + l2_reg

    def backward(self, X, y, learning_rate, lambda_):
        m = X.shape[0]
        
        # Compute gradients
        delta2 = self.a2 - y
        dW2 = np.dot(self.a1.T, delta2) / m
        db2 = np.sum(delta2, axis=0, keepdims=True) / m
        
        delta1 = np.dot(delta2, self.W2.T) * self.sigmoid_derivative(self.a1)
        dW1 = np.dot(X.T, delta1) / m
        db1 = np.sum(delta1, axis=0, keepdims=True) / m
        
        # Add weight decay term to weight gradients
        dW2 += (lambda_ / m) * self.W2
        dW1 += (lambda_ / m) * self.W1
        
        # Update parameters
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

def train_nn_with_weight_decay(X, y, hidden_size, epochs, learning_rate, lambda_):
    input_size = X.shape[1]
    output_size = y.shape[1]
    
    nn = NeuralNetwork(input_size, hidden_size, output_size)
    
    for epoch in range(epochs):
        # Forward pass
        nn.forward(X)
        
        # Compute loss
        loss = nn.compute_loss(X, y, lambda_)
        
        # Backward pass and update weights
        nn.backward(X, y, learning_rate, lambda_)
        
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss}")
    
    return nn

# Example usage
if __name__ == "__main__":
    # Generate some example data
    np.random.seed(0)
    X = np.random.randn(100, 2)
    y = np.array([(x[0] + x[1] > 0).astype(int) for x in X]).reshape(-1, 1)

    # Train the neural network
    trained_nn = train_nn_with_weight_decay(X, y, hidden_size=4, epochs=1000, learning_rate=0.1, lambda_=0.01)

    # Make predictions
    predictions = trained_nn.forward(X)
    print("Final predictions:", predictions[:5])

Epoch 0, Loss: 0.6735298284295481
Epoch 100, Loss: 0.5499198684645213
Epoch 200, Loss: 0.40330652791125765
Epoch 300, Loss: 0.29090332772000466
Epoch 400, Loss: 0.22158326212527774
Epoch 500, Loss: 0.17868648371217677
Epoch 600, Loss: 0.15047406159785276
Epoch 700, Loss: 0.13076492939934675
Epoch 800, Loss: 0.11629410532703004
Epoch 900, Loss: 0.1052406971550421
Final predictions: [[0.98977573]
 [0.99072822]
 [0.97251747]
 [0.96434438]
 [0.80288299]]


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NeuralNetwork, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.sigmoid(self.layer1(x))
        x = self.sigmoid(self.layer2(x))
        return x

def train_nn_with_weight_decay(X, y, hidden_size, epochs, learning_rate, lambda_):
    # Convert numpy arrays to PyTorch tensors
    X = torch.FloatTensor(X)
    y = torch.FloatTensor(y)

    input_size = X.shape[1]
    output_size = y.shape[1]

    # Create the model
    model = NeuralNetwork(input_size, hidden_size, output_size)

    # Define loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=lambda_)

    for epoch in range(epochs):
        # Forward pass
        outputs = model(X)
        loss = criterion(outputs, y)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss.item()}")

    return model

# Example usage
if __name__ == "__main__":
    # Generate some example data (same as in the custom implementation)
    np.random.seed(0)
    X = np.random.randn(100, 2)
    y = np.array([(x[0] + x[1] > 0).astype(int) for x in X]).reshape(-1, 1)

    # Train the neural network
    trained_nn = train_nn_with_weight_decay(X, y, hidden_size=4, epochs=1000, learning_rate=0.1, lambda_=0.01)

    # Make predictions
    with torch.no_grad():
        predictions = trained_nn(torch.FloatTensor(X))
    print("Final predictions:", predictions[:5].numpy())

    # Compare with the custom implementation
    import train_nn_with_weight_decay as custom_train

    custom_nn = custom_train(X, y, hidden_size=4, epochs=1000, learning_rate=0.1, lambda_=0.01)
    custom_predictions = custom_nn.forward(X)
    print("Custom implementation predictions:", custom_predictions[:5])

    # Calculate mean squared error between PyTorch and custom predictions
    mse = np.mean((predictions.numpy() - custom_predictions) ** 2)
    print(f"Mean Squared Error between PyTorch and custom predictions: {mse}")

  from .autonotebook import tqdm as notebook_tqdm


Epoch 0, Loss: 0.6934700012207031
Epoch 100, Loss: 0.6093820929527283
Epoch 200, Loss: 0.48696351051330566
Epoch 300, Loss: 0.3752272427082062
Epoch 400, Loss: 0.3016279637813568
Epoch 500, Loss: 0.25658854842185974
Epoch 600, Loss: 0.22852538526058197
Epoch 700, Loss: 0.21040691435337067
Epoch 800, Loss: 0.19831696152687073
Epoch 900, Loss: 0.19003280997276306
Final predictions: [[0.95439345]
 [0.9636781 ]
 [0.8900036 ]
 [0.8641708 ]
 [0.66619074]]


ModuleNotFoundError: No module named 'neural_network_weight_decay'