Creating the Neural Network: a one-hidden-layer sigmoid network with dropout and L2 regularization

In [10]:
import numpy as np

class NeuralNetwork:
    """One-hidden-layer binary classifier with sigmoid activations.

    The hidden layer uses inverted dropout while training, and the weights
    (not the biases) are penalised with L2 regularization.

    Data layout: samples are stored column-wise, i.e. ``X`` has shape
    ``(input_size, m)`` and ``y`` has shape ``(output_size, m)`` where ``m``
    is the number of samples.

    Parameters
    ----------
    input_size, hidden_size, output_size : int
        Number of units in the input, hidden and output layer.
    dropout_prob : float, default 0.5
        Probability of *dropping* a hidden unit during training. Must lie in
        ``[0, 1)``; ``0`` disables dropout.
    reg_lambda : float, default 0.01
        L2 regularization strength.
    seed : int or None, default None
        If given, weight initialisation and dropout masks are drawn from a
        private ``np.random.RandomState(seed)``. If ``None`` the global
        ``np.random`` stream is used.

    Raises
    ------
    ValueError
        If ``dropout_prob`` is outside ``[0, 1)``.
    """

    # Keeps predictions away from exactly 0 and 1 so that log() stays finite.
    _EPS = 1e-12

    def __init__(self, input_size, hidden_size, output_size, dropout_prob=0.5, reg_lambda=0.01, seed=None):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_prob = dropout_prob
        self.reg_lambda = reg_lambda
        self._keep_prob()  # fail early on an invalid dropout probability

        # A private generator is only created on request; otherwise the
        # global NumPy stream is used (see _random).
        self._rng = None if seed is None else np.random.RandomState(seed)

        # Small random weights, zero biases.
        rng = self._random()
        self.W1 = rng.randn(hidden_size, input_size) * 0.01
        self.b1 = np.zeros((hidden_size, 1))
        self.W2 = rng.randn(output_size, hidden_size) * 0.01
        self.b2 = np.zeros((output_size, 1))

    # ------------------------------------------------------------------ helpers

    def _random(self):
        """Return the random source: the private generator or global ``np.random``."""
        rng = getattr(self, "_rng", None)
        return np.random if rng is None else rng

    def _keep_prob(self):
        """Return the probability of keeping a hidden unit, validating ``dropout_prob``."""
        if not 0.0 <= self.dropout_prob < 1.0:
            raise ValueError(
                "dropout_prob must be in [0, 1), got {!r}".format(self.dropout_prob)
            )
        return 1.0 - self.dropout_prob

    @staticmethod
    def _num_samples(X):
        """Return the number of columns (samples) of ``X``; reject empty batches."""
        m = X.shape[1]
        if m == 0:
            raise ValueError("X must contain at least one sample (column)")
        return m

    def _forward(self, X, keep_prob=1.0):
        """Run one forward pass.

        ``keep_prob < 1`` applies inverted dropout to the hidden activations;
        ``keep_prob == 1`` is the deterministic inference path.

        Returns ``(Z1, A1, dropout_mask, y_hat)`` where ``A1`` is the hidden
        activation *after* dropout and ``dropout_mask`` is ``None`` when no
        dropout was applied.
        """
        Z1 = np.dot(self.W1, X) + self.b1
        A1 = self.sigmoid(Z1)

        dropout_mask = None
        if keep_prob < 1.0:
            dropout_mask = self._random().rand(*A1.shape) < keep_prob
            # Rescale the surviving units so the expected activation is unchanged.
            A1 = A1 * dropout_mask / keep_prob

        Z2 = np.dot(self.W2, A1) + self.b2
        y_hat = self.sigmoid(Z2)
        return Z1, A1, dropout_mask, y_hat

    # --------------------------------------------------------------- public API

    def sigmoid(self, z):
        """Element-wise logistic function ``1 / (1 + exp(-z))``."""
        return 1 / (1 + np.exp(-z))

    def sigmoid_derivative(self, z):
        """Derivative of the logistic function evaluated at pre-activation ``z``."""
        s = self.sigmoid(z)  # evaluate once instead of twice
        return s * (1 - s)

    def compute_cost(self, X, y, training=False):
        """Return the mean cross-entropy plus the L2 penalty on ``W1`` and ``W2``.

        By default the cost is evaluated without dropout, i.e. on the same
        deterministic network that ``predict`` uses. Pass ``training=True``
        to evaluate it on a randomly dropped-out network instead.

        Raises
        ------
        ValueError
            If ``X`` has no samples, or ``dropout_prob`` is invalid while
            ``training=True``.
        """
        m = self._num_samples(X)
        keep_prob = self._keep_prob() if training else 1.0
        _, _, _, y_hat = self._forward(X, keep_prob)

        # Clip so that saturated outputs do not produce log(0) = -inf.
        y_hat = np.clip(y_hat, self._EPS, 1 - self._EPS)

        # Compute cross-entropy loss
        logprobs = np.multiply(-np.log(y_hat), y) + np.multiply(-np.log(1 - y_hat), 1 - y)
        cross_entropy_cost = 1./m * np.sum(logprobs)

        # L2 regularization
        L2_regularization_cost = self.reg_lambda/(2*m) * (np.sum(np.square(self.W1)) + np.sum(np.square(self.W2)))

        return cross_entropy_cost + L2_regularization_cost

    def train(self, X, y, learning_rate=0.01, num_iterations=1000):
        """Fit the network with full-batch gradient descent, updating the weights in place.

        Each iteration draws a fresh dropout mask, back-propagates the
        cross-entropy gradient (plus the L2 term for the weights) and takes
        one step of size ``learning_rate``.

        Raises
        ------
        ValueError
            If ``X`` has no samples or ``dropout_prob`` is outside ``[0, 1)``.
        """
        m = self._num_samples(X)
        keep_prob = self._keep_prob()

        for _ in range(num_iterations):
            Z1, A1, dropout_mask, y_hat = self._forward(X, keep_prob)

            # Backpropagation (sigmoid output + cross-entropy => dZ2 = y_hat - y)
            dZ2 = y_hat - y
            dW2 = 1./m * np.dot(dZ2, A1.T) + (self.reg_lambda/m) * self.W2
            db2 = 1./m * np.sum(dZ2, axis=1, keepdims=True)
            dA1 = np.dot(self.W2.T, dZ2)
            if dropout_mask is not None:
                # Gradient flows only through the units that were kept.
                dA1 = dA1 * dropout_mask / keep_prob
            dZ1 = dA1 * self.sigmoid_derivative(Z1)
            dW1 = 1./m * np.dot(dZ1, X.T) + (self.reg_lambda/m) * self.W1
            db1 = 1./m * np.sum(dZ1, axis=1, keepdims=True)

            # Update weights and biases
            self.W1 -= learning_rate * dW1
            self.b1 -= learning_rate * db1
            self.W2 -= learning_rate * dW2
            self.b2 -= learning_rate * db2

    def predict(self, X):
        """Return the output activations for ``X`` (no dropout applied)."""
        return self._forward(X)[3]

In [11]:
# Example usage: train the network on the XOR truth table.

# The network draws its initial weights and its dropout masks from NumPy's
# random stream; fix the seed so this cell gives the same numbers on every
# "Restart Kernel -> Run All".
np.random.seed(42)

# One column per sample: the four input combinations of XOR and their labels.
X_train = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])  # Inputs, shape (2, 4)
y_train = np.array([[0, 1, 1, 0]])               # Corresponding outputs, shape (1, 4)
assert X_train.shape[1] == y_train.shape[1], "inputs and labels must have the same number of samples"

# Create a neural network with dropout and L2 regularization
input_size = 2
hidden_size = 2
output_size = 1
dropout_prob = 0.5
reg_lambda = 0.01
nn = NeuralNetwork(input_size, hidden_size, output_size, dropout_prob, reg_lambda)

# Train the neural network (uses train()'s default learning rate and iteration count)
# NOTE(review): the output recorded for this cell is ~0.5 for all four inputs,
# i.e. that run did not fit XOR. Presumably the defaults are too weak for this
# setup; try a larger learning_rate / num_iterations and confirm.
nn.train(X_train, y_train)

# Make predictions
predictions = nn.predict(X_train)
print("Predictions:", predictions)

Predictions: [[0.50174426 0.50173744 0.50172498 0.50171816]]
