In [1]:
import numpy as np

class FeedForwardNN:
    """Feedforward network with one sigmoid hidden layer and a sigmoid output.

    Training is full-batch gradient descent on the squared error between the
    targets and the network output. The forward pass caches its intermediate
    activations on the instance (``hidden_layer_input``,
    ``hidden_layer_output``, ``final_input``, ``final_output``) and
    ``backpropagate`` reads them, so ``feedforward`` must be called on the
    same ``X`` before each ``backpropagate`` call.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the network with standard-normal weights and biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            seed: Optional seed for reproducible initialization. When
                ``None`` (the default) values are drawn from NumPy's global
                random state, so ``np.random.seed`` still controls them.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # ``RandomState(seed)`` yields the same stream as calling
        # ``np.random.seed(seed)`` first, without touching global state.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Draw order (W1, b1, W2, b2) is kept fixed so seeded runs are stable.
        self.weights_input_hidden = rng.randn(self.input_size, self.hidden_size)
        self.bias_hidden = rng.randn(1, self.hidden_size)
        self.weights_hidden_output = rng.randn(self.hidden_size, self.output_size)
        self.bias_output = rng.randn(1, self.output_size)

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` elementwise.

        Computed as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that
        large-magnitude inputs saturate to 0 or 1 instead of overflowing in
        ``exp``.
        """
        return np.exp(-np.logaddexp(0.0, -np.asarray(x, dtype=float)))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output*.

        ``x`` must already be an activation ``s = sigmoid(z)``; the result is
        ``s * (1 - s)``. Passing a raw pre-activation gives a wrong value.
        """
        return x * (1 - x)

    @staticmethod
    def _align_targets(y, output):
        """Return ``y`` as a column when it is a 1-D vector for a 1-column output.

        Without this, a ``(n,)`` target subtracted from a ``(n, 1)`` output
        broadcasts to ``(n, n)``. Any other target shape is returned as-is.
        """
        y = np.asarray(y)
        if y.ndim == 1 and output.shape[1] == 1 and y.shape[0] == output.shape[0]:
            return y.reshape(-1, 1)
        return y

    def feedforward(self, X):
        """Run the forward pass on ``X`` and return the output activations.

        Side effect: stores the hidden and output pre-activations and
        activations on the instance for use by ``backpropagate``.
        """
        self.hidden_layer_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_layer_output = self.sigmoid(self.hidden_layer_input)
        self.final_input = np.dot(self.hidden_layer_output, self.weights_hidden_output) + self.bias_output
        self.final_output = self.sigmoid(self.final_input)
        return self.final_output

    def backpropagate(self, X, y, learning_rate):
        """Apply one gradient-descent update using the cached forward pass.

        Args:
            X: Inputs that were passed to the preceding ``feedforward`` call.
            y: Targets; a 1-D vector is accepted for a single-output network.
            learning_rate: Step size applied to every parameter update.
        """
        # A single 1-D sample would otherwise break ``X.T.dot`` below.
        X = np.atleast_2d(X)
        y = self._align_targets(y, self.final_output)

        output_error = y - self.final_output
        output_delta = output_error * self.sigmoid_derivative(self.final_output)

        # Uses the hidden->output weights from *before* this step's update.
        hidden_error = output_delta.dot(self.weights_hidden_output.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.hidden_layer_output)

        # The error is (target - output), so adding moves down the loss gradient.
        self.weights_hidden_output += self.hidden_layer_output.T.dot(output_delta) * learning_rate
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * learning_rate
        self.weights_input_hidden += X.T.dot(hidden_delta) * learning_rate
        self.bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate, log_every=1000):
        """Train on the full batch ``(X, y)`` for ``epochs`` iterations.

        Args:
            X: Training inputs.
            y: Training targets.
            epochs: Number of forward/backward passes.
            learning_rate: Step size passed to ``backpropagate``.
            log_every: Print the mean squared error every this many epochs
                (starting at epoch 0). A falsy value disables printing.
        """
        for epoch in range(epochs):
            output = self.feedforward(X)
            self.backpropagate(X, y, learning_rate)
            if log_every and epoch % log_every == 0:
                # Loss is measured on the output computed before this update.
                loss = np.mean(np.square(self._align_targets(y, output) - output))
                print(f"Epoch {epoch}, Loss: {loss}")

    def predict(self, X):
        """Return the network output for ``X``.

        This is a plain forward pass, so it also overwrites the activations
        cached on the instance.
        """
        return self.feedforward(X)

# Example usage: fit a 2-4-1 network to the XOR truth table.
if __name__ == "__main__":
    # All four binary input pairs in the order 00, 01, 10, 11.
    X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
    # Targets are the XOR of the two input columns, kept as a column vector.
    y = X[:, :1] ^ X[:, 1:]

    # Layer widths: inputs and outputs follow the data; the hidden width of 4
    # is an arbitrary choice.
    input_size = X.shape[-1]
    hidden_size = 4
    output_size = y.shape[-1]

    nn = FeedForwardNN(
        input_size=input_size,
        hidden_size=hidden_size,
        output_size=output_size,
    )

    # Full-batch training; the loss is printed every 1000 epochs.
    nn.train(X, y, epochs=10_000, learning_rate=0.1)

    # Show the network's outputs for the four training inputs.
    print("Predictions after training:", nn.predict(X), sep="\n")


Epoch 0, Loss: 0.26670396299824684
Epoch 1000, Loss: 0.1540254815108137
Epoch 2000, Loss: 0.04303202591964192
Epoch 3000, Loss: 0.015796229758195654
Epoch 4000, Loss: 0.008578977599871425
Epoch 5000, Loss: 0.00564595392270523
Epoch 6000, Loss: 0.004126342835154477
Epoch 7000, Loss: 0.003216663286899577
Epoch 8000, Loss: 0.0026183605974621944
Epoch 9000, Loss: 0.0021981159948969847
Predictions after training:
[[0.03470066]
 [0.97192206]
 [0.94188577]
 [0.04672807]]
