# **Introduction & Motivation**

I built a neural network from scratch using only Python and NumPy so that I could truly understand how machine learning algorithms work. Instead of relying entirely on pre-built libraries, I implemented every essential component myself—from the weighted sums in neurons to the activation functions, loss calculation, and backpropagation. Writing these by hand not only gave me a solid understanding of linear algebra and optimization techniques, but also improved my debugging and problem-solving skills.

By creating the network from scratch, I'm demonstrating that I can take theoretical mathematical concepts and turn them into efficient, functional code. This project reflects my commitment to lifelong learning and my passion for innovation. It also mirrors the challenges of real-world applications, where developing scalable, interpretable models is of utmost importance. Hiring managers and recruiters will see that I have the technical experience, interest, and hands-on exposure needed for a Data Scientist or Machine Learning Engineer position.

# **Helper Functions**

In [8]:
import numpy as np

# Activation functions and their derivatives
def relu(x):
    """Return the element-wise rectified linear unit, max(0, x).

    Negative entries are replaced by 0; positive entries are returned as-is.
    """
    floor = 0
    return np.maximum(floor, x)

def relu_derivative(x):
    """Return the element-wise slope of ReLU: 1 where x > 0, otherwise 0.

    The slope at exactly x == 0 is taken to be 0.
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def softmax(x):
    """Normalize scores into probabilities along axis 1 (one distribution per row).

    Each row's maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps np.exp from overflowing on
    large scores.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exps = np.exp(x - row_max)
    row_totals = np.sum(exps, axis=1, keepdims=True)
    return exps / row_totals

# Loss function: cross-entropy
def cross_entropy_loss(y_pred, y_true):
    """
    Return the mean cross-entropy over a batch.

    y_pred: predicted probabilities, shape (n_samples, n_classes)
    y_true: true labels as integers, shape (n_samples,)

    Only the probability assigned to each sample's true class contributes:
    the loss is the average of -log(p_true) across samples.
    """
    n_samples = y_pred.shape[0]
    tiny = 1e-15  # keeps log() finite when a true-class probability is 0

    # Pair every row index with its label to pick one probability per sample.
    true_class_probs = y_pred[range(n_samples), y_true]
    neg_log_likelihood = -np.log(true_class_probs + tiny)
    return np.sum(neg_log_likelihood) / n_samples


# **Build the Neural Network Class**

In [9]:
class NeuralNetwork:
    """Fully connected classifier with one ReLU hidden layer and a softmax output.

    Attributes:
        W1: hidden-layer weights, shape (input_size, hidden_size).
        b1: hidden-layer biases, shape (1, hidden_size).
        W2: output-layer weights, shape (hidden_size, output_size).
        b2: output-layer biases, shape (1, output_size).
        z1, a1, z2, a2: pre-activations and activations cached by the most
            recent ``forward`` call; ``backward`` reads ``z1`` and ``a1``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create randomly initialized weights and zero biases.

        Weights are drawn from the global NumPy RNG (W1 first, then W2) and
        scaled by sqrt(2 / fan_in), i.e. He initialization.
        """
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2. / input_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2. / hidden_size)
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        """
        Forward pass:
          - Compute first linear transformation (X.W1 + b1)
          - Apply ReLU activation
          - Compute second linear transformation (hidden_layer.W2 + b2)
          - Apply softmax to get probabilities

        X has shape (n_samples, input_size). Returns the class probabilities,
        shape (n_samples, output_size). The intermediate values are stored on
        the instance so that ``backward`` can reuse them.
        """
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = relu(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = softmax(self.z2)
        return self.a2

    def backward(self, X, y, output, learning_rate):
        """
        Backward pass (backpropagation):
          - Compute gradient on output layer (using cross-entropy loss derivative)
          - Backpropagate the gradient through the network using the chain rule
          - Update weights and biases using gradient descent

        X: inputs, shape (n_samples, input_size).
        y: integer class labels, shape (n_samples,).
        output: probabilities returned by ``forward(X)``; it is copied, not
            modified.
        learning_rate: step size of the gradient-descent update.

        ``forward`` must have been called on the same X beforehand, because
        this method reads the cached ``self.z1`` and ``self.a1``. The
        parameters are updated in place; nothing is returned.
        """
        m = X.shape[0]

        # For softmax + cross-entropy the gradient w.r.t. z2 is
        # (probabilities - one_hot(y)) / m.
        delta2 = output.copy()
        delta2[range(m), y] -= 1
        delta2 /= m

        # Gradients for second layer weights and biases
        dW2 = np.dot(self.a1.T, delta2)
        db2 = np.sum(delta2, axis=0, keepdims=True)

        # Backpropagate into hidden layer (uses W2 before it is updated below)
        delta1 = np.dot(delta2, self.W2.T) * relu_derivative(self.z1)
        dW1 = np.dot(X.T, delta1)
        db1 = np.sum(delta1, axis=0, keepdims=True)

        # Update parameters using gradient descent
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def train(self, X, y, epochs, learning_rate, print_every=100):
        """Train the network with full-batch gradient descent.

        X: inputs, shape (n_samples, input_size).
        y: integer class labels, shape (n_samples,).
        epochs: number of forward/backward passes over the whole of X.
        learning_rate: step size passed to ``backward``.
        print_every: print the loss every this many epochs (starting with
            epoch 0). Pass 0 or None to disable printing. Defaults to 100.

        The printed loss is the one measured before that epoch's update.
        """
        for i in range(epochs):
            output = self.forward(X)
            loss = cross_entropy_loss(output, y)
            self.backward(X, y, output, learning_rate)
            # The truthiness check both silences logging and avoids `i % 0`.
            if print_every and i % print_every == 0:
                print(f"Epoch {i}, Loss: {loss:.4f}")


# **Testing the Network on a Simple Dataset**

In [10]:
# XOR truth table: one row per combination of the two binary inputs.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# XOR is treated as a two-class problem. The label is 1 when exactly one
# input is set ([0,1] and [1,0]) and 0 otherwise ([0,0] and [1,1]).
y = np.array([0, 1, 1, 0])

# Network and optimizer settings
input_size, output_size = 2, 2   # 2 input features, 2 output classes (0 and 1)
hidden_size = 4                  # hidden-layer width (you can experiment here)
learning_rate = 0.1
epochs = 10000

# Build the network and fit it to the four XOR samples
nn = NeuralNetwork(input_size, hidden_size, output_size)
nn.train(X, y, epochs, learning_rate)

# Run the trained network on the same four inputs and report the results
predictions = nn.forward(X)
print("\nPredicted probabilities:")
print(predictions)
print("\nPredicted classes:")
predicted_classes = np.argmax(predictions, axis=1)
print(predicted_classes)


Epoch 0, Loss: 0.7444
Epoch 100, Loss: 0.6931
Epoch 200, Loss: 0.6930
Epoch 300, Loss: 0.6929
Epoch 400, Loss: 0.6927
Epoch 500, Loss: 0.6922
Epoch 600, Loss: 0.6910
Epoch 700, Loss: 0.6877
Epoch 800, Loss: 0.6750
Epoch 900, Loss: 0.5717
Epoch 1000, Loss: 0.2882
Epoch 1100, Loss: 0.1555
Epoch 1200, Loss: 0.0941
Epoch 1300, Loss: 0.0637
Epoch 1400, Loss: 0.0468
Epoch 1500, Loss: 0.0363
Epoch 1600, Loss: 0.0293
Epoch 1700, Loss: 0.0245
Epoch 1800, Loss: 0.0208
Epoch 1900, Loss: 0.0181
Epoch 2000, Loss: 0.0159
Epoch 2100, Loss: 0.0142
Epoch 2200, Loss: 0.0128
Epoch 2300, Loss: 0.0116
Epoch 2400, Loss: 0.0106
Epoch 2500, Loss: 0.0098
Epoch 2600, Loss: 0.0091
Epoch 2700, Loss: 0.0085
Epoch 2800, Loss: 0.0079
Epoch 2900, Loss: 0.0074
Epoch 3000, Loss: 0.0070
Epoch 3100, Loss: 0.0066
Epoch 3200, Loss: 0.0062
Epoch 3300, Loss: 0.0059
Epoch 3400, Loss: 0.0056
Epoch 3500, Loss: 0.0054
Epoch 3600, Loss: 0.0051
Epoch 3700, Loss: 0.0049
Epoch 3800, Loss: 0.0047
Epoch 3900, Loss: 0.0045
Epoch 4000, ... [output truncated]

# **Generating Synthetic Data for Testing**

In [11]:
import numpy as np

def generate_synthetic_data(num_samples=100, seed=42):
    """
    Generate a synthetic 2-D dataset with 3 classes.

    Each class is a cloud of standard-normal points shifted to its own
    center: class 0 around (0, 0), class 1 around (5, 5) and class 2
    around (-5, 5).

    num_samples: number of points drawn per class (default 100).
    seed: value passed to np.random.seed (default 42). Note that this
        reseeds NumPy's *global* random state on every call, so later
        np.random draws made after this call are affected too.

    Returns (X, y): X has shape (3 * num_samples, 2); y has shape
    (3 * num_samples,) and holds the integer labels 0, 1, 2. Samples are
    ordered by class and are not shuffled.
    """
    np.random.seed(seed)

    # The position in this list is the class label.
    centers = [(0, 0), (5, 5), (-5, 5)]

    features = []
    labels = []
    for label, center in enumerate(centers):
        # Draw the classes in label order so a given seed always yields the
        # same points for the same class.
        features.append(np.random.randn(num_samples, 2) + np.array(center))
        labels.append(np.full(num_samples, label, dtype=int))

    # Combine the data
    X = np.vstack(features)
    y = np.concatenate(labels)

    return X, y

# Generate the dataset. This rebinds the module-level X and y (previously the
# XOR inputs and labels) to the three-cluster data: 300 samples with 2
# features each, labelled 0, 1 or 2.
X, y = generate_synthetic_data()


In [12]:
# Define an accuracy function for evaluation
def accuracy(y_pred, y_true):
    """Return the fraction of samples whose highest-scoring class equals the label.

    y_pred: per-class scores or probabilities, shape (n_samples, n_classes)
    y_true: true labels as integers, shape (n_samples,)
    """
    predicted_labels = np.argmax(y_pred, axis=1)
    is_correct = predicted_labels == y_true
    return np.mean(is_correct)

# Hyperparameters for the three-cluster dataset:
# 2 input features, 10 hidden units, 3 output classes.
input_size, hidden_size, output_size = 2, 10, 3
learning_rate = 0.01
epochs = 1000

# Build a fresh network and fit it to the synthetic data
nn = NeuralNetwork(input_size, hidden_size, output_size)
nn.train(X, y, epochs, learning_rate)

# Score the network on the same data it was trained on (no held-out set)
predictions = nn.forward(X)
print("Accuracy on synthetic dataset:", accuracy(predictions, y))


Epoch 0, Loss: 11.9137
Epoch 100, Loss: 0.4240
Epoch 200, Loss: 0.2610
Epoch 300, Loss: 0.1921
Epoch 400, Loss: 0.1568
Epoch 500, Loss: 0.1315
Epoch 600, Loss: 0.1124
Epoch 700, Loss: 0.0976
Epoch 800, Loss: 0.0859
Epoch 900, Loss: 0.0764
Accuracy on synthetic dataset: 1.0


Try testing on real-world datasets after forking the repo:

In [13]:
# Load MNIST through Keras. The result of load_data() is unpacked into a
# training pair (X_train, y_train) and a test pair (X_test, y_test).
# NOTE(review): requires TensorFlow to be installed and presumably downloads
# the dataset on first use — confirm before running offline.
# NOTE(review): NeuralNetwork.forward multiplies X by a 2-D weight matrix, so
# the image arrays presumably need reshaping to (n_samples, n_features) (and
# likely rescaling) before training — confirm shapes/dtypes in the Keras docs.
from tensorflow.keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
