In [2]:
import pandas as pd

In [4]:
# CREATING NEURAL NET FROM SCRATCH

import numpy as np

class Layer_Dense:
    """Fully connected layer that owns a weight matrix and a bias row vector."""

    def __init__(self, n_inputs, n_neurons):
        """Initialize the layer's weights and biases.

        Args:
            n_inputs: Number of input features (rows of the weight matrix).
            n_neurons: Number of neurons (columns of the weight matrix).
        """
        # - weights have a multiplicative effect (*)
        # - biases have an additive effect (+)
        #
        # np.random.randn generates a matrix of random values sampled from a
        # standard normal distribution (mean 0, variance 1); its dimensions
        # here are n_inputs x n_neurons, so every input/neuron connection
        # starts with its own random weight.
        #
        # Multiplying by 0.1 scales the weights down to a smaller range and
        # avoids the broad spread of values that randn can produce:
        #   - smaller weights make the network start from small outputs and
        #     prevent any one neuron from dominating early on
        #   - if the outputs are too large to begin with, they could saturate
        #     non-linear activation functions (e.g. sigmoid), leading to
        #     issues such as vanishing / exploding gradients
        self.weights = 0.1 * np.random.randn(n_inputs, n_neurons)

        # This portion initializes the bias of every neuron to zero; unlike
        # weights, biases are typically initialized to zero.
        self.biases = np.zeros((1, n_neurons))
        




In [None]:
import numpy as np

class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        Args:
            n_inputs: Number of input features (rows of the weight matrix).
            n_neurons: Number of neurons (columns of the weight matrix).
        """
        # Small random weights, zero biases.
        self.weights = (0.1) * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute the layer output and store it in ``self.output``.

        Args:
            inputs: Array whose last axis has ``n_inputs`` entries.
        """
        # Calculate output values from inputs, weights and biases. The inputs
        # are remembered because backward() needs them for the weight gradient.
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights) + self.biases

    def backward(self, dvalues):
        """Back-propagate through the layer.

        Stores ``self.dweights``, ``self.dbiases`` and ``self.dinputs``.
        forward() must have been called first so that ``self.inputs`` exists.

        Args:
            dvalues: Gradient of the loss with respect to this layer's output
                (same shape as ``self.output``).
        """
        # Gradients on parameters. (The previous body applied the ReLU mask
        # `inputs <= 0` to dvalues, which is not the gradient of a dense layer
        # and fails whenever n_inputs != n_neurons.)
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        # Gradient on values, to be passed to the previous layer.
        self.dinputs = np.dot(dvalues, self.weights.T)
    
# Softmax activation function
class Activation_Softmax:
    """Row-wise softmax activation with a matching backward pass."""

    def forward(self, inputs):
        """Turn each row of ``inputs`` into a probability distribution.

        The result is stored in ``self.output``.

        Args:
            inputs: 2-D array; softmax is taken along axis 1.
        """
        # Subtracting the row maximum keeps np.exp from overflowing and does
        # not change the result, because softmax is shift-invariant.
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def backward(self, dvalues):
        """Back-propagate through softmax; stores ``self.dinputs``.

        forward() must have been called first so that ``self.output`` exists.

        Args:
            dvalues: Gradient of the loss with respect to the softmax output,
                same shape as ``self.output``.
        """
        # For one sample with softmax output s and upstream gradient g the
        # Jacobian is J = diag(s) - s s^T, hence J @ g = s * (g - s.g).
        # Evaluating that closed form for all rows at once avoids building a
        # Jacobian per sample, and the result takes its dtype from the
        # arithmetic instead of from `dvalues` (an integer `dvalues` used to
        # truncate the gradient to zeros).
        row_dot = np.sum(self.output * dvalues, axis=1, keepdims=True)
        self.dinputs = self.output * (dvalues - row_dot)

# Loss function: Categorical Cross-Entropy (not implemented in this cell; see Loss_CategoricalCrossentropy in the next cell)

In [1]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

# Initialise the nnfs helper package before any data or parameters are created.
# NOTE(review): presumably this fixes the random seed / default dtype so that
# runs are reproducible - confirm against the nnfs documentation.
nnfs.init()

# Dense layer class
class Layer_Dense:
    """Dense (fully connected) layer with forward and backward passes."""

    def __init__(self, n_inputs, n_neurons):
        """Allocate parameters for ``n_inputs`` features and ``n_neurons`` units."""
        # Biases: one zero per neuron, kept as a row vector.
        self.biases = np.zeros((1, n_neurons))
        # Weights: standard-normal draws scaled down by a factor of ten.
        self.weights = np.random.randn(n_inputs, n_neurons) * 0.1

    def forward(self, inputs):
        """Store ``inputs`` and write ``inputs . weights + biases`` to ``self.output``."""
        self.inputs = inputs  # kept for the weight gradient in backward()
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

    def backward(self, dvalues):
        """Compute ``dinputs``, ``dbiases`` and ``dweights`` from ``dvalues``.

        ``dvalues`` is the gradient of the loss with respect to ``self.output``.
        """
        # What flows back to the previous layer.
        self.dinputs = np.dot(dvalues, self.weights.T)
        # Each bias receives the column-wise sum of the incoming gradient.
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        # Each weight receives the inputs-transposed times the incoming gradient.
        inputs_transposed = self.inputs.T
        self.dweights = np.dot(inputs_transposed, dvalues)

# ReLU activation function
class Activation_ReLu:
    """Rectified linear unit: keeps positive values and zeroes the rest."""

    def forward(self, inputs):
        """Store ``inputs`` and write ``max(0, inputs)`` to ``self.output``."""
        self.inputs = inputs  # needed by backward() to know which units were active
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Write the gradient with respect to the inputs to ``self.dinputs``.

        The incoming gradient passes through unchanged where the stored input
        was positive and is set to zero where it was less than or equal to zero.
        ``dvalues`` itself is not modified.
        """
        grad = self.dinputs = dvalues.copy()
        grad[self.inputs <= 0] = 0

# Softmax activation function
class Activation_Softmax:
    """Row-wise softmax activation with a matching backward pass."""

    def forward(self, inputs):
        """Turn each row of ``inputs`` into a probability distribution.

        The result is stored in ``self.output``.

        Args:
            inputs: 2-D array; softmax is taken along axis 1.
        """
        # Subtracting the row maximum keeps np.exp from overflowing and does
        # not change the result, because softmax is shift-invariant.
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def backward(self, dvalues):
        """Back-propagate through softmax; stores ``self.dinputs``.

        forward() must have been called first so that ``self.output`` exists.

        Args:
            dvalues: Gradient of the loss with respect to the softmax output,
                same shape as ``self.output``.
        """
        # For one sample with softmax output s and upstream gradient g the
        # Jacobian is J = diag(s) - s s^T, hence J @ g = s * (g - s.g).
        # Evaluating that closed form for all rows at once avoids building a
        # Jacobian per sample, and the result takes its dtype from the
        # arithmetic instead of from `dvalues` (an integer `dvalues` used to
        # truncate the gradient to zeros).
        row_dot = np.sum(self.output * dvalues, axis=1, keepdims=True)
        self.dinputs = self.output * (dvalues - row_dot)

# Loss function: Categorical Cross-Entropy
class Loss_CategoricalCrossentropy:
    """Categorical cross-entropy loss for sparse or one-hot class labels."""

    # Lower bound applied to probabilities before log() / division, so that a
    # predicted probability of exactly 0 cannot produce inf or NaN.
    _EPSILON = 1e-7

    def forward(self, y_pred, y_true):
        """Return the mean negative log-likelihood over all samples.

        Args:
            y_pred: 2-D array of predicted class probabilities, one row per
                sample.
            y_true: Either a 1-D sequence of class indices (sparse labels) or
                a 2-D array with the same shape as ``y_pred`` (one-hot labels).

        Returns:
            The mean of ``-log(p_correct)`` over the samples.

        Raises:
            ValueError: If ``y_true`` is neither 1-D nor 2-D.
        """
        y_true = np.asarray(y_true)  # accept plain lists as well as arrays
        # Number of samples
        samples = len(y_pred)
        # Clip on both sides so that log(0) never occurs and the mean is not
        # dragged towards zero by values of exactly 1.
        y_pred_clipped = np.clip(y_pred, self._EPSILON, 1 - self._EPSILON)

        # Probabilities assigned to the target classes
        if y_true.ndim == 1:
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif y_true.ndim == 2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {y_true.ndim} dimensions"
            )

        # Losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return np.mean(negative_log_likelihoods)

    def backward(self, dvalues, y_true):
        """Compute the gradient of the loss and store it in ``self.dinputs``.

        Args:
            dvalues: 2-D array of predicted probabilities (the values the loss
                was evaluated on), one row per sample.
            y_true: Sparse class indices (1-D) or one-hot labels (2-D).
        """
        y_true = np.asarray(y_true)
        # Number of samples
        samples = len(dvalues)
        # Number of labels in every sample
        labels = len(dvalues[0])

        # If labels are sparse, turn them into one-hot encoded vectors
        if y_true.ndim == 1:
            y_true = np.eye(labels)[y_true]

        # Guard the divisor: a probability of exactly 0 would otherwise give
        # -1/0 = -inf for the target class and -0/0 = NaN everywhere else.
        safe_dvalues = np.maximum(dvalues, self._EPSILON)

        # Calculate gradient
        self.dinputs = -y_true / safe_dvalues
        # Normalize the gradient by the number of samples
        self.dinputs = self.dinputs / samples

# Optimizer: Stochastic Gradient Descent (SGD)
class Optimizer_SGD:
    """Plain stochastic gradient descent with a fixed learning rate."""

    # (parameter attribute, gradient attribute) pairs, in update order
    _PARAM_GRAD_PAIRS = (("weights", "dweights"), ("biases", "dbiases"))

    def __init__(self, learning_rate=1.0):
        """Remember the step size used by update_params()."""
        self.learning_rate = learning_rate

    def update_params(self, layer):
        """Move ``layer.weights`` and ``layer.biases`` against their gradients.

        Reads ``layer.dweights`` and ``layer.dbiases`` and subtracts
        ``learning_rate`` times each of them from the matching parameter.
        """
        for param_name, grad_name in self._PARAM_GRAD_PAIRS:
            param = getattr(layer, param_name)
            param -= self.learning_rate * getattr(layer, grad_name)
            # Re-bind the attribute, exactly as `layer.<name> -= ...` would.
            setattr(layer, param_name, param)

# Training dataset
# --- Data: spiral dataset generated with samples=100, classes=3 ---
X, y = spiral_data(samples=100, classes=3)

# --- Model: 2 features -> dense(3) -> ReLU -> dense(3) -> softmax ---
dense1, activation1 = Layer_Dense(2, 3), Activation_ReLu()
dense2, activation2 = Layer_Dense(3, 3), Activation_Softmax()

# --- Loss and optimizer ---
loss_function = Loss_CategoricalCrossentropy()
optimizer = Optimizer_SGD(learning_rate=0.1)

# --- Forward pass: feed each stage's output into the next stage ---
signal = X
for stage in (dense1, activation1, dense2, activation2):
    stage.forward(signal)
    signal = stage.output

# `signal` is now the softmax output (activation2.output)
loss = loss_function.forward(signal, y)
print("Loss:", loss)

# --- Backward pass: walk the stages in reverse, chaining dinputs ---
loss_function.backward(activation2.output, y)
upstream = loss_function.dinputs
for stage in (activation2, dense2, activation1, dense1):
    stage.backward(upstream)
    upstream = stage.dinputs

# --- Parameter update: one SGD step for every trainable layer ---
for trainable in (dense1, dense2):
    optimizer.update_params(trainable)

# Show the first rows of the first layer's weights to confirm they were updated
print("Updated first layer weights:\n", dense1.weights[:5])


Loss: 1.098445
Updated first layer weights:
 [[-0.13079816  0.1657304  -0.01186143]
 [-0.06819189  0.0666257  -0.04603613]]
