In [1]:
import numpy as np
import nnfs
import matplotlib.pyplot as plt 
%matplotlib inline

In [2]:
class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's parameters.

        n_inputs  -- number of features each sample carries into the layer
        n_neurons -- number of neurons (output features) in the layer
        """
        # Weights are stored as (n_inputs, n_neurons) so the forward pass can
        # multiply a batch of row-vector samples without transposing anything.
        gaussian_draw = np.random.randn(n_inputs, n_neurons)
        self.weights = 0.01 * gaussian_draw
        # One bias per neuron, kept as a single row so it broadcasts over the batch.
        bias_shape = (1, n_neurons)
        self.biases = np.zeros(bias_shape)

    def forward(self, inputs):
        """Run the layer on a batch, store the result in ``self.output`` and return it."""
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases
        return self.output

class Activation_ReLU:
    """Rectified linear unit: negative entries become 0, the rest pass through."""

    def forward(self, inputs):
        """Apply ReLU element-wise, keep the result on ``self.output`` and return it.

        The return value is a convenience only; callers in this notebook read
        ``self.output`` instead.
        """
        rectified = np.maximum(0, inputs)
        self.output = rectified
        return rectified

class Activation_Softmax:
    """Softmax activation: turns each row of scores into a probability distribution."""

    def forward(self, inputs):
        """Compute the row-wise softmax, store it in ``self.output`` and return it.

        inputs -- 2-D batch of scores; normalisation runs along axis 1, so every
                  row of the result sums to 1.
        """
        # Subtracting each row's maximum leaves the softmax value unchanged
        # (the common factor cancels in the ratio) but caps the largest exponent
        # at 0, so np.exp can no longer overflow to inf and produce NaN rows.
        # keepdims=True keeps the reductions as column vectors so they broadcast
        # back against the (samples, classes) matrix.
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        # Exponentiate once and reuse for both numerator and denominator.
        exp_values = np.exp(shifted)
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        return self.output

class Loss:
    """Base class for losses; subclasses supply ``forward`` returning per-sample losses."""

    def calculate(self, predictions, results):
        """Return the mean of the per-sample losses produced by ``self.forward``.

        predictions -- model outputs, passed straight through to ``forward``
        results     -- ground-truth targets, passed straight through to ``forward``
        """
        return np.mean(self.forward(predictions, results))

class Categorical_Cross_Entropy_Loss(Loss):
    """Categorical cross-entropy: negative log of the probability given to the true class."""

    def forward(self, y_pred, y_true):
        """Return one loss value per sample.

        y_pred is an (m, n) matrix of predicted probabilities, with m samples and
        n classes. For a single sample predicted as [0.5, 0.2, 0.3] whose true
        class is 2, the loss is -ln(0.5)*0 - ln(0.2)*0 - ln(0.3)*1 = -ln(0.3),
        i.e. only the entry at the true class matters.

        y_true may be given in either of two forms:
          * sparse  -- 1-D array of class indices; the matching entry is picked
                       from every row with a (row, column) fancy index.
          * one-hot -- 2-D array; an element-wise product zeroes everything but
                       the true-class entry (e.g. [0, x, 0]) and the row sum
                       recovers x.
        """
        # Keep probabilities strictly inside (0, 1) so log(0) can never occur.
        safe_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if len(y_true.shape) != 1:
            # One-hot targets: mask out the wrong classes, then collapse each row.
            picked = np.sum(safe_pred * y_true, axis=1)
        else:
            # Sparse targets: row i contributes its column y_true[i].
            every_row = range(len(safe_pred))
            picked = safe_pred[every_row, y_true]

        return -np.log(picked)

# Demo: one forward pass of an untrained 2 -> 3 -> 3 network on the spiral dataset.
from nnfs.datasets import spiral_data
nnfs.init() # sets the random seed to 0 so the run is repeatable
X, y = spiral_data(samples=100, classes=3) # X consists of coordinate pairs, y is the class label of each pair

# Hidden layer: 2 input features (the coordinates) -> 3 neurons, followed by ReLU.
layer1 = Layer_Dense(2,3)
layer1.forward(X)
activation1 = Activation_ReLU()
activation1.forward(layer1.output)
# Output layer: 3 hidden features -> 3 class scores, turned into probabilities by softmax.
layer2 = Layer_Dense(3,3)
layer2.forward(activation1.output)
activation2 = Activation_Softmax()
activation2.forward(layer2.output)
# Mean cross-entropy over all samples; as the last expression it becomes the cell output.
# The displayed value (~1.0986) is ln(3), the loss of a uniform guess over 3 classes.
my_loss = Categorical_Cross_Entropy_Loss()
my_loss.calculate(activation2.output, y)

1.0986104

In [3]:
class Layer_Dense:
    """Fully connected layer; the forward result is exposed via ``self.output``."""

    def __init__(self, n_inputs, n_neurons):
        """Initialise small random weights of shape (n_inputs, n_neurons) and zero biases."""
        gaussian_draw = np.random.randn(n_inputs, n_neurons)
        self.weights = 0.01 * gaussian_draw
        # Single row of biases so it broadcasts across every sample in a batch.
        bias_shape = (1, n_neurons)
        self.biases = np.zeros(bias_shape)

    def forward(self, inputs):
        """Compute ``inputs . weights + biases`` and store it on ``self.output`` (returns nothing)."""
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

class Activation_ReLU:
    """Rectified linear unit activation."""

    def forward(self, inputs):
        """Clamp negative entries to 0 element-wise; the result is left on ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

class Activation_Softmax:
    """Softmax activation: turns each row of scores into a probability distribution."""

    def forward(self, inputs):
        """Compute the row-wise softmax and store it on ``self.output`` (returns nothing).

        inputs -- 2-D batch of scores; normalisation runs along axis 1, so every
                  row of the stored result sums to 1.
        """
        # Shifting each row by its maximum does not change the softmax value but
        # bounds the largest exponent at 0, preventing np.exp from overflowing to
        # inf (which would turn the whole row into NaN after the division).
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        # Exponentiate once and reuse for both numerator and denominator.
        exp_values = np.exp(shifted)
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

class Loss:
    """Common loss interface: subclasses implement ``forward`` for per-sample losses."""

    def calculate(self, predictions, results):
        """Average the per-sample losses from ``self.forward(predictions, results)``."""
        per_sample = self.forward(predictions, results)
        return np.mean(per_sample)

class Categorical_Cross_Entropy_Loss(Loss):
    """Categorical cross-entropy: -log of the probability assigned to the true class."""

    def forward(self, y_pred, y_true):
        """Return the per-sample negative log-likelihoods.

        y_pred -- (samples, classes) predicted probabilities
        y_true -- either a 1-D array of class indices (sparse labels) or a 2-D
                  one-hot matrix with the same shape as y_pred
        """
        # Clip away exact 0 and 1 so the logarithm below never receives 0.
        safe_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)

        labels_are_sparse = len(y_true.shape) == 1
        if labels_are_sparse:
            # Pick, for every row, the column named by that row's label.
            every_row = range(len(safe_pred))
            picked = safe_pred[every_row, y_true]
        else:
            # One-hot mask keeps only the true-class probability in each row.
            picked = np.sum(safe_pred * y_true, axis=1)

        return -np.log(picked)

class Accuracy:
    """Classification accuracy: fraction of samples whose arg-max prediction is correct."""

    def calculate_accuracy(self, predictions, values):
        """Return the mean of (predicted class == true class).

        predictions -- (samples, classes) scores or probabilities
        values      -- true labels, either 1-D class indices or a 2-D one-hot matrix
        """
        predicted_classes = np.argmax(predictions, axis=1)
        # One-hot targets are reduced to class indices; sparse targets are used as given.
        if len(values.shape) == 2:
            true_classes = np.argmax(values, axis=1)
        else:
            true_classes = values
        return np.mean(predicted_classes == true_classes)
            
# Demo: the same single forward pass as the previous cell, using the classes redefined above.
from nnfs.datasets import spiral_data
nnfs.init() # re-initialise nnfs before generating data and weights
X, y = spiral_data(samples=100, classes=3) # X holds the input samples, y their class labels

# Hidden layer (2 inputs -> 3 neurons) followed by ReLU; results are read from .output.
layer1 = Layer_Dense(2, 3)
layer1.forward(X)
activation1 = Activation_ReLU()
activation1.forward(layer1.output)
# Output layer (3 -> 3 class scores) followed by softmax.
layer2 = Layer_Dense(3, 3)
layer2.forward(activation1.output)
activation2 = Activation_Softmax()
activation2.forward(layer2.output)
# Mean cross-entropy over the batch; as the last expression it becomes the cell output.
my_loss = Categorical_Cross_Entropy_Loss()
my_loss.calculate(activation2.output, y)

1.0986104