In [1]:

# run for a complete overview

import numpy as np
import nnfs
from nnfs.datasets import spiral_data

nnfs.init()

class Layer_Dense:
    """Fully connected layer: ``output = inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer's weights and biases.

        Args:
            n_inputs: number of features in each input row.
            n_neurons: number of neurons, i.e. values produced per input row.
        """
        # Weights are drawn from a standard normal distribution (mean 0,
        # variance 1) and scaled by 0.01 so they start out small.
        # Storing them as (n_inputs, n_neurons) lets forward() multiply a
        # batch by the weights directly, with no transpose.
        weight_shape = (n_inputs, n_neurons)
        self.weights = 0.01 * np.random.randn(*weight_shape)

        # One bias per neuron, all zero to begin with. The leading axis of
        # length 1 broadcasts over every row of a batch.
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Run a batch through the layer and store the result in ``self.output``.

        Each row of ``inputs`` is one sample. The dot product with the
        (n_inputs, n_neurons) weights yields one row of n_neurons values per
        sample, to which the biases are added. For example:

            np.dot([[10, 10],      [[10, 5, 1],
                    [ 5,  5]],      [ 5, 2, 1]])
            = [[10*10 + 10*5, 10*5 + 10*2, 10*1 + 10*1],
               [ 5*10 +  5*5,  5*5 +  5*2,  5*1 +  5*1]]
        """
        weighted_sums = np.dot(inputs, self.weights)
        self.output = weighted_sums + self.biases
       
        

# activations for the dense layer 
class Activation_ReLU:
    """Rectified linear unit: keeps positive values and replaces the rest with 0."""

    def forward(self, inputs):
        """Store the element-wise maximum of 0 and ``inputs`` in ``self.output``."""
        floor = 0
        self.output = np.maximum(floor, inputs)
        
        
# Softmax uses the exponential because it is always positive and, being a power, exaggerates the differences between inputs.
class Activation_Softmax:
    """Softmax activation: turns each row of scores into values that sum to 1."""

    def forward(self, inputs):
        """Compute the row-wise softmax of ``inputs`` and store it in ``self.output``.

        ``inputs`` is indexed along axis 1 here, so it is expected to be a
        2-D batch with one row of scores per sample.
        """
        # Shift every row so that its largest entry becomes 0. All exponents
        # are then <= 0, so exp() never exceeds 1 and cannot blow up for
        # large scores. exp() is monotonic, so the ordering of the scores
        # is kept.
        row_max = np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(inputs - row_max)

        # Normalise per sample: divide each row by its own total.
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals
 

# Common loss class
class Loss:
    """Base class for losses; a subclass supplies ``forward(output, y)``."""

    def calculate(self, output, y):
        """Return the mean loss over a batch.

        Args:
            output: model output, passed through to ``self.forward``.
            y: ground-truth values, passed through to ``self.forward``.

        Returns:
            The mean of the per-sample losses returned by ``self.forward``.
        """
        per_sample_losses = self.forward(output, y)
        return np.mean(per_sample_losses)
    
# Cross-entropy loss
class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy: negative log of the confidence at the true class."""

    def forward(self, y_pred, y_true):
        """Return the per-sample negative log-likelihoods.

        Args:
            y_pred: predicted confidences, one row per sample and one column
                per class.
            y_true: ground-truth labels, either 1-D (one class index per
                sample) or 2-D (one one-hot row per sample).

        Returns:
            1-D array holding ``-log(confidence at the true class)`` for
            each sample.

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        # Accept plain Python lists as well as arrays.
        y_pred = np.asarray(y_pred)
        y_true = np.asarray(y_true)

        # Number of samples in the batch
        samples = len(y_pred)

        # Clip so that log() below never sees exactly 0 (which would give
        # -inf). The upper bound is clipped by the same amount so the mean
        # is not dragged towards either side.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Sparse labels: for row i pick column y_true[i].
            correct_confidences = y_pred_clipped[np.arange(samples), y_true]
        elif y_true.ndim == 2:
            # One-hot labels: multiplying zeroes every column except the
            # target one, so the row sum is the target confidence.
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            raise ValueError(
                'y_true must be 1-D (class indices) or 2-D (one-hot), '
                'got %d dimension(s)' % y_true.ndim
            )

        # The loss grows as the confidence at the true class approaches 0
        # and shrinks towards 0 as it approaches 1.
        return -np.log(correct_confidences)
        
# spiral_data returns X, one row of 2 coordinates (the features) per point,
# and y, the class label of each row. `samples` is the number of points
# generated per class, so this call yields 8 rows in total.
X, y = spiral_data(samples=4, classes=2)
print(X)
print(y)

# First dense layer: 2 input features -> 3 output values.
dense1 = Layer_Dense(2, 3)

# ReLU activation, applied to the first dense layer's output.
activation1 = Activation_ReLU()

# Second dense layer: 3 input features (the outputs of the previous layer)
# -> 3 output values.
dense2 = Layer_Dense(3, 3)

# Softmax activation, applied to the second dense layer's output.
activation2 = Activation_Softmax()

# Loss function
loss_function = Loss_CategoricalCrossentropy()

# Forward pass: each step consumes the output of the step before it.
dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)

# In each row of activation2.output the largest value marks the class to
# select; uncomment to inspect it.
# print(activation2.output)

# The loss is computed from the softmax output and the ground-truth labels.
loss = loss_function.calculate(activation2.output, y)
# The weights are still random at this point, so the confidences are roughly
# 0.33 for each of the 3 outputs.
print('loss:', loss)

# Accuracy is the fraction of samples predicted correctly.
# The prediction for a sample is the index of its largest confidence.
predictions = np.argmax(activation2.output, axis=1)

# One-hot labels (2-D) are reduced to class indices so they can be compared
# with the predictions; labels that are already 1-D are left alone.
if y.ndim == 2:
    y = np.argmax(y, axis=1)

# `predictions == y` is True/False per sample; the mean treats these as 1/0.
accuracy = np.mean(predictions == y)
print('acc:', accuracy)

[[ 0.          0.        ]
 [-0.12729016 -0.30807194]
 [ 0.51077586  0.4284302 ]
 [-0.9922136   0.12454799]
 [-0.         -0.        ]
 [ 0.09158143  0.32050577]
 [-0.66041565 -0.09108007]
 [ 0.8794786   0.47593853]]
[0 0 0 0 1 1 1 1]
loss: 1.0986145
acc: 0.375


In [1]:
import numpy as np
# Softmax outputs for a batch of 3 samples, one row per sample
softmax_outputs = np.array([
    [0.7, 0.2, 0.1],
    [0.5, 0.1, 0.4],
    [0.02, 0.9, 0.08],
])

# Ground-truth class index for each of the 3 samples
class_targets = np.array([0, 1, 1])

# argmax along axis 1 gives, for each row, the index of its highest value
predictions = np.argmax(softmax_outputs, axis=1)

# One-hot targets (2-D) are converted to class indices first
if class_targets.ndim == 2:
    class_targets = np.argmax(class_targets, axis=1)

# The comparison yields True/False per sample; the mean counts True as 1
accuracy = np.mean(predictions == class_targets)
print('acc:', accuracy)

acc: 0.6666666666666666


In [19]:
import numpy as np
 

# Softmax outputs for a batch of 3 samples, one row per sample.
softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])

# One-hot form of a set of targets, kept under its own name so it is not
# silently overwritten. Note that these rows encode classes [0, 1, 1], which
# is NOT the same labelling as the sparse targets below.
class_targets_one_hot = np.array([[1, 0, 0],
                                  [0, 1, 0],
                                  [0, 1, 0]])

# Sparse targets: one class index per sample. These are the ones used below.
class_targets = np.array([1, 0, 0])

# Indexing with (row indices, column indices) picks one value per sample:
# the confidence at that sample's target class.
sample_indices = range(len(softmax_outputs))
correct_confidences = softmax_outputs[sample_indices, class_targets]
print(correct_confidences)


[0.1  0.1  0.02]
