<a href="https://colab.research.google.com/github/theaveas/DeepLearning/blob/main/NNFS/05_nnfs_calculating_network_error_with_loss.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Calculating Network Error with Loss
`Loss function`, also referred to as the `cost function`, is the algorithm that quantifies how wrong a model is.\
`Loss` is the measure of this metric. we ideally want it to be 0.

In [2]:
import platform
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

print(platform.python_version())
print(np.__version__)
print(matplotlib. __version__)

#python version 3.9.7
#numpy version 1.21.2
#matplotlib version 3.5.0

3.7.12
1.19.5
3.2.2


In [1]:
# install library
!pip install nnfs

Collecting nnfs
  Downloading nnfs-0.5.1-py3-none-any.whl (9.1 kB)
Installing collected packages: nnfs
Successfully installed nnfs-0.5.1


## Categorical Cross-Entropy Loss
Is explicitly used to compare a `groud-truth` probability(y or labels) and some `predicted` distribution  (y-hat or predictions)
\
`L = - sum(yi,j * log(yhat i,j)` 

In [4]:
# log loss(correct_class_confidence)
# Li = -log(yhat i,k)  # where k is an index of "true" probability
softmax_output = [0.7, 0.1, 0.2]

# targets of [1, 0, 0]
m = len(softmax_output)
loss = - np.sum(1 * np.log(0.7) + 0 * np.log(0.1) + 0 * np.log(0.2))
print(loss)

0.35667494393873245


In [5]:
# the categorical cross entropy loss account for the larger the outputs the larger the confidence is
output = [1., 0.95, 0.9, 0.8, 0.2, 0.1, 0.05, 0.01]

for conf in output:
    print(-np.log(conf))

-0.0
0.05129329438755058
0.10536051565782628
0.2231435513142097
1.6094379124341003
2.3025850929940455
2.995732273553991
4.605170185988091


In [6]:
# log is short for logarithm
# logarithm with e as its base is referred to as the "natural logarithm"
# this equation solve the term e**x = b, e**x = 5.2 is solved by log(5.2)

b = 5.2
print(np.log(b))
print('e**1.6486586255873816 is ', np.exp(1.6486586255873816))

1.6486586255873816
e**1.6486586255873816 is  5.2


In [10]:
# calculate categorical cross-entropy loss
# consider the output from softmax activation function is 
softmax_outputs = np.array([[0.7, 0.1, 0.2], 
                             [0.1, 0.5, 0.4], 
                             [0.02, 0.9, 0.08]])

# with three training example with three prediction probability
# the target is dog, cat, cat 
# 0 = dog, 1 = cat, 2 = human
class_targets = np.array([0, 1, 1])

for targ_idx, distribution in zip(class_targets, softmax_outputs):
    print(distribution[targ_idx])

0.7
0.5
0.9


In [12]:
# print distribution using numpy array
print(softmax_outputs[range(len(softmax_outputs)), class_targets]) # y i,j where i is the index of the output, j is the index of the class target

# calculate the categorical cross-entropy loss
print('softmax output probability distribution', -np.log(softmax_outputs[range(len(softmax_outputs)), class_targets]))

[0.7 0.5 0.9]
softmax output probability distribution [0.35667494 0.69314718 0.10536052]


In [13]:
# What we need is the average loss per batch to have an idea how the model perform
# average equation sum(iterable) / len(iterable)

m = len(softmax_outputs)
average_loss = 1/m * np.sum(-np.log(softmax_outputs[range(len(softmax_outputs)), class_targets]))
print('The average loss of this batch', average_loss)

The average loss of this batch 0.38506088005216804


In [14]:
# different between sparse data and one hot encoded
# same as our class target [dog, cat, cat]
sparse_target = np.array([0, 1, 1])

one_hot_encoded = np.array([[1,0,0],
                            [0,1,0],
                            [0,1,0]])
print('len of sparse class:', len(sparse_target.shape))
print('len of sparse class:', len(one_hot_encoded.shape))

len of sparse class: 1
len of sparse class: 2


In [16]:
# encounter different target class types (sparse, one hot encoded)

# probabilities for target values only if categorical labes
if len(class_targets.shape) == 1:
    correct_conf = softmax_outputs[range(len(softmax_outputs)), class_targets]
# one hot encoded
elif len(class_targets.shape) == 2:
    correct_conf = np.sum(softmax_outputs * class_targets, axis=1, keepdims=True)

# compute loss
m = len(softmax_outputs)
average_loss = 1/m * np.sum(-np.log(softmax_outputs[range(len(softmax_outputs)), class_targets]))
print(average_loss)

0.38506088005216804


In [17]:
# -np.log(0) = inf
# >>> __main__:1: RuntimeWarning: divide by zero encountered in log
# >>> inf

# the problem is np.mean([1, 2, 3, -np.log(0)])
# >>> inf

# we could add a very small value to the confidence to prevent it from being a zero 1e-7
print(-np.log(1e-7))

# but this could impact the result insignificantly, 
# and in case where the conf value is 1: conf in the correct label loss becomes a negative value instead of being 0
print(-np.log(1 + 1e-7))

# to prevent this from happening instead of being 1 + 1e-7, will become 1-1e-7(so slightly less than 1)
def y_pred_clipped(y_pred):
    return np.clip(y_pred, 1e-7, 1-1e-7)

16.11809565095832
-9.999999505838704e-08


# The Categoriacal Cross-Entropy Loss Class

In [18]:
# common loss class
class Loss:

    def calc(self, output, y):
        """ Calculate the data and regularization losses
        Input: 
            output : The output of the activation function
            y : Class targets
            
        Output:
            data_loss : Average Loss
        """
        data_loss = self.forward(output, y)
        
        return data_loss

In [19]:
# categorical cross entropy loss 
class Loss_CategoricalCrossentropy(Loss):
    
    def forward(self, y_pred, y):
        """Calculate Cross-entropy loss
        Input : 
            y_pred : The output of the activation function
            y : Class targets
            
        Output :
            average_loss : Average Loss
        """
        # len of training example
        m = len(y_pred)
        
        # clip data to prevent divison by 0
        # clip both sides to not drag mean toward any values
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)
        
        # probabilities for target values only if categorical labels
        if len(y.shape) == 1:
            correct_conf = y_pred_clipped[range(len(y_pred_clipped)), y]
        # one hot encoded
        elif len(y.shape) == 2:
            correct_conf = np.sum(y_pred_clipped * y, axis=1, keepdims=True)

        # compute loss
        average_loss = 1/m * np.sum(-np.log(y_pred_clipped[range(len(y_pred_clipped)), y]))
        
        return average_loss

In [20]:
# testing
loss_function = Loss_CategoricalCrossentropy()
loss = loss_function.forward(softmax_outputs, class_targets)
print(loss)

0.38506088005216804


## Accuracy Calculation
Accuracy describes how often the largest confidence is the correct class in terms of a fraction

In [21]:
# calculate values along second axis (axis of index 1)
class_targets = np.array([0,1,1])
preds = np.argmax(softmax_outputs, axis=1)
# convert one-hot encoded
if len(class_targets.shape) == 2:
    class_targets = np.argmax(class_targets, axis=1)

# true evaluates to 1; false to 0
acc = np.mean(preds==class_targets)
print(acc)

1.0


---
## Combining everything up to this point

In [22]:
# import dataset 
import nnfs
from nnfs.datasets import spiral_data

# set random seed to 0, create float32 dtype, overrides the original dot product from Numpy
nnfs.init()

In [23]:
class Dense:
    def __init__(self, n_inputs, n_neurons):
        """ Initialize the weights and biases of each neurons
        n_inputs = number of input features
        n_neurons = number of desired neurons
        """
        # using np.random.randn and * 0.01 is to break the symetry of the neurons
        self.weights = np.random.randn(n_inputs, n_neurons) * 0.01
        # biases can be initialize as zeros
        self.biases = np.zeros((1, n_neurons))
    
    def forward(self, inputs):
        """ Calculate the output layer using The Dot product of input feature and weight plus bias
        Input:
        inputs = Training examples
        
        Output:
        output = Output of the training example
        """
        # calculate the output layer
        output = np.dot(inputs, self.weights) + self.biases
        
        return output

In [24]:
# ReLU activation
class Activation_ReLU:
    def forward(self, inputs):
        output = np.maximum(0, inputs)
        
        return output

In [25]:
# Sotfmax activation
class Activation_Softmax:
    def forward(self, inputs):
        # input - np.max to prevent the exponential function from overflowing
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        
        softmax = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        return softmax

In [26]:
# common loss class
class Loss:

    def calc(self, output, y):
        """ Calculate the data and regularization losses
        Input: 
            output : The output of the activation function
            y : Class targets
            
        Output:
            data_loss : Average Loss
        """
        data_loss = self.forward(output, y)
        
        return data_loss

In [27]:
# categorical cross entropy loss 
class Loss_CategoricalCrossentropy(Loss):
    
    def forward(self, y_pred, y):
        """Calculate Cross-entropy loss
        Input : 
            y_pred : The output of the activation function
            y : Class targets
            
        Output :
            average_loss : Average Loss
        """
        # len of training example
        m = len(y_pred)
        
        # clip data to prevent divison by 0
        # clip both sides to not drag mean toward any values
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)
        
        # probabilities for target values only if categorical labels
        if len(y.shape) == 1:
            correct_conf = y_pred_clipped[range(len(y_pred_clipped)), y]
        # one hot encoded
        elif len(y.shape) == 2:
            correct_conf = np.sum(y_pred_clipped * y, axis=1, keepdims=True)

        # compute loss
        average_loss = 1/m * np.sum(-np.log(y_pred_clipped[range(len(y_pred_clipped)), y]))
        
        return average_loss

In [28]:
# create dataset
X, y = spiral_data(samples=100, classes=3)

# create dense layer with 2 input features and 3 output values
l1 = Dense(2, 3)
a1 = Activation_ReLU()

# create dense layer with 3 input features and 3 output values
l2 = Dense(3, 3)
a2 = Activation_Softmax()

# forward pass through activation func
yhat1 = a1.forward(l1.forward(X))
yhat2 = a2.forward(yhat1)

# compute loss
loss_function = Loss_CategoricalCrossentropy()
loss = loss_function.forward(yhat2, y)

print(yhat2[:5])
print(loss)

[[0.33333334 0.33333334 0.33333334]
 [0.33332068 0.33335868 0.33332068]
 [0.3332981  0.33340386 0.3332981 ]
 [0.3332748  0.3334504  0.3332748 ]
 [0.33325398 0.33349204 0.33325398]]
1.098567097981771


In [29]:
# calculate accuraccy
preds = np.argmax(yhat2, axis=1)
# convert one-hot encoded
if len(y.shape) == 2:
    y = np.argmax(y, axis=1)

# true evaluates to 1; false to 0
acc = np.mean(preds==y)
print(acc)

0.3333333333333333
