In [10]:
import numpy as np
import nnfs
import math

In [11]:
# the *loss function* is the algorithm that quantifies how 'wrong' a model is 
# *loss* is the number that function produces: the larger it is, the more wrong the model's prediction was

# Categorical cross-entropy - used to compare a 'ground truth' probability distribution (y) with some predicted distribution (y hat)
# it is one of the most commonly used loss functions when the output layer uses softmax activation

# For a 3 class system:
# -(intended_distribution_index_i * log(actual_distribution_index_i) + 
# intended_distribution_index_j * log(actual_distribution_index_j) +
# intended_distribution_index_k * log(actual_distribution_index_k)
# )

# the terms for the non-target classes zero out (their intended distribution is 0 when the target is one-hot), so it reduces to:
# -(intended_distribution_index_i * log(actual_distribution_index_i))

# we expect that the classes should be distributed favorably here
# one-hot target: class 0 is the intended class
intended_class_distribution = [1, 0, 0]
# example softmax output for the same sample
softmax_output = [0.7, 0.2, 0.1]

# single-term form: only the target class (index 0) has a non-zero intended value
categorical_cross_entropy = -(
    intended_class_distribution[0] * np.log(softmax_output[0])
)
print(categorical_cross_entropy)

# we can reduce this further: the intended value is 1, and 1 * num => num
categorical_cross_entropy = -np.log(softmax_output[0])
print(categorical_cross_entropy)

# The log of the confidence gets more negative as the confidence drops,
# so the loss (the negated log) grows as the model becomes less confident.
# The values printed here are the raw logs, before negation:
for confidence in (1, 0.95, 0.9, 0.75, 0.65, 0.50, 0.25):
    print(np.log(confidence))




0.35667494393873245
0.35667494393873245
0.0
-0.05129329438755058
-0.10536051565782628
-0.2876820724517809
-0.4307829160924542
-0.6931471805599453
-1.3862943611198906
0.7
0.5
0.9


In [17]:
# how do we map intended classes?
# let's say for our given 3 class example, we are categorizing each possible class output (index of output set) 
# as a different class [human, cat, dog] 
# so when the model generates an output set of [0.9, 0.05, 0.05] it is very confident the input was a 'human'
# we can put this together into a 'class_targets' array that models the intended classes for each input
# target class index for each of the three samples below
class_targets = [0, 1, 1]  # human, cat, cat

# one row of predicted class probabilities per sample
softmax_outputs = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])

# crude version: pair each predicted row with its target index and
# print the confidence the model assigned to that target class
for predicted_distribution, target_index in zip(softmax_outputs, class_targets):
    print(predicted_distribution[target_index])

0.7
0.5
0.9


In [20]:
# Integer-array indexing: pairing row i with class_targets[i] pulls out the
# confidence at the target class for every sample in a single lookup.
# (softmax_outputs and class_targets come from the previous cell.)
sample_rows = range(len(softmax_outputs))
target_confidences = softmax_outputs[sample_rows, class_targets]
print(target_confidences)

# negating the log of those confidences gives the per-sample CCEL
print(-np.log(target_confidences))


[0.7 0.5 0.9]
[0.35667494 0.69314718 0.10536052]


In [22]:
# Per-sample losses first, then their mean gives the average loss for the batch.
# (softmax_outputs and class_targets come from an earlier cell.)
row_indices = range(len(softmax_outputs))
loss = -np.log(softmax_outputs[row_indices, class_targets])
print(loss.mean())

0.38506088005216804


In [24]:
# Targets may arrive either as class indices (1-D) or one-hot encoded (2-D);
# this cell handles both layouts, using one-hot targets as the example.
softmax_outputs = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])
class_targets = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0],
])

target_dims = class_targets.ndim
if target_dims == 1:
    # sparse labels: pick the confidence at each sample's target index
    correct_confidences = softmax_outputs[range(len(softmax_outputs)), class_targets]
elif target_dims == 2:
    # one-hot labels: multiplying masks out every non-target class,
    # so the row sum is just the confidence at the target class
    correct_confidences = (softmax_outputs * class_targets).sum(axis=1)

# per-sample loss, then the batch average
neg_log = -np.log(correct_confidences)

average_loss = neg_log.mean()
print(average_loss)

0.38506088005216804


In [27]:
# Loss and CCEL classes:
class Loss:
    """Base class for loss functions.

    Subclasses implement ``forward(output, y)``, which must return one loss
    value per sample; ``calculate`` averages those values.
    """

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        Args:
            output: Model predictions, passed through to ``self.forward``.
            y: Target values, passed through to ``self.forward``.

        Returns:
            The mean of whatever ``self.forward(output, y)`` returns.
        """
        sample_losses = self.forward(output, y)
        data_loss = np.mean(sample_losses)
        return data_loss


class Loss_CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy loss for class-probability predictions."""

    def forward(self, y_pred, y_true):
        """Compute the negative log-likelihood of the target class per sample.

        Args:
            y_pred: Predicted probabilities, one row per sample and one
                column per class.
            y_true: Targets, either a 1-D sequence of class indices (one per
                sample) or a 2-D one-hot array with the same layout as
                ``y_pred``. Plain Python lists are accepted.

        Returns:
            A 1-D array holding ``-log(confidence at the target class)`` for
            each sample.

        Raises:
            ValueError: If ``y_true`` is neither 1-D nor 2-D.
        """
        # Accept plain lists as well as arrays; lists have no ``.shape``.
        y_true = np.asarray(y_true)
        samples = len(y_pred)

        # Clip away exact 0 and 1 so the log below never sees 0 (which would
        # give an infinite loss); the upper bound mirrors the lower one.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Sparse labels: pick the confidence at each sample's target index.
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif y_true.ndim == 2:
            # One-hot labels: the mask zeroes every non-target class, so the
            # row sum is the confidence at the target class.
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {y_true.ndim} dimensions"
            )

        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods
    

# Run the class-based loss on softmax_outputs / class_targets from the
# earlier one-hot cell.
loss_function = Loss_CategoricalCrossEntropy()
loss = loss_function.calculate(output=softmax_outputs, y=class_targets)
print(loss)
        
        
        

0.38506088005216804
