### Categorical Cross Entropy Loss function

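For a single sample, where `y_c` is the one-hot target for class `c` and `p_c` is the predicted probability of class `c`: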
```
CCE = -Σ(y_c * log(p_c))
```

For multiple samples, averaged over `N` samples (`i` indexes samples, `c` indexes classes):
```
CCE = -(1/N) * ΣΣ(y_ic * log(p_ic))
```

In [2]:
import numpy as np

In [20]:
def softmax(x, axis=None):
    """Return the softmax of *x* along one axis.

    Args:
        x: Array-like of raw scores (logits). May be a 1-D vector for a
            single sample or an array with two or more dimensions.
        axis: Axis along which the values are normalised. When ``None``
            (the default), axis 1 is used for inputs with two or more
            dimensions and the last axis is used for 1-D input.

    Returns:
        An array with the same shape as *x* whose entries along ``axis``
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if axis is None:
        # Axis 1 is the class axis for (samples, classes) input; a 1-D
        # vector only has one axis to normalise over.
        axis = 1 if x.ndim >= 2 else -1
    # Subtract the max for numerical stability: the result is unchanged
    # mathematically, but np.exp can no longer overflow.
    exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

class CategoricalCrossEntropy:
    """Categorical cross-entropy between predicted probabilities and targets.

    Args:
        predicted: Predicted class probabilities.
        actual: Target values, multiplied element-wise with the log of
            the predictions.
        is_batch: When true, ``compute`` reduces along axis 1 and returns
            one loss per row; otherwise it reduces over every element.
    """

    def __init__(self, predicted, actual, is_batch: bool = False):
        self.predicted = predicted
        self.actual = actual
        self.is_batch = is_batch

    def compute(self):
        """Return the negative sum of ``actual * log(predicted)``.

        Predictions are clipped to ``[1e-15, 1 - 1e-15]`` before the
        logarithm so that ``log(0)`` is never evaluated. Reduced axes are
        kept with length one (``keepdims=True``).
        """
        tiny = 1e-15
        safe_probs = np.clip(self.predicted, tiny, 1 - tiny)
        weighted_log = self.actual * np.log(safe_probs)
        # Batch mode: one loss per row. Otherwise: a single total.
        reduce_axis = 1 if self.is_batch else None
        return -np.sum(weighted_log, axis=reduce_axis, keepdims=True)

# Demo: two samples, three classes; both targets select class 0.
raw = np.random.randn(2, 3)  # random unnormalised scores
a = np.array([[1, 0, 0], [1, 0, 0]])  # one-hot targets
print(raw, a, sep="\n")

# Turn each row of scores into a probability distribution.
p = softmax(raw)
print(p)

# Per-sample loss for the batch, one row per sample.
c = CategoricalCrossEntropy(predicted=p, actual=a, is_batch=True)
print(c.compute())


[[-3.05385172  1.08050068  0.42438593]
 [-0.5935367   0.89054803  0.06414276]]
[[1 0 0]
 [1 0 0]]
[[0.01043278 0.65151827 0.33804894]
 [0.13621688 0.60084262 0.2629405 ]]
[[4.56280223]
 [1.99350697]]
