In [37]:
import numpy as np

# One-hot ground-truth labels: row i has its single 1 in column i, so
# sample 1 belongs to class 1, sample 2 to class 2, ..., sample 5 to class 5.
y_true = np.eye(5, dtype=int)

In [38]:
# Raw (unnormalised) model scores: one row per sample, one column per class
logits = np.array(
    [
        [2.0, 1.0, 0.1, 0.5, 2.0],  # sample 1
        [0.5, 2.5, 0.2, 3.0, 5.0],  # sample 2
        [0.1, 0.2, 3.0, 8.0, 2.0],  # sample 3
        [3.0, 2.0, 0.6, 0.3, 0.5],  # sample 4
        [4.5, 3.5, 0.3, 9.0, 2.0],  # sample 5
    ],
    dtype=float,
)

In [39]:
# Softmax function
def softmax(x, axis=-1):
    """Convert raw scores into probabilities along one axis.

    Parameters
    ----------
    x : array_like
        Raw scores (logits).
    axis : int, optional
        Axis along which the entries are normalised so that they sum to 1.
        Defaults to the last axis, which is what this function always used
        before the parameter existed.

    Returns
    -------
    numpy.ndarray
        Array shaped like ``x`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    # Subtracting the maximum along the axis leaves the result unchanged
    # mathematically but keeps np.exp from overflowing on large scores.
    exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

In [40]:
# Run the logits through softmax to get per-row class probabilities
probs = softmax(x=logits)
print("Softmax Probabilities:\n", probs)

Softmax Probabilities:
 [[3.64886502e-01 1.34234242e-01 5.45755703e-02 8.14171836e-02
  3.64886502e-01]
 [8.98234523e-03 6.63710528e-02 6.65428501e-03 1.09427367e-01
  8.08564950e-01]
 [3.67073838e-04 4.05679331e-04 6.67125329e-03 9.90101777e-01
  2.45421693e-03]
 [6.21933902e-01 2.28796696e-01 5.64205707e-02 4.17973868e-02
  5.10514435e-02]
 [1.09311012e-02 4.02132741e-03 1.63918168e-04 9.83986374e-01
  8.97279429e-04]]


In [41]:
# Compute log(probs) as a log-softmax taken directly from the logits:
#     log_softmax(x)_i = (x_i - max(x)) - log(sum_j exp(x_j - max(x)))
# np.log applied to the softmax output returns -inf as soon as a probability
# underflows to 0, and multiplying that by a 0 label yields nan (0 * -inf).
# The form below stays finite for finite logits.
shifted_logits = logits - np.max(logits, axis=-1, keepdims=True)
log_probs = shifted_logits - np.log(
    np.sum(np.exp(shifted_logits), axis=-1, keepdims=True)
)
print("\nLog of Softmax Probabilities:\n", log_probs)


Log of Softmax Probabilities:
 [[-1.00816893 -2.00816893 -2.90816893 -2.50816893 -1.00816893]
 [-4.71249427 -2.71249427 -5.01249427 -2.21249427 -0.21249427]
 [-7.90994754 -7.80994754 -5.00994754 -0.00994754 -6.00994754]
 [-0.47492146 -1.47492146 -2.87492146 -3.17492146 -2.97492146]
 [-4.51614323 -5.51614323 -8.71614323 -0.01614323 -7.01614323]]


In [42]:
# Element-wise product of the labels and the log-probabilities; every entry
# whose label is 0 is zeroed out, leaving only the labelled-class terms.
y_true_log_probs = np.multiply(y_true, log_probs)
print("\ny_true * log(probs):\n", y_true_log_probs)


y_true * log(probs):
 [[-1.00816893 -0.         -0.         -0.         -0.        ]
 [-0.         -2.71249427 -0.         -0.         -0.        ]
 [-0.         -0.         -5.00994754 -0.         -0.        ]
 [-0.         -0.         -0.         -3.17492146 -0.        ]
 [-0.         -0.         -0.         -0.         -7.01614323]]


In [43]:
# Per-sample cross-entropy: sum each row of the masked log-probabilities
# over the class axis, then flip the sign.
ce_loss = np.negative(y_true_log_probs.sum(axis=-1))
print("\nCE Loss per Sample:", ce_loss)


CE Loss per Sample: [1.00816893 2.71249427 5.00994754 3.17492146 7.01614323]


In [44]:
# Dataset-level loss: arithmetic mean of the per-sample CE losses
avg_ce_loss = ce_loss.mean()
print("\nAverage CE Loss for the Dataset:", avg_ce_loss)


Average CE Loss for the Dataset: 3.7843350841333083
