# Loss functions

In [5]:
from pathlib import Path

ROOT_DIR = Path('..') / '..'
!pip install -q -r {ROOT_DIR / 'requirements.txt'}

import torch

## Cross entropy loss

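$$H(p, q) = - \sum_{x \in X} p(x) \log q(x)$$

Here $p$ is the target (ground-truth) distribution, $q$ is the predicted distribution, and $X$ is the set of possible outcomes.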

## Examples

In [6]:
from loss import cross_entropy

### 1. Simple Binary Classification

In [7]:
# Ground-truth label (0 or 1) for each of the four samples.
targets = torch.tensor([1, 0, 0, 1])

# Model outputs for the same four samples, each a value between 0 and 1.
predictions = torch.tensor([0.9, 0.3, 0.2, 0.8])

# Compare the predictions against the labels.
loss = cross_entropy(
    predictions,
    targets,
)
print(loss)  # tensor(0.0821)

tensor(0.0821)


### 2. Multi-class Classification

For multi-class classification problems, the cross-entropy function takes probabilities as its predictions, so apply softmax to the network's logits first. Here's a simplified example:

In [8]:
from activations import softmax

# One-hot ground truth: the first sample is class 0, the second is class 1.
targets = torch.tensor([
    [1, 0, 0],
    [0, 1, 0],
])

# Stand-in for raw network outputs (logits): two samples, three classes.
logits = torch.tensor([
    [2.0, 1.0, 0.1],
    [0.5, 2.5, 0.1],
])

# Turn the logits into probabilities along dim 1.
predictions = softmax(logits, dim=1)

loss = cross_entropy(
    predictions,
    targets,
)
print(loss)

tensor(0.1035)


### 3. Using the Stable Version

As with the softmax function, numerical instability can sometimes be an issue when dealing with very small or very large values. Pass `stable=True` to use the stable version:

In [9]:
# Ground-truth labels for the four samples.
targets = torch.tensor([1, 0, 1, 0])

# Mock predictions, two of which sit very close to 1 and 0.
predictions = torch.tensor([0.9999, 0.0001, 0.9, 0.1])

# stable=True requests the stable variant of the loss.
loss = cross_entropy(
    predictions,
    targets,
    stable=True,
)
print(loss)

tensor(0.0264)


### 4. Avoiding Zero Predictions

The `eps` parameter helps to avoid taking the logarithm of zero:

In [10]:
# Mock predictions that include an exact zero.
# Built with torch.tensor (the recommended factory) rather than the legacy
# torch.Tensor constructor; the float literals keep the default float dtype.
predictions = torch.tensor([1.0, 0.0, 0.9, 0.2])

# Corresponding ground truth, written as floats so the dtype matches what
# torch.Tensor([1, 0, 1, 0]) produced.
targets = torch.tensor([1.0, 0.0, 1.0, 0.0])

# eps helps avoid taking the logarithm of the zero prediction.
loss = cross_entropy(predictions, targets, stable=True, eps=1e-8)
print(loss)

tensor(0.0263)


### 5. Dealing with Batches

Typically, when training neural networks, we process inputs in batches. The function can handle batched inputs seamlessly:

In [11]:
# Batched predictions: three rows of two values each.
# Built with torch.tensor (the recommended factory) rather than the legacy
# torch.Tensor constructor; the float literals keep the default float dtype.
predictions = torch.tensor([[0.9, 0.1], [0.7, 0.3], [0.2, 0.8]])

# Batched one-hot targets, written as floats so the dtype matches what the
# torch.Tensor constructor produced.
targets = torch.tensor([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0]])

# The same call as for unbatched inputs; no extra arguments are needed.
loss = cross_entropy(predictions, targets)
print(loss)

tensor(0.1142)
