In [1]:
# LOG LOSS

In [2]:
import numpy as np
from sklearn.metrics import log_loss

In [3]:
def log_loss_custom(y_true, y_pred, epsilon=1e-15):
    """
    Calculate log loss (binary cross-entropy).

    Formula: -1/n * Σ[y_true*log(y_pred) + (1-y_true)*log(1-y_pred)]

    Parameters:
    - y_true: actual labels (0 or 1); any array-like (list, tuple, ndarray)
    - y_pred: predicted probabilities (0 to 1); any array-like
    - epsilon: clipping margin; predictions are limited to
      [epsilon, 1 - epsilon] so that log(0) is never evaluated

    Returns:
    - the log loss averaged over the n = len(y_true) samples (NumPy float)

    Raises:
    - ValueError: if y_true contains no samples (the average is undefined)
    """
    # Coerce up front: with a plain Python list, `1 - y_true` below would
    # raise TypeError because lists do not support arithmetic.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    n = len(y_true)
    if n == 0:
        # Without this guard the division below fails with an opaque
        # ZeroDivisionError.
        raise ValueError("y_true must contain at least one sample")

    # Keep predictions strictly inside (0, 1) to avoid log(0)
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    # Calculate log loss
    loss = -1/n * np.sum(
        y_true * np.log(y_pred) +
        (1 - y_true) * np.log(1 - y_pred)
    )

    return loss

In [6]:
# Toy example: five binary labels and the predicted probability of class 1 for each
y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0.1, 0.8, 0.6, 0.5, 0.3])

# Score the predictions with the hand-written implementation
custom_loss = log_loss_custom(y_true=y_true, y_pred=y_pred)
print(f"Custom log loss: {custom_loss}")

Custom log loss: 0.5472899351247816


In [8]:
# Verify with sklearn: score the same labels/probabilities with the reference implementation
sklearn_loss = log_loss(y_true, y_pred)
print(f"Sklearn log loss: {sklearn_loss}")

# Check if they match
print(f"Results match: {np.isclose(custom_loss, sklearn_loss)}")

# Fail loudly on a mismatch so a "Restart & Run All" cannot pass silently
# when the custom implementation drifts from the reference.
assert np.isclose(custom_loss, sklearn_loss), "custom log loss disagrees with sklearn"

Sklearn log loss: 0.5472899351247816
Results match: True


In [None]:
# | Concept           | Description                                                                 |
# | ----------------- | --------------------------------------------------------------------------- |
# | **Log Loss**      | Measures how well a classifier predicts probabilities                       |
# | **Low Log Loss**  | Predicted probabilities are close to the true labels                        |
# | **High Log Loss** | Predictions are wrong; confident mistakes are penalized most heavily        |
# | **Clipping**      | Keeps probabilities within [eps, 1 - eps] so log(0) is never evaluated      |
# | **Use Case**      | Used in logistic regression, neural networks, and probabilistic classifiers |
