In [2]:
import numpy as np

In [3]:
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_grad(y_true, y_pred):
    return 2 * (y_pred - y_true) / np.size(y_true)

def binary_cross_entropy(y_true, y_pred):
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_grad(y_true, y_pred): # wrt y_pred
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

def cross_entropy(y_true,y_pred,epsilon=1e-15):
    y_pred = np.clip(y_pred, epsilon, 1.0-epsilon)
    return np.mean(-y_true*np.log(y_pred)) 

def cross_entropy_grad(y_true,y_pred,epsilon=1e-15):
    y_pred = np.clip(y_pred, epsilon, 1.0-epsilon)
    return np.mean(-y_true/y_pred) 

In [10]:
y_pred = np.random.randn(4)
y_pred = np.exp(y_pred)/np.sum( np.exp(y_pred) ) 
y = np.zeros(4)
y[2] = 1 

In [11]:
print(y)
print(y_pred)

[0. 0. 1. 0.]
[0.27585904 0.20310151 0.15680211 0.36423734]


In [13]:
cross_entropy_grad(y,y_pred)

-1.5943663184217949

In [15]:
import numpy as np

def cross_entropy_loss(y_true, y_pred):
    """
    Calculate the cross-entropy loss.

    Parameters:
    - y_true: 2D array with one-hot encoded true class labels (shape: [num_samples, num_classes]).
    - y_pred: 2D array with predicted probabilities (shape: [num_samples, num_classes]).

    Returns:
    - float: Cross-entropy loss.
    """
    epsilon = 1e-15  # Small constant to avoid log(0)
    num_samples = len(y_true)

    # Clip predicted probabilities to avoid log(0)
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    # Calculate cross-entropy loss
    loss = -np.sum(y_true * np.log(y_pred)) / num_samples
    return loss

def cross_entropy_loss_grad(y_true, y_pred):
    """
    Calculate the gradient of the cross-entropy loss with respect to the predicted probabilities.

    Parameters:
    - y_true: 2D array with one-hot encoded true class labels (shape: [num_samples, num_classes]).
    - y_pred: 2D array with predicted probabilities (shape: [num_samples, num_classes]).

    Returns:
    - 2D array: Gradient of the cross-entropy loss with respect to y_pred (shape: [num_samples, num_classes]).
    """
    epsilon = 1e-15  # Small constant to avoid division by zero
    num_samples = len(y_true)

    # Clip predicted probabilities to avoid division by zero
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    # Calculate the gradient of the cross-entropy loss
    grad = (-y_true / y_pred) / num_samples
    return grad

# Example usage
num_classes = 3
num_samples = 5

# Generate random one-hot encoded true labels and predicted probabilities
y_true = np.eye(num_classes)[np.random.choice(num_classes, num_samples)]
y_pred = np.random.rand(num_samples, num_classes)

# Calculate cross-entropy loss
loss = cross_entropy_loss(y_true, y_pred)
print(f"Cross-entropy loss: {loss:.4f}")

# Calculate the gradient of cross-entropy loss
grad = cross_entropy_loss_grad(y_true, y_pred)
print("Gradient of cross-entropy loss:")
print(grad)


Cross-entropy loss: 0.8484
Gradient of cross-entropy loss:
[[-0.         -0.59952621 -0.        ]
 [-0.         -0.         -0.78070825]
 [-0.53435849 -0.         -0.        ]
 [-0.25635624 -0.         -0.        ]
 [-0.         -0.34717242 -0.        ]]
