In [10]:
import numpy as np

# Focal Loss vs Cross Entropy

## Cross entropy

Easy example with ground truth 1

In [11]:
# Easy positive: the label is 1 and the predicted probability is high (0.9).
y = np.array([1])
p = np.array([0.9])

def ce(y_i, p_i):
    """Binary cross-entropy of a single prediction.

    Args:
        y_i: ground-truth label (0 or 1 in this notebook).
        p_i: predicted probability that the label is 1.

    Returns:
        -(y_i * log(p_i) + (1 - y_i) * log(1 - p_i)).
    """
    positive_term = y_i * np.log(p_i)
    negative_term = (1 - y_i) * np.log(1 - p_i)
    return -(positive_term + negative_term)

print(ce(y_i=y[0], p_i=p[0]))

0.10536051565782628


Hard example with ground truth 1

In [12]:
# Hard positive: the label is 1 but the predicted probability is only 0.3.
y = np.array([1])
p = np.array([0.3])

def ce(y_i, p_i):
    """Binary cross-entropy of a single prediction.

    Args:
        y_i: ground-truth label (0 or 1 in this notebook).
        p_i: predicted probability that the label is 1.

    Returns:
        -(y_i * log(p_i) + (1 - y_i) * log(1 - p_i)).
    """
    log_likelihood = y_i * np.log(p_i) + (1 - y_i) * np.log(1 - p_i)
    return -log_likelihood

print(ce(y_i=y[0], p_i=p[0]))

1.2039728043259361


Easy example with ground truth 0

In [13]:
# Easy negative: the label is 0 and the predicted probability of 1 is low (0.3).
y = np.array([0])
p = np.array([0.3])

def ce(y_i, p_i):
    """Binary cross-entropy of a single prediction.

    Args:
        y_i: ground-truth label (0 or 1 in this notebook).
        p_i: predicted probability that the label is 1.

    Returns:
        -(y_i * log(p_i) + (1 - y_i) * log(1 - p_i)).
    """
    positive_term = y_i * np.log(p_i)
    negative_term = (1 - y_i) * np.log(1 - p_i)
    return -(positive_term + negative_term)

print(ce(y_i=y[0], p_i=p[0]))

0.35667494393873245


In [14]:
# Hard negative: the label is 0 but the predicted probability of 1 is 0.9.
y = np.array([0])
p = np.array([0.9])

def ce(y_i, p_i):
    """Binary cross-entropy of a single prediction.

    Args:
        y_i: ground-truth label (0 or 1 in this notebook).
        p_i: predicted probability that the label is 1.

    Returns:
        -(y_i * log(p_i) + (1 - y_i) * log(1 - p_i)).
    """
    log_likelihood = y_i * np.log(p_i) + (1 - y_i) * np.log(1 - p_i)
    return -log_likelihood

print(ce(y_i=y[0], p_i=p[0]))

2.302585092994046


Hard example with ground truth 0 (the cell above: $y = 0$, $p = 0.9$)

## Focal Loss

Easy example with ground truth 1

In [15]:
# Easy positive: the label is 1 and the predicted probability is high (0.9).
y = np.array([1])
p = np.array([0.9])

def focal(y_i, p_i, alpha=1, gamma=2):
    """Binary focal loss of a single prediction.

    Each cross-entropy term is scaled by a modulating factor that shrinks as
    the prediction gets closer to the label: (1 - p_i)**gamma for the
    positive term and p_i**gamma for the negative term.

    Args:
        y_i: ground-truth label (0 or 1 in this notebook).
        p_i: predicted probability that the label is 1.
        alpha: scale applied to the whole loss (the same for both classes).
        gamma: focusing exponent; gamma=0 with alpha=1 gives cross-entropy.

    Returns:
        -alpha * ((1 - p_i)**gamma * y_i * log(p_i)
                  + p_i**gamma * (1 - y_i) * log(1 - p_i)).
    """
    positive_term = (1 - p_i)**gamma * y_i * np.log(p_i)
    # The negative term must carry the exponent as well; weighting it by a
    # bare p_i would make gamma irrelevant for every y_i = 0 sample.
    negative_term = p_i**gamma * (1 - y_i) * np.log(1 - p_i)
    return -alpha * (positive_term + negative_term)

print(focal(y[0], p[0]))

0.0010536051565782623


Hard example with ground truth 1

In [16]:
# Hard positive: the label is 1 but the predicted probability is only 0.3.
y = np.array([1])
p = np.array([0.3])

def focal(y_i, p_i, alpha=1, gamma=2):
    """Binary focal loss of a single prediction.

    Each cross-entropy term is scaled by a modulating factor that shrinks as
    the prediction gets closer to the label: (1 - p_i)**gamma for the
    positive term and p_i**gamma for the negative term.

    Args:
        y_i: ground-truth label (0 or 1 in this notebook).
        p_i: predicted probability that the label is 1.
        alpha: scale applied to the whole loss (the same for both classes).
        gamma: focusing exponent; gamma=0 with alpha=1 gives cross-entropy.

    Returns:
        -alpha * ((1 - p_i)**gamma * y_i * log(p_i)
                  + p_i**gamma * (1 - y_i) * log(1 - p_i)).
    """
    positive_term = (1 - p_i)**gamma * y_i * np.log(p_i)
    # The negative term must carry the exponent as well; weighting it by a
    # bare p_i would make gamma irrelevant for every y_i = 0 sample.
    negative_term = p_i**gamma * (1 - y_i) * np.log(1 - p_i)
    return -alpha * (positive_term + negative_term)

print(focal(y[0], p[0]))

0.5899466741197086


Easy example with ground truth 0

In [17]:
# Easy negative: the label is 0 and the predicted probability of 1 is low (0.3).
y = np.array([0])
p = np.array([0.3])

def focal(y_i, p_i, alpha=1, gamma=2):
    """Binary focal loss of a single prediction.

    Each cross-entropy term is scaled by a modulating factor that shrinks as
    the prediction gets closer to the label: (1 - p_i)**gamma for the
    positive term and p_i**gamma for the negative term.

    Args:
        y_i: ground-truth label (0 or 1 in this notebook).
        p_i: predicted probability that the label is 1.
        alpha: scale applied to the whole loss (the same for both classes).
        gamma: focusing exponent; gamma=0 with alpha=1 gives cross-entropy.

    Returns:
        -alpha * ((1 - p_i)**gamma * y_i * log(p_i)
                  + p_i**gamma * (1 - y_i) * log(1 - p_i)).
    """
    positive_term = (1 - p_i)**gamma * y_i * np.log(p_i)
    # The negative term must carry the exponent as well; weighting it by a
    # bare p_i would make gamma irrelevant for every y_i = 0 sample.
    negative_term = p_i**gamma * (1 - y_i) * np.log(1 - p_i)
    return -alpha * (positive_term + negative_term)

# Expected: 0.3**2 * -log(0.7), i.e. the cross-entropy of this sample scaled by 0.09.
print(focal(y[0], p[0]))

0.03210074495448592


### Compare the gradients of the two losses

In [18]:
import numpy as np
from autograd import grad
from autograd import numpy as anp
from autograd.scipy.special import expit as sigmoid  # Autograd-compatible sigmoid

def forward(w, choice="hard"):
    """Sigmoid output of a single linear unit on one of two fixed inputs.

    Args:
        w: weight vector with four entries, one per input feature.
        choice: "hard" selects the all-negative feature vector, "easy" the
            all-positive one.

    Returns:
        sigmoid(dot(w, x)) for the selected feature vector x.

    Raises:
        ValueError: if choice is neither "hard" nor "easy". Without this
            check a misspelled choice would silently be treated as "easy".
    """
    features = {
        "easy": [2, 6.25, 1.5, 1.47],
        "hard": [-0.8, -2.5, -0.6, -0.58],
    }
    if choice not in features:
        raise ValueError(f"choice must be 'hard' or 'easy', got {choice!r}")
    # Build the input with autograd's numpy wrapper for both cases, so the
    # whole expression stays inside one array namespace.
    x = anp.array(features[choice])
    return sigmoid(anp.dot(w, x))

def ce(y_i, p_i):
    """Binary cross-entropy of a single prediction, written with autograd's numpy.

    Args:
        y_i: ground-truth label (0 or 1 in this notebook).
        p_i: predicted probability that the label is 1.

    Returns:
        -(y_i * log(p_i) + (1 - y_i) * log(1 - p_i)).
    """
    positive_term = y_i * anp.log(p_i)
    negative_term = (1 - y_i) * anp.log(1 - p_i)
    return -(positive_term + negative_term)

def fl(y_i, p_i, alpha=1, gamma=2):
    """Binary focal loss of a single prediction, written with autograd's numpy.

    Each cross-entropy term is scaled by a modulating factor that shrinks as
    the prediction gets closer to the label: (1 - p_i)**gamma for the
    positive term and p_i**gamma for the negative term.

    Args:
        y_i: ground-truth label (0 or 1 in this notebook).
        p_i: predicted probability that the label is 1.
        alpha: scale applied to the whole loss (the same for both classes).
        gamma: focusing exponent; gamma=0 with alpha=1 gives cross-entropy.

    Returns:
        -alpha * ((1 - p_i)**gamma * y_i * log(p_i)
                  + p_i**gamma * (1 - y_i) * log(1 - p_i)).
    """
    positive_term = (1 - p_i)**gamma * y_i * anp.log(p_i)
    # The negative term must carry the exponent as well; weighting it by a
    # bare p_i would make gamma irrelevant for every y_i = 0 sample.
    negative_term = p_i**gamma * (1 - y_i) * anp.log(1 - p_i)
    return -alpha * (positive_term + negative_term)

# Positive label and the weight vector at which both losses are differentiated.
y = 1
w = np.array([0.5, 0.16, 0.67, 0.68])

# Hard example: gradient of each loss with respect to the weights.
grad_ce = grad(lambda weights: ce(y, forward(weights, choice="hard")))
grad_fl = grad(lambda weights: fl(y, forward(weights, choice="hard")))
grad_ce_w, grad_fl_w = grad_ce(w), grad_fl(w)

print("Gradient of CE with respect to w:", grad_ce_w)
print("Gradient of FL with respect to w:", grad_fl_w)
print()

# Easy example: same comparison on the input the model already gets right.
grad_ce = grad(lambda weights: ce(y, forward(weights, choice="easy")))
grad_fl = grad(lambda weights: fl(y, forward(weights, choice="easy")))
grad_ce_w, grad_fl_w = grad_ce(w), grad_fl(w)

print("Gradient of CE with respect to w:", grad_ce_w)
print("Gradient of FL with respect to w:", grad_fl_w)

Gradient of CE with respect to w: [0.66521171 2.07878658 0.49890878 0.48227849]
Gradient of FL with respect to w: [0.79188105 2.47462829 0.59391079 0.57411376]

Gradient of CE with respect to w: [-0.03581028 -0.11190713 -0.02685771 -0.02632056]
Gradient of FL with respect to w: [-3.42348959e-05 -1.06984050e-04 -2.56761719e-05 -2.51626485e-05]
