In [None]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable

In [None]:
def NLLLoss(logs, targets):
    """Mean negative log-likelihood of the target classes.

    Args:
        logs: 2-D tensor of log-probabilities, one row per sample and one
            column per class.
        targets: 1-D integer tensor; ``targets[i]`` is the class index
            selected from row ``i`` of ``logs``.

    Returns:
        Scalar tensor holding the negated mean of ``logs[i, targets[i]]``
        over all rows, in the dtype of ``logs``.
    """
    # Pick logs[i, targets[i]] for every row in one indexing operation instead
    # of a Python loop. The result keeps the dtype of `logs`, so float64 input
    # is no longer downcast to float32.
    row_index = torch.arange(len(targets), device=targets.device)
    picked = logs[row_index, targets.long()]
    return -picked.mean()

# Sanity check: CrossEntropyLoss on raw scores should equal NLLLoss applied to
# log-softmax outputs, and the hand-written NLLLoss should match both.
# The input comes from a locally seeded generator so the printed values are the
# same on every run without changing the global RNG state.
x = torch.randn(3, 5, generator=torch.Generator().manual_seed(0))
y = torch.LongTensor([4, 1, 2])
cross_entropy_loss = torch.nn.CrossEntropyLoss()
log_softmax = torch.nn.LogSoftmax(dim=1)
x_log = log_softmax(x)

nll_loss = torch.nn.NLLLoss()
torch_ce_value = cross_entropy_loss(x, y)
torch_nll_value = nll_loss(x_log, y)
custom_nll_value = NLLLoss(x_log, y)

print("Torch CrossEntropyLoss: ", torch_ce_value)
print("Torch NLL loss: ", torch_nll_value)
print("Custom NLL loss: ", custom_nll_value)

# Fail loudly if the three values ever disagree instead of relying on a visual check.
assert torch.allclose(torch_ce_value, torch_nll_value)
assert torch.allclose(torch_nll_value, custom_nll_value)

Torch CrossEntropyLoss:  tensor(2.0834)
Torch NLL loss:  tensor(2.0834)
Custom NLL loss:  tensor(2.0834)


Torch CrossEntropyLoss:  tensor(0.7653)


Torch NLL loss:  tensor(0.7653)


Custom NLL loss:  tensor(0.7653)

In [None]:
def binary_focal_loss(y_pred, y_true, gamma=2.0, alpha=0.25, reduction="mean", function=torch.sigmoid, **kwargs):
    """Binary focal loss for 0/1 targets.

    With ``p`` the predicted probability of the positive class, each element
    contributes::

        -pos_alpha * (1 - p) ** gamma * log(p)      where y_true == 1
        -neg_alpha * p ** gamma * log(1 - p)        where y_true == 0

    Elements whose target is neither 0 nor 1 contribute zero loss.

    Args:
        y_pred: Tensor of predictions. Passed through ``function`` first when
            ``function`` is not None; otherwise it must already hold
            probabilities in the range [0, 1].
        y_true: Tensor of target labels, compared element-wise against 1 and 0.
        gamma: Exponent of the modulating factor (default 2.0).
        alpha: Class weighting. An int/float ``a`` weights positives by ``a``
            and negatives by ``1 - a`` (the default 0.25 gives 0.25 / 0.75).
            A list/tuple supplies ``(positive_weight, negative_weight)`` directly.
        reduction: One of ``"mean"``, ``"sum"`` or ``"none"``.
        function: Activation applied to ``y_pred`` (default ``torch.sigmoid``),
            or None to use ``y_pred`` as-is.
        **kwargs: Extra keyword arguments forwarded to ``function``
            (for example ``dim`` for a softmax).

    Returns:
        A scalar tensor for ``"mean"`` / ``"sum"``, or the per-element loss
        tensor for ``"none"``.

    Raises:
        ValueError: If ``reduction`` is not one of the supported values.
        TypeError: If ``alpha`` is not an int, float, list or tuple.
        AssertionError: If ``function`` is None and ``y_pred`` has values
            outside [0, 1].
    """
    # Validate the cheap arguments first so a bad call fails before any tensor work.
    if reduction not in ("mean", "sum", "none"):
        raise ValueError(f"Wrong reduction {reduction} is choosen \n choose one among [mean,sum,none]  ")

    if isinstance(alpha, (list, tuple)):
        pos_alpha = alpha[0]  # weight of the positive term
        neg_alpha = alpha[1]  # weight of the negative term
    elif isinstance(alpha, (int, float)):
        pos_alpha = alpha
        neg_alpha = 1 - alpha
    else:
        raise TypeError(
            f"alpha must be an int, float, list or tuple, got {type(alpha).__name__}"
        )

    if function is not None:
        y_pred = function(y_pred, **kwargs)  # turn raw scores into probabilities
    else:
        assert ((y_pred <= 1) & (y_pred >= 0)).all().item() , "negative value in y_pred value should be in the range of 0 to 1 inclusive"

    # Positive branch: keep p where the target is 1 and substitute 1 elsewhere,
    # so (1 - p) ** gamma * log(p) is exactly 0 for non-positive elements.
    pos_pt = torch.where(y_true == 1, y_pred, torch.ones_like(y_pred))
    # Negative branch: keep p where the target is 0 and substitute 0 elsewhere,
    # so p ** gamma * log(1 - p) is exactly 0 for non-negative elements.
    neg_pt = torch.where(y_true == 0, y_pred, torch.zeros_like(y_pred))

    pos_modulating = (1 - pos_pt) ** gamma  # approaches 0 for confident, correct positives
    neg_modulating = neg_pt ** gamma        # approaches 0 for confident, correct negatives

    # NOTE: the probabilities go into torch.log unclamped, so a probability of
    # exactly 0 for a positive (or exactly 1 for a negative) gives an infinite loss.
    pos = -pos_alpha * pos_modulating * torch.log(pos_pt)
    neg = -neg_alpha * neg_modulating * torch.log(1 - neg_pt)

    loss = pos + neg  # per-element loss before reduction

    if reduction == "mean":
        return loss.mean()
    if reduction == "sum":
        return loss.sum()
    return loss  # reduction == "none": unreduced per-element loss
    

In [None]:
# One-hot targets for 32 samples over 3 classes. The hot column of row r is
# (3 - r % 3) % 3, so it cycles 0, 2, 1, 0, 2, 1, ... down the rows.
sample_rows = torch.arange(32)
y_true_temp = torch.zeros(32, 3)
y_true_temp[sample_rows, (3 - sample_rows % 3) % 3] = 1

In [None]:
# Display the one-hot targets; the hot column cycles 0, 2, 1 as the row index increases.
y_true_temp

tensor([[1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.]])

In [None]:
# Random raw scores (logits) for 32 samples x 3 outputs, matching the shape of
# the targets. Drawn from a locally seeded generator so the losses computed
# from them are identical on every run, without changing the global RNG state.
y_pred_temp = torch.randn(32, 3, generator=torch.Generator().manual_seed(0))
y_pred_temp

tensor([[-0.3204,  1.9149, -0.7586],
        [ 0.7278, -1.1753, -1.3638],
        [-1.2560,  0.6942,  0.0554],
        [ 0.4899,  0.0629, -0.0683],
        [ 0.7691,  0.3859, -0.7012],
        [-1.1072,  0.4527,  0.3343],
        [ 0.0756,  1.6809,  0.7069],
        [-0.8612, -0.8888, -0.3786],
        [-0.7174, -0.4026,  0.6459],
        [ 0.5147,  2.3735,  1.5506],
        [ 0.6520, -0.5849,  0.8950],
        [-0.8604,  0.5190, -0.1252],
        [ 0.0056, -0.7319,  0.6226],
        [-1.0344,  1.1826, -0.4460],
        [-0.0643,  0.4920,  1.0905],
        [-0.0534,  1.1417,  1.9151],
        [-0.4730,  0.8037,  2.1016],
        [-0.4188,  1.2146, -0.0068],
        [-0.9137, -0.0655,  1.6845],
        [-1.4788, -0.9740, -2.3563],
        [ 0.1139,  1.1416,  0.4770],
        [-0.0334,  1.2011,  1.3362],
        [ 0.2451,  0.5860,  0.5101],
        [ 0.2738, -0.5534,  1.0397],
        [ 1.7760, -0.5403, -1.5208],
        [-0.9020, -1.5657, -0.2480],
        [ 0.7809,  0.1468,  1.0403],
 

In [None]:
# Correctness check: with gamma=0 and both class weights set to 1 the focal
# loss has no modulating or balancing term, so it must equal plain binary
# cross-entropy on the same logits.
bce_reference = F.binary_cross_entropy_with_logits(y_pred_temp, y_true_temp)
focal_without_focus = binary_focal_loss(y_pred_temp, y_true_temp, gamma=0, alpha=[1, 1])
assert torch.allclose(bce_reference, focal_without_focus, atol=1e-5), (bce_reference, focal_without_focus)

# Side-by-side values: plain BCE vs. focal loss with gamma=2 and alpha=[1, 1/3].
# These two numbers are not expected to match.
bce_reference, binary_focal_loss(y_pred_temp, y_true_temp, gamma=2, alpha=[1, 1/3])

(tensor(0.8233), tensor(0.1807))