# Loss Functions

In [1]:
# Step up to the parent directory before the `nn`, `core` and `autograd` imports in the next cell.
# NOTE(review): relative and not idempotent - re-running this cell moves up one more level,
# so run it exactly once per kernel.
%cd ..

e:\KTorch


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
# imports 
from nn import BCELoss, NLLLoss, MSELoss, BCEWithLogitsLoss, Linear, ReLU, Sigmoid, CrossEntropyLoss
from core import KTorch
from autograd import Tensor
import numpy as np
import warnings
import torch 

# Suppress all warnings
# NOTE(review): this is a blanket filter, so every warning raised by the cells below is hidden too.
warnings.filterwarnings("ignore")

### MSE Loss

In [3]:
# Reproducible integer samples for the MSE comparison:
# two independent draws of 32 values in [100, 1000), predictions first, targets second.
np.random.seed(0)

y_preds, y_true = (
    np.random.randint(low=100, high=1000, size=32) for _ in range(2)
)

In [4]:
# Wrap the NumPy samples in KTorch tensors
y_preds_tensor, y_true_tensor = Tensor(y_preds), Tensor(y_true)

# One MSE criterion per reduction mode
mse_loss_sum, mse_loss_mean = MSELoss(reduction='sum'), MSELoss(reduction='mean')

# Evaluate both criteria on the same prediction/target pair
loss_sum = mse_loss_sum(y_preds_tensor, y_true_tensor)
loss_mean = mse_loss_mean(y_preds_tensor, y_true_tensor)

print(f"Sum Loss: {loss_sum}")
print(f"Mean Loss: {loss_mean}")

Sum Loss: tensor: 3806477.0
Mean Loss: tensor: 118952.40625


In [5]:
# Reference MSE computed directly with NumPy on the integer samples
diff = y_preds - y_true
squared_error = diff * diff

# Same squared errors, reduced two ways
mse_mean = squared_error.mean()
mse_sum = squared_error.sum()

print(f"Manual Sum Loss: {mse_sum}")
print(f"Manual Mean Loss: {mse_mean}")

Manual Sum Loss: 3806477
Manual Mean Loss: 118952.40625


In [6]:
# Float32 torch copies of the same samples
y_preds_torch, y_true_torch = (
    torch.tensor(arr).float() for arr in (y_preds, y_true)
)

# PyTorch reference criteria, one per reduction mode
torch_mse_sum = torch.nn.MSELoss(reduction='sum')
torch_mse_mean = torch.nn.MSELoss(reduction='mean')

# Calculate loss
loss_torch_sum = torch_mse_sum(y_preds_torch, y_true_torch)
loss_torch_mean = torch_mse_mean(y_preds_torch, y_true_torch)

print(f"Torch Sum Loss: {loss_torch_sum}")
print(f"Torch Mean Loss: {loss_torch_mean}")

Torch Sum Loss: 3806477.0
Torch Mean Loss: 118952.40625


In [7]:
# Testing backprop
# Build a Linear -> ReLU -> Linear -> ReLU network in KTorch and run its forward pass;
# the intermediate tensors are kept so their gradients can be inspected after backward().

# input 
# Seed first so the random data below is reproducible
# (presumably the Linear initialisation draws from the same NumPy RNG - TODO confirm).
np.random.seed(0)
# NOTE(review): `input` shadows the Python builtin; the following cells read this name,
# so a rename has to be applied across all of them together.
input = np.random.randn(32, 10).astype(np.float32)
target = np.random.rand(32, 1).astype(np.float32)

# Convert to tensors
x = Tensor(input)
y = Tensor(target)

# model
linear_1 = Linear(10, 5)
relu = ReLU()
linear_2 = Linear(5, 1)
relu_2 = ReLU()

# forward pass
h1_nn = linear_1(x)
h2_nn = relu(h1_nn)
h3_nn = linear_2(h2_nn)
out = relu_2(h3_nn)

# Shape of the network output (displayed as the cell result)
out.shape

(32, 1)

In [8]:
# Manual forward pass
# Recompute the network output with plain NumPy, using the layers' own parameter arrays.
weights_1 = linear_1.weight.data
bias_1 = linear_1.bias.data
weights_2 = linear_2.weight.data
bias_2 = linear_2.bias.data

# Each linear layer is applied as x @ W + b (weights are used without a transpose)
h1 = np.dot(input, weights_1) + bias_1
# ReLU
h2 = np.maximum(h1, 0)
h3 = np.dot(h2, weights_2) + bias_2
# Final ReLU on the output layer
out_manual = np.maximum(h3, 0)

out_manual.shape

(32, 1)

In [9]:
# Exact element-wise comparison (no tolerance) of the KTorch output with the NumPy recomputation
np.equal(out.data, out_manual).all()

True

In [10]:
# Loss
# KTorch MSE with mean reduction on the network output
loss = MSELoss(reduction='mean')
loss_val = loss(out, y)

# Manual loss
# `diff` is reused by the manual backward pass in a later cell
diff = out_manual - target
loss_manual = np.mean(diff**2)

# Exact comparison (no tolerance) of the two loss values
np.equal(loss_val.data, loss_manual).all()

True

In [11]:
# Backward pass
# Run KTorch autograd from the scalar loss; the next cell reads `.grad` on the loss,
# the intermediate tensors and the layer parameters.
loss_val.backward()

In [12]:
# Manual backward pass
# Derive every gradient by hand with NumPy and compare it exactly (==) with the
# value KTorch stored on the matching tensor. Each print is one check.

# Seed gradient of the loss with respect to itself: all ones
dL = np.ones_like(diff)
print((loss_val.grad == dL).all())
# Mean reduction: d/d(diff) of mean(diff**2) is 2 * diff / N, with N = 32 samples
d_diff = dL * 2 * diff / 32
# diff = out - target, so the gradient reaches `out` unchanged
d_out_manual = d_diff
print((d_out_manual == out.grad).all())
# ReLU backward: gradient passes only where the pre-activation was positive
d_h3 = d_out_manual * (h3 > 0)
print((d_h3 == h3_nn.grad).all())
# Linear backward (layer 2): dW = x^T @ dy, db = dy summed over the batch axis
d_weights_2 = np.dot(h2.T, d_h3)
d_bias_2 = np.sum(d_h3, axis=0)
print((d_weights_2 == linear_2.weight.grad).all())
print((d_bias_2 == linear_2.bias.grad).all())
# Gradient with respect to the layer-2 input: dy @ W^T
d_h2 = np.dot(d_h3, weights_2.T)
print((d_h2 == h2_nn.grad).all())
# ReLU backward for the hidden activation
d_h1 = d_h2 * (h1 > 0)
print((d_h1 == h1_nn.grad).all())
# Linear backward (layer 1)
d_weights_1 = np.dot(input.T, d_h1)
d_bias_1 = np.sum(d_h1, axis=0)
print((d_weights_1 == linear_1.weight.grad).all())
print((d_bias_1 == linear_1.bias.grad).all())

True
True
True
True
True
True
True
True
True


#### MSE Sum (backprop check with `reduction='sum'`)

In [13]:
# Testing backprop
# Same data and Linear -> ReLU -> Linear -> ReLU network as the mean-reduction test, rebuilt
# from scratch so this section starts with newly constructed layers and tensors.

# input 
# Seed first so the random data below is reproducible
# (presumably the Linear initialisation draws from the same NumPy RNG - TODO confirm).
np.random.seed(0)
# NOTE(review): `input` shadows the Python builtin; the following cells read this name.
input = np.random.randn(32, 10).astype(np.float32)
target = np.random.rand(32, 1).astype(np.float32)

# Convert to tensors
x = Tensor(input)
y = Tensor(target)

# model
linear_1 = Linear(10, 5)
relu = ReLU()
linear_2 = Linear(5, 1)
relu_2 = ReLU()

# forward pass
h1_nn = linear_1(x)
h2_nn = relu(h1_nn)
h3_nn = linear_2(h2_nn)
out = relu_2(h3_nn)

# Shape of the network output (displayed as the cell result)
out.shape

(32, 1)

In [14]:
# Manual forward pass
# Recompute the network output with plain NumPy, using the layers' own parameter arrays.
weights_1 = linear_1.weight.data
bias_1 = linear_1.bias.data
weights_2 = linear_2.weight.data
bias_2 = linear_2.bias.data

# Each linear layer is applied as x @ W + b (weights are used without a transpose)
h1 = np.dot(input, weights_1) + bias_1
# ReLU
h2 = np.maximum(h1, 0)
h3 = np.dot(h2, weights_2) + bias_2
# Final ReLU on the output layer
out_manual = np.maximum(h3, 0)

out_manual.shape

(32, 1)

In [15]:
# Exact element-wise comparison (no tolerance) of the KTorch output with the NumPy recomputation
np.equal(out.data, out_manual).all()

True

In [16]:
# Loss
# KTorch MSE with sum reduction on the network output
loss = MSELoss(reduction='sum')
loss_val = loss(out, y)

# Manual loss
# `diff` is reused by the manual backward pass in a later cell
diff = out_manual - target
loss_manual = np.sum(diff**2)

# Exact comparison (no tolerance) of the two loss values
np.equal(loss_val.data, loss_manual).all()

True

In [17]:
# Backward pass
# Run KTorch autograd from the scalar loss; the next cell reads `.grad` on the loss,
# the intermediate tensors and the layer parameters.
loss_val.backward()

In [18]:
# Manual backward pass
# Derive every gradient by hand with NumPy and compare it exactly (==) with the
# value KTorch stored on the matching tensor. Each print is one check.

# Seed gradient of the loss with respect to itself: all ones
dL = np.ones_like(diff)
print((loss_val.grad == dL).all())
# Sum reduction: d/d(diff) of sum(diff**2) is 2 * diff (no division by the sample count)
d_diff = dL * 2 * diff
# diff = out - target, so the gradient reaches `out` unchanged
d_out_manual = d_diff
print((d_out_manual == out.grad).all())
# ReLU backward: gradient passes only where the pre-activation was positive
d_h3 = d_out_manual * (h3 > 0)
print((d_h3 == h3_nn.grad).all())
# Linear backward (layer 2): dW = x^T @ dy, db = dy summed over the batch axis
d_weights_2 = np.dot(h2.T, d_h3)
d_bias_2 = np.sum(d_h3, axis=0)
print((d_weights_2 == linear_2.weight.grad).all())
print((d_bias_2 == linear_2.bias.grad).all())
# Gradient with respect to the layer-2 input: dy @ W^T
d_h2 = np.dot(d_h3, weights_2.T)
print((d_h2 == h2_nn.grad).all())
# ReLU backward for the hidden activation
d_h1 = d_h2 * (h1 > 0)
print((d_h1 == h1_nn.grad).all())
# Linear backward (layer 1)
d_weights_1 = np.dot(input.T, d_h1)
d_bias_1 = np.sum(d_h1, axis=0)
print((d_weights_1 == linear_1.weight.grad).all())
print((d_bias_1 == linear_1.bias.grad).all())

True
True
True
True
True
True
True
True
True


### BCELoss

In [19]:
# Get inputs
# Seeded so the features, binary targets and predictions below are reproducible.
np.random.seed(0)
inputs = np.random.randn(32, 10).astype(np.float32)
# Binary labels in {0, 1}, stored as float32
targets = np.random.randint(0, 2, (32, 1)).astype(np.float32)

# Convert to tensors
# `x` is not used in this cell; a later cell feeds it through the model.
x = Tensor(inputs)
y = Tensor(targets)

# Try forward pass of bce loss
# Predictions are uniform draws in [0, 1), standing in for probabilities
preds = np.random.rand(32, 1).astype(np.float32)
preds_tensor = Tensor(preds)

bce_loss_mean = BCELoss(reduction='mean')
bce_loss_sum = BCELoss(reduction='sum')

loss_mean = bce_loss_mean(preds_tensor, y)
loss_sum = bce_loss_sum(preds_tensor, y)

print(f"Mean Loss: {loss_mean}")
print(f"Sum Loss: {loss_sum}")

# dtype of the stored loss value (displayed as the cell result)
loss_mean.data.dtype

Mean Loss: tensor: 1.1908915042877197
Sum Loss: tensor: 38.10852813720703


dtype('float32')

In [20]:
# Manual loss calculation
# BCE per element: -(t * log(p) + (1 - t) * log(1 - p)), computed in float32.
log_1 = np.log(preds).astype(np.float32)
log_2 = np.log(1 - preds).astype(np.float32)
# Floor the log terms at -100 so log(0) = -inf cannot reach the loss
# (the same floor torch.nn.BCELoss documents for its log outputs).
log_1 = np.clip(log_1, -100, float('inf'))
log_2 = np.clip(log_2, -100, float('inf'))
loss_term_1 = - targets * log_1
loss_term_2 = - (1 - targets) * log_2
loss = loss_term_1 + loss_term_2
loss_manual_mean = np.mean(loss)
loss_manual_sum = np.sum(loss)

print(f"Manual Mean Loss: {loss_manual_mean}")
print(f"Manual Sum Loss: {loss_manual_sum}")

Manual Mean Loss: 1.1908915042877197
Manual Sum Loss: 38.10852813720703


In [21]:
# Check torch
# Float32 torch copies of the predictions and targets used above
preds_torch, targets_torch = (
    torch.tensor(arr).float() for arr in (preds, targets)
)

# PyTorch reference criteria, one per reduction mode
torch_bce_mean = torch.nn.BCELoss(reduction='mean')
torch_bce_sum = torch.nn.BCELoss(reduction='sum')

loss_torch_mean = torch_bce_mean(preds_torch, targets_torch)
loss_torch_sum = torch_bce_sum(preds_torch, targets_torch)

print(f"Torch Mean Loss: {loss_torch_mean}")
print(f"Torch Sum Loss: {loss_torch_sum}")

Torch Mean Loss: 1.1908913850784302
Torch Sum Loss: 38.108524322509766


In [22]:
# Check loss values
# Exact comparison (no tolerance) of the KTorch losses with the manual NumPy losses;
# the torch values printed above are not part of this check.
np.equal(loss_mean.data, loss_manual_mean), np.equal(loss_sum.data, loss_manual_sum)

(True, True)

In [23]:
# Forward pass
# Re-seed before building the layers
# (presumably Linear draws its initial weights from NumPy's global RNG - TODO confirm).
np.random.seed(0)

# Linear -> ReLU -> Linear -> Sigmoid binary classifier
layer_1 = Linear(10, 5)
relu = ReLU()
layer_2 = Linear(5, 1)
sigmoid = Sigmoid()

# Forward pass
# Intermediate tensors are kept so their gradients can be inspected after backward()
h1 = layer_1(x)
h2 = relu(h1)
h3 = layer_2(h2)
out = sigmoid(h3)

out.shape

(32, 1)

In [24]:
# Manual forward pass
# Recompute the network output with plain NumPy, using the layers' own parameter arrays.
weights_1 = layer_1.weight.data
bias_1 = layer_1.bias.data
weights_2 = layer_2.weight.data
bias_2 = layer_2.bias.data

h1_manual = np.dot(inputs, weights_1) + bias_1
h2_manual = np.maximum(h1_manual, 0)
h3_manual = np.dot(h2_manual, weights_2) + bias_2
# Numerically stable sigmoid: pick the algebraically equivalent form whose exp()
# argument is never positive, so exp() cannot overflow.
pos_mask = h3_manual >= 0
t = np.zeros_like(h3_manual)
# z >= 0: 1 / (1 + exp(-z))
t[pos_mask] = 1 / (1 + np.exp(-h3_manual[pos_mask]))
# z < 0: exp(z) / (1 + exp(z))
t[~pos_mask] = np.exp(h3_manual[~pos_mask]) / (1 + np.exp(h3_manual[~pos_mask]))
out_manual = t
out_manual.shape

(32, 1)

In [25]:
# Check if the outputs are the same
# First value: exact element-wise equality; second value: summed difference (0.0 when identical)
np.equal(out.data, out_manual).all(), np.sum(out.data - out_manual)

(True, 0.0)

In [26]:
# Loss
# KTorch BCE with mean reduction on the sigmoid output
bce_loss = BCELoss(reduction='mean')
loss_val = bce_loss(out, y)

# Manual loss
# Every intermediate is kept under its own name because the manual backward pass
# in a later cell differentiates through each of them.
log_1 = np.log(out_manual).astype(np.float32)
sub_out_manual = 1 - out_manual
log_2 = np.log(sub_out_manual).astype(np.float32)
# Floor the log terms at -100 (unclipped values are kept for the clamp gradient mask)
log_1_clip = np.clip(log_1, -100, float('inf'))
log_2_clip = np.clip(log_2, -100, float('inf'))
loss_term_1 = - targets * log_1_clip
loss_term_2 = - (1 - targets) * log_2_clip
loss = loss_term_1 + loss_term_2
loss_manual = np.mean(loss)

# Exact comparison (no tolerance) of the two loss values
np.equal(loss_val.data, loss_manual).all()

True

In [27]:
# Backward pass
# Run KTorch autograd from the scalar BCE loss; the next cell reads `.grad` on the
# graph nodes reached through `_prev` and on the layer parameters.
loss_val.backward()

In [28]:
# Manual backward pass
# Walk the BCE graph from the scalar loss back to the first layer. Each hand-derived
# NumPy gradient is compared exactly (==) with the gradient KTorch stored on the
# matching node. Graph nodes are reached through `_prev`; where a `label` is printed
# next to the result it shows which node was hit.

# Seed gradient of the loss with respect to itself: all ones
dL = np.ones_like(loss)
print((loss_val.grad == dL).all())

# Mean reduction over 32 samples: every element of the summed loss receives 1/32
d_loss = dL / 32
print((d_loss == loss_val._prev[0].grad).all(), loss_val._prev[0].label)

# loss = loss_term_1 + loss_term_2, so both terms inherit d_loss unchanged
d_loss_term_1 = d_loss
print((d_loss_term_1 == loss_val._prev[0]._prev[0].grad).all())

# Compare the second term against its own gradient (this check previously reused
# d_loss_term_1, leaving d_loss_term_2 itself unverified)
d_loss_term_2 = d_loss
print((d_loss_term_2 == loss_val._prev[0]._prev[1].grad).all())

# loss_term_1 = -targets * log_1_clip
d_log_1_clip = - targets * d_loss_term_1
print((d_log_1_clip == loss_val._prev[0]._prev[0]._prev[1].grad).all(), loss_val._prev[0]._prev[0]._prev[1].label)

# loss_term_2 = -(1 - targets) * log_2_clip
d_log_2_clip = d_loss_term_2 * - (1 - targets)
print((d_log_2_clip == loss_val._prev[0]._prev[1]._prev[1].grad).all(), loss_val._prev[0]._prev[1]._prev[1].label)

# Clamp backward: gradient passes only where the log was above the -100 floor
d_log_1  = d_log_1_clip * (log_1 > -100)
print((d_log_1 == loss_val._prev[0]._prev[0]._prev[1]._prev[0].grad).all(), loss_val._prev[0]._prev[0]._prev[1]._prev[0].label)

d_log_2 = d_log_2_clip * (log_2 > -100)
print((d_log_2 == loss_val._prev[0]._prev[1]._prev[1]._prev[0].grad).all(), loss_val._prev[0]._prev[1]._prev[1]._prev[0].label)

# Log backward is 1/x. `out` feeds both log(out) and log(1 - out); the second path
# carries a minus sign from the (1 - out) subtraction, hence the difference below.
d_out_manual = d_log_1  / out_manual
d_sub_out_manual = d_log_2 / sub_out_manual
d_out_manual = d_out_manual - d_sub_out_manual
print((d_out_manual == out.grad).all(), out.label)

# Sigmoid backward: s * (1 - s)
d_h3_manual = d_out_manual * out_manual * (1 - out_manual)
print((d_h3_manual == h3.grad).all())

# Linear backward (layer 2): dW = x^T @ dy, db = dy summed over the batch axis
d_weights_2 = np.dot(h2_manual.T, d_h3_manual)
# NOTE(review): d_bias_2 is computed but never compared with layer_2.bias.grad in this
# cell; adding that check means re-running the cell to refresh its stored output.
d_bias_2 = np.sum(d_h3_manual, axis=0)
print((d_weights_2 == layer_2.weight.grad).all())

# Gradient with respect to the layer-2 input: dy @ W^T
d_h2_manual = np.dot(d_h3_manual, weights_2.T)
print((d_h2_manual == h2.grad).all())

# ReLU backward: gradient passes only where the pre-activation was positive
d_h1_manual = d_h2_manual * (h1_manual > 0)
print((d_h1_manual == h1.grad).all())

# Linear backward (layer 1)
d_weights_1 = np.dot(inputs.T, d_h1_manual)
d_bias_1 = np.sum(d_h1_manual, axis=0)
print((d_weights_1 == layer_1.weight.grad).all())
print((d_bias_1 == layer_1.bias.grad).all())

True
True  *  * clamp(log(sigmoid(ReLU( @  + ) @  + ))) +  *  +  *  * clamp(log(sigmoid(ReLU( @  + ) @  + ) *  + ))
True
True
True clamp(log(sigmoid(ReLU( @  + ) @  + )))
True clamp(log(sigmoid(ReLU( @  + ) @  + ) *  + ))
True log(sigmoid(ReLU( @  + ) @  + ))
True log(sigmoid(ReLU( @  + ) @  + ) *  + )
True sigmoid(ReLU( @  + ) @  + )
True
True
True
True
True
True


### BCEWithLogitsLoss

In [29]:
# Get inputs
# Seeded so the features, binary targets and logits below are reproducible.
np.random.seed(0)
inputs = np.random.randn(32, 10).astype(np.float32)
# Binary labels in {0, 1}, stored as float32
targets = np.random.randint(0, 2, (32, 1)).astype(np.float32)

# Convert to tensors
x = Tensor(inputs)
y = Tensor(targets)

# Try forward pass of BCE-with-logits loss
# Predictions are raw integer-valued logits in [-100, 100), i.e. large magnitudes
preds = np.random.randint(-100, 100, (32, 1)).astype(np.float32)
preds_tensor = Tensor(preds)
# NOTE(review): sig_output is not read anywhere in the visible notebook
sig_output = KTorch.sigmoid(preds_tensor) 

bce_logit_loss_mean = BCEWithLogitsLoss(reduction='mean')
bce_logit_loss_sum = BCEWithLogitsLoss(reduction='sum')


# The criteria receive the raw logits, not sig_output
loss_mean = bce_logit_loss_mean(preds_tensor, y)
loss_sum = bce_logit_loss_sum(preds_tensor, y)

print(f"Mean Loss: {loss_mean}")
print(f"Sum Loss: {loss_sum}")

Mean Loss: tensor: 24.625078201293945
Sum Loss: tensor: 788.0025024414062


In [30]:
# Check torch
# Float32 torch copies of the logits and targets used above
preds_torch, targets_torch = (
    torch.tensor(arr).float() for arr in (preds, targets)
)

# PyTorch reference criteria, one per reduction mode
torch_bce_logits_mean = torch.nn.BCEWithLogitsLoss(reduction='mean')
torch_bce_logits_sum = torch.nn.BCEWithLogitsLoss(reduction='sum')

loss_torch_mean = torch_bce_logits_mean(preds_torch, targets_torch)
loss_torch_sum = torch_bce_logits_sum(preds_torch, targets_torch)

print(f"Torch Mean Loss: {loss_torch_mean}")
print(f"Torch Sum Loss: {loss_torch_sum}")

Torch Mean Loss: 24.625078201293945
Torch Sum Loss: 788.0025024414062


In [31]:
# Check loss values
# Compare against PyTorch with a tolerance instead of exact float equality: KTorch and
# torch are independent float32 implementations, so last-bit differences are expected
# (the BCE and cross-entropy values elsewhere in this notebook already differ that way).
# This matches the np.isclose check used for CrossEntropyLoss.
np.isclose(loss_mean.data, loss_torch_mean.detach().numpy()), np.isclose(loss_sum.data, loss_torch_sum.detach().numpy())

(True, True)

In [32]:
# Forward pass
# Re-seed before building the layers
# (presumably Linear draws its initial weights from NumPy's global RNG - TODO confirm).
np.random.seed(0)

# Linear -> ReLU -> Linear; no sigmoid, so `out` holds raw logits
layer_1 = Linear(10, 5)
relu = ReLU()
layer_2 = Linear(5, 1)

# Forward pass
h1 = layer_1(x)
h2 = relu(h1)
out = layer_2(h2)

# NOTE(review): in the visible notebook `out` is never passed to BCEWithLogitsLoss and no
# backward check follows, unlike the MSE and BCE sections - this section looks unfinished.
out.shape

(32, 1)

### Negative Log-Likelihood Loss (NLLLoss)

In [33]:
# Get inputs
# Seeded so the features, class targets and scores below are reproducible.
np.random.seed(0)
inputs = np.random.randn(32, 10).astype(np.float32)
# Class indices in [0, 10), stored as float32 with shape (32, 1)
targets = np.random.randint(0, 10, (32, 1)).astype(np.float32)
# NOTE(review): these are raw standard-normal draws, not log-probabilities; that is enough
# to compare against torch, but the loss value itself has no probabilistic meaning.
y_preds = np.random.randn(32, 10).astype(np.float32)

# Convert to tensors
x = Tensor(inputs)
y = Tensor(targets)
y_preds_tensor = Tensor(y_preds)

# Try forward pass of negative log-likelihood loss
nll_loss_mean = NLLLoss(reduction='mean')
nll_loss_sum = NLLLoss(reduction='sum')

loss_mean = nll_loss_mean(y_preds_tensor, y)
loss_sum = nll_loss_sum(y_preds_tensor, y)

print(f"Mean Loss: {loss_mean}")
print(f"Sum Loss: {loss_sum}")

Mean Loss: tensor: 0.33466750383377075
Sum Loss: tensor: 10.709360122680664


In [34]:
# Torch copies of the same arrays; the class targets are converted to integer (long) indices
x_torch, y_preds_torch = torch.tensor(inputs), torch.tensor(y_preds)
y_torch = torch.tensor(targets).long()

# PyTorch reference criteria, one per reduction mode
nll_loss_torch_mean = torch.nn.NLLLoss(reduction='mean')
nll_loss_torch_sum = torch.nn.NLLLoss(reduction='sum')

print(y_preds_torch.shape, y_torch.shape)

# The targets are (32, 1) here; squeeze them to one index per sample before the call
class_indices = y_torch.squeeze()
loss_torch_mean = nll_loss_torch_mean(y_preds_torch, class_indices)
loss_torch_sum = nll_loss_torch_sum(y_preds_torch, class_indices)

# The values are only printed in this cell, not compared programmatically
print(f"Torch Mean Loss: {loss_torch_mean}")
print(f"Torch Sum Loss: {loss_torch_sum}")

torch.Size([32, 10]) torch.Size([32, 1])
Torch Mean Loss: 0.33466753363609314
Torch Sum Loss: 10.70936107635498


### CrossEntropy Loss

In [35]:
# Get inputs
# Seeded so the features, class targets and logits below are reproducible.
np.random.seed(0)
inputs = np.random.randn(32, 10).astype(np.float32)
# Class indices in [0, 10), stored as float32 with shape (32, 1)
targets = np.random.randint(0, 10, (32, 1)).astype(np.float32)
# Raw standard-normal scores for 10 classes, used as logits
y_preds = np.random.randn(32, 10).astype(np.float32)

# Convert to tensors
x = Tensor(inputs)
y = Tensor(targets)
y_preds_tensor = Tensor(y_preds)

# Try forward pass of cross entropy loss
cross_entropy_loss_mean = CrossEntropyLoss(reduction='mean')
cross_entropy_loss_sum = CrossEntropyLoss(reduction='sum')

loss_mean = cross_entropy_loss_mean(y_preds_tensor, y)
loss_sum = cross_entropy_loss_sum(y_preds_tensor, y)

print(f"Mean Loss: {loss_mean}")
print(f"Sum Loss: {loss_sum}")

Mean Loss: tensor: 2.897928237915039
Sum Loss: tensor: 92.73370361328125


In [36]:
# Check torch
# Torch copies of the same arrays; the class targets are converted to integer (long) indices
x_torch, y_preds_torch = torch.tensor(inputs), torch.tensor(y_preds)
y_torch = torch.tensor(targets).long()

# PyTorch reference criteria, one per reduction mode
cross_entropy_loss_torch_mean = torch.nn.CrossEntropyLoss(reduction='mean')
cross_entropy_loss_torch_sum = torch.nn.CrossEntropyLoss(reduction='sum')

print(y_preds_torch.shape, y_torch.shape)

# The targets are (32, 1) here; squeeze them to one index per sample before the call
class_indices = y_torch.squeeze()
loss_torch_mean = cross_entropy_loss_torch_mean(y_preds_torch, class_indices)
loss_torch_sum = cross_entropy_loss_torch_sum(y_preds_torch, class_indices)

print(f"Torch Mean Loss: {loss_torch_mean}")
print(f"Torch Sum Loss: {loss_torch_sum}")

torch.Size([32, 10]) torch.Size([32, 1])
Torch Mean Loss: 2.89792799949646
Torch Sum Loss: 92.73369598388672


In [37]:
# Check equivalence
# Tolerance-based comparison (np.isclose defaults): the KTorch and torch values printed
# above agree only up to float32 rounding, so exact equality would not hold here.
np.isclose(loss_mean.data, loss_torch_mean.detach().numpy()), np.isclose(loss_sum.data, loss_torch_sum.detach().numpy())

(True, True)