# Softmax Numpy

In [6]:
# softmax squashes raw scores into values between 0 and 1 that sum to 1
# used to scale the outputs of a neural network into probabilities

In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
def softmax(x, axis=0):
    """Compute the softmax of ``x`` along ``axis``.

    Each entry is mapped to ``exp(x_i) / sum_j exp(x_j)``, so the values
    along ``axis`` are positive and sum to 1.

    Args:
        x: Array-like of raw scores.
        axis: Axis along which to normalize. Defaults to 0.

    Returns:
        np.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on an empty array.
        return np.exp(x)
    # Subtracting the max does not change the result mathematically, but it
    # keeps exp() from overflowing to inf (and inf / inf from producing nan).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [4]:
# Raw scores for three classes, passed through the NumPy softmax defined in this notebook.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy:', outputs)

softmax numpy: [0.65900114 0.24243297 0.09856589]


# Softmax PyTorch

In [17]:
# Raw scores as a 1-D tensor; softmax is taken over dim 0, its only dimension.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = x.softmax(dim=0)
print(outputs)

tensor([0.6590, 0.2424, 0.0986])


# Cross Entropy Numpy

In [7]:
# measures the performance of a classification model whose output is a probability between 0 and 1
# can be used in multi-class problems
# loss increases as the probability diverges from the actual label (bigger is worse prediction)

In [12]:
def cross_entropy(actual, predicted, eps=1e-15):
    """Compute the cross-entropy loss ``-sum(actual * log(predicted))``.

    Args:
        actual: One-hot encoded target(s), array-like.
        predicted: Predicted probabilities, same shape as ``actual``.
        eps: Lower bound applied to ``predicted`` before taking the log.

    Returns:
        The summed loss as a NumPy scalar (not averaged over samples).
    """
    actual = np.asarray(actual)
    # Clamp away from 0: log(0) is -inf, and 0 * -inf is nan, which would
    # turn even a perfect prediction such as [1, 0, 0] into a nan loss.
    predicted = np.clip(np.asarray(predicted), eps, None)
    loss = -np.sum(actual * np.log(predicted))
    # This is the total; divide by predicted.shape[0] to get a per-sample mean.
    return loss

In [13]:
# The target passed to the NumPy cross_entropy must be one-hot encoded:
#   class 0 -> [1 0 0]
#   class 1 -> [0 1 0]
#   class 2 -> [0 0 1]
# Here the true class is 0.
Y = np.array([1, 0, 0])

In [15]:
# Y_pred holds predicted class probabilities.
Y_pred_good = np.array([0.7, 0.2, 0.1])  # highest probability on class 0
Y_pred_bad = np.array([0.1, 0.3, 0.6])  # highest probability on class 2
# Loss of each prediction against the one-hot target Y.
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)

In [16]:
# Report both losses, formatted to 4 decimal places.
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


# Cross Entropy PyTorch

In [None]:
# nn.CrossEntropyLoss applies nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss)
# We should not apply the softmax ourselves in the last layer!

# Y has class labels, must not be One-Hot encoded
# Y_pred has raw scores (logits), so no Softmax!

In [21]:
# Loss object reused by the PyTorch examples in this notebook; called as loss(Y_pred, Y).
loss = nn.CrossEntropyLoss()

In [34]:
# Targets for 3 samples, given as class indices (not one-hot vectors).
Y = torch.tensor([2, 0, 1])
# Raw scores (logits), shape nsamples x nclasses = 3x3; no softmax applied.
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [2.0, 3.0, 0.1],
])
Y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])

In [38]:
# Call order is (predictions, targets): raw logits first, class-index labels second.
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

In [39]:
# .item() unwraps each single-value loss tensor into a plain Python number for printing.
print(l1.item())
print(l2.item())

0.38459011912345886
1.6241613626480103


In [40]:
# Predicted class per sample = index of the largest logit in each row (dim=1).
# argmax returns the indices directly, so nothing is assigned to `_`, which
# IPython uses to hold the previous cell's output.
predictions1 = torch.argmax(Y_pred_good, dim=1)
predictions2 = torch.argmax(Y_pred_bad, dim=1)

In [41]:
# Show the predicted class index for each sample.
print(predictions1)
print(predictions2)

tensor([2, 0, 1])
tensor([0, 2, 1])


In [42]:
# NOTE(review): repeats the earlier loss(Y_pred_good, Y) / loss(Y_pred_bad, Y) cell;
# the inputs are not modified in between, so l1 and l2 receive the same values again.
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

In [43]:
# Print the recomputed losses as plain Python numbers.
print(l1.item())
print(l2.item())

0.38459011912345886
1.6241613626480103
