## PyTorch Tutorial #11 - Softmax and Cross Entropy

In [1]:
import torch
import torch.nn as nn
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


### Ejemplo de implementación Softmax

In [7]:
# Softmax implemented with NumPy.
def softmax(x, axis=0):
    """Return the softmax of ``x`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating. This
    leaves the result mathematically unchanged but keeps ``np.exp`` from
    overflowing (and producing ``nan``) when the scores are large.

    Args:
        x: Array-like of raw scores.
        axis: Axis along which the outputs sum to 1. Defaults to 0, the
            axis the original implementation normalised over.

    Returns:
        A NumPy array with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty axis.
        return np.exp(x)
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

# Raw scores for a three-class example.
x = np.array([2.0, 1.0, 0.1], dtype=np.float64)
# Turn the scores into probabilities; the bare name on the last line makes
# the notebook display the resulting array.
outputs = softmax(x)
outputs

array([0.65900114, 0.24243297, 0.09856589])

In [9]:
# Softmax using PyTorch's built-in function.
# dim=0 means the softmax is computed along the first axis.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(input=x, dim=0)
outputs

tensor([0.6590, 0.2424, 0.0986])

In [10]:
# A veces la función softmax se combina con la función Cross-Entropy

### Ejemplo de implementación de Cross-Entropy

In [14]:
# Cross-entropy implemented with NumPy.
def cross_entropy(actual, predicted, eps=1e-12):
    """Return the cross-entropy between ``actual`` and ``predicted``.

    Computes ``-sum(actual * log(predicted))``. The result is the plain sum;
    it is not divided by the number of entries.

    Args:
        actual: Array-like of target values (e.g. a one-hot vector).
        predicted: Array-like of predicted probabilities, same shape as
            ``actual``.
        eps: Lower bound applied to ``predicted`` before taking the log.
            Without it an exact zero probability yields ``log(0) = -inf``,
            and ``0 * -inf`` turns the whole loss into ``nan``.

    Returns:
        The loss as a NumPy float scalar.
    """
    actual = np.asarray(actual)
    # Floor the probabilities so the logarithm stays finite.
    safe_predicted = np.maximum(np.asarray(predicted, dtype=float), eps)
    return -np.sum(actual * np.log(safe_predicted))

# One-hot target: the true class is index 0.
y = np.array([1, 0, 0])
# Two candidate probability vectors: the first puts most of its mass on the
# true class (low loss), the second on a wrong class (high loss).
y_pred_good = np.array([0.7, 0.2, 0.1])
y_pred_bad = np.array([0.1, 0.3, 0.6])

l1, l2 = cross_entropy(y, y_pred_good), cross_entropy(y, y_pred_bad)

print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


In [19]:
# Cross-entropy using PyTorch's built-in loss.
# NOTE: nn.CrossEntropyLoss() already combines nn.LogSoftmax() and nn.NLLLoss() (negative log-likelihood loss).
# NOTE: because of that, the network must not apply Softmax in its last layer.
# NOTE: the targets are class indices, not one-hot encoded vectors.
# NOTE: the predictions are raw scores (logits), not Softmax outputs.

loss = nn.CrossEntropyLoss()
y = torch.tensor([0])  # true class index of the single sample

# Shape is nsamples x nclasses = 1 x 3.
y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1 = loss(input=y_pred_good, target=y)
l2 = loss(input=y_pred_bad, target=y)

print(f'Loss1 torch: {l1.item():.4f}')
print(f'Loss2 torch: {l2.item():.4f}')

Loss1 torch: 0.4170
Loss2 torch: 1.8406


In [21]:
# To get the predicted class, take torch.max along dim 1: it returns
# (values, indices), and the indices are the predicted class of each sample.
predictions1 = torch.max(y_pred_good, dim=1).indices
predictions2 = torch.max(y_pred_bad, dim=1).indices
print(predictions1)
print(predictions2)

tensor([0])
tensor([1])


### Implementación de Cross-Entropy con múltiples samples.

In [22]:
# Batch of 3 samples.
loss = nn.CrossEntropyLoss()
y = torch.tensor([2, 0, 1])  # true class index of each sample

# Logits with shape nsamples x nclasses = 3 x 3.
y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])

l1 = loss(input=y_pred_good, target=y)
l2 = loss(input=y_pred_bad, target=y)

print(f'Loss1 torch: {l1.item():.4f}')
print(f'Loss2 torch: {l2.item():.4f}')

Loss1 torch: 0.3018
Loss2 torch: 1.6242


In [23]:
# To get the predicted class, take torch.max along dim 1: it returns
# (values, indices), and the indices are the predicted class of each sample.
predictions1 = torch.max(y_pred_good, dim=1).indices
predictions2 = torch.max(y_pred_bad, dim=1).indices
print(predictions1)
print(predictions2)

tensor([2, 0, 1])
tensor([0, 2, 1])


### Ejemplo de implementación de multiclase con red neuronal

In [26]:
class NeuralNet2(nn.Module):
    """Two-layer fully connected network for multi-class classification.

    The forward pass returns raw class scores (logits) with no softmax
    applied, which is the input nn.CrossEntropyLoss works with.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Build Linear(input_size -> hidden_size), ReLU, Linear(hidden_size -> num_classes)."""
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the unnormalised class scores for ``x``."""
        hidden = self.relu(self.linear1(x))
        # Deliberately no softmax at the end.
        return self.linear2(hidden)

In [27]:
# Three-class model with 28*28 = 784 input features and 5 hidden units.
model = NeuralNet2(input_size=28 * 28, hidden_size=5, num_classes=3)
# CrossEntropyLoss applies the softmax itself, so the model returns raw scores.
criterion = nn.CrossEntropyLoss()

### Ejemplo de implementación de clase binaria con red neuronal

In [28]:
class NeuralNet1(nn.Module):
    """Two-layer fully connected network for binary classification.

    The forward pass ends in a sigmoid, so the output is a single value in
    (0, 1) per sample.
    """

    def __init__(self, input_size, hidden_size):
        """Build Linear(input_size -> hidden_size), ReLU, Linear(hidden_size -> 1)."""
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # The output size is always 1 here (a multi-class model would use
        # the number of classes instead).
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the sigmoid-activated prediction for ``x``."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        # Sigmoid at the end squashes the score into (0, 1).
        return torch.sigmoid(score)

In [30]:
# Binary model with 28*28 = 784 input features and 5 hidden units.
model = NeuralNet1(input_size=28 * 28, hidden_size=5)
# BCELoss takes probabilities, which NeuralNet1's final sigmoid provides.
criterion = nn.BCELoss()