# Softmax and Cross-Entropy

## 1. Softmax
Đưa các output về khoảng [0, 1] sao cho tổng của chúng bằng 1 (một phân phối xác suất).

![](../../images/soft_max_2.png)

![](../../images/soft_max_3.png)

In [3]:
import torch
import torch.nn as nn
import numpy as np

def softmax(x, axis=0):
    """Return the softmax of ``x`` along ``axis``.

    The scores are shifted by their maximum before exponentiation. The
    shift cancels out in the ratio, so the result is mathematically the
    same, but ``np.exp`` can no longer overflow for large scores (which
    would otherwise produce ``inf / inf = nan``).

    Args:
        x: Array-like of raw scores.
        axis: Axis along which the values are normalized to sum to 1.
            The default ``0`` normalizes a 1-D vector, or each column of
            a 2-D array.

    Returns:
        ``numpy.ndarray`` with the same shape as ``x`` whose entries lie
        in [0, 1] and sum to 1 along ``axis``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on an empty reduction.
        return np.exp(x)
    # keepdims=True keeps the reduced axis so the result broadcasts
    # against x for any choice of axis.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)


# Raw scores used for both the NumPy and the PyTorch softmax.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print(f'softmax numpy: {outputs}')

# Same values as a tensor; dim=0 normalizes over the only dimension.
x = torch.from_numpy(x)
outputs = x.softmax(dim=0)
print(f"Softmax pytorch: {outputs}")

softmax numpy: [0.65900114 0.24243297 0.09856589]
Softmax pytorch: tensor([0.6590, 0.2424, 0.0986], dtype=torch.float64)


## 2. Cross-Entropy

Cross-entropy càng nhỏ thì mô hình càng tốt, vì y dự đoán càng gần với y thực tế.

![](../../images/cross_entropy_1.png)

In [1]:
import torch
import torch.nn as nn
import numpy as np

def cross_entropy(actual, predicted, eps=1e-12):
    """Return the cross-entropy loss ``-sum(actual * log(predicted))``.

    Predicted probabilities are clamped to at least ``eps`` before the
    logarithm is taken. Without the clamp, a probability of exactly 0
    gives ``log(0) = -inf``, and ``0 * -inf`` is ``nan``, so even a
    perfect one-hot prediction would return ``nan``.

    Args:
        actual: Array-like of target values (one-hot encoded labels).
        predicted: Array-like of predicted probabilities, same shape as
            ``actual``.
        eps: Lower bound applied to ``predicted`` to keep the logarithm
            finite.

    Returns:
        The loss as a NumPy float scalar (natural-log units).
    """
    actual = np.asarray(actual)
    # Only a lower bound is applied; values above eps are left untouched.
    safe_predicted = np.clip(np.asarray(predicted, dtype=float), eps, None)
    return -np.sum(actual * np.log(safe_predicted))

# The target must be one-hot encoded:
#   class 0 -> [1 0 0]
#   class 1 -> [0 1 0]
#   class 2 -> [0 0 1]
Y = np.array([1, 0, 0])

# Predictions are probabilities, one per class.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)
# Both losses are printed with four decimal places (":.4f"); the second
# print previously used ":4f", which sets a minimum width, not a precision.
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

Loss1 numpy: 0.3567
Loss2 numpy: 2.302585


### Cross entropy in pytorch

![](../../images/cross_entropy_2.png)

In [14]:
import torch
import torch.nn as nn
import numpy as np

# nn.CrossEntropyLoss is fed raw scores together with integer class labels.
loss = nn.CrossEntropyLoss()

# ---- One sample ----
# The target is a class index (class 0 here), not a one-hot vector.
Y = torch.tensor([0])
# Scores are laid out as nsamples x nclasses = 1 x 3 and are raw values,
# i.e. no softmax has been applied to them.
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])  # largest score at index 0 -> matches the label
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])  # largest score at index 1 -> wrong class

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
print("\n1 samples")
print(l1.item())
print(l2.item())

# Predicted class = index of the largest score along the class dimension.
prediction1 = torch.max(Y_pred_good, dim=1).indices
prediction2 = torch.max(Y_pred_bad, dim=1).indices
print(prediction1)
print(prediction2)

# ---- Three samples ----
Y = torch.tensor([2, 0, 1])

# nsamples x nclasses = 3 x 3
Y_pred_good = torch.tensor(
    [
        [0.1, 2.0, 2.1],
        [2.0, 1.0, 0.1],
        [0.1, 3.0, 0.1],
    ]
)
Y_pred_bad = torch.tensor(
    [
        [2.1, 1.0, 0.1],
        [0.1, 1.0, 2.1],
        [0.1, 3.0, 0.1],
    ]
)

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

print("\n3 samples\n")
print(l1)
print(l2)

prediction1 = torch.max(Y_pred_good, dim=1).indices
prediction2 = torch.max(Y_pred_bad, dim=1).indices
print(prediction1)
print(prediction2)




1 samples
0.4170299470424652
1.840616226196289
tensor([0])
tensor([1])

3 samples

tensor(0.4115)
tensor(1.6242)
tensor([2, 0, 1])
tensor([0, 2, 1])


## Neural Network with Softmax

![](../../images/soft_max_4.png)

In [15]:
from turtle import hideturtle
import torch
import torch.nn as nn
import numpy as np

# Multiclass problem
class NeuralNet2(nn.Module):
    """Fully connected network with one hidden ReLU layer for multiclass output.

    ``forward`` returns one raw score per class; no softmax is applied
    inside the model.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers and the ReLU between them.

        Args:
            input_size: Number of input features.
            hidden_size: Number of units in the hidden layer.
            num_classes: Number of output scores (one per class).
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores for ``x`` (softmax is not applied)."""
        hidden = self.relu(self.linear1(x))
        return self.linear2(hidden)


# 28*28 input features, 5 hidden units, 3 output classes.
model = NeuralNet2(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)
# The loss takes the model's raw scores and applies the softmax itself.
criterion = nn.CrossEntropyLoss()

## Neural Network with Sigmoid

![](../../images/soft_max_5.png)

In [19]:
import torch
import torch.nn as nn
import numpy as np

# Binary problem: a single output passed through a sigmoid
class NeuralNet1(nn.Module):
    """Fully connected network with one hidden ReLU layer and one sigmoid output.

    ``forward`` returns a single value per sample, squashed into (0, 1)
    by a sigmoid.
    """

    def __init__(self, input_size, hidden_size):
        """Create the two linear layers and the ReLU between them.

        Args:
            input_size: Number of input features.
            hidden_size: Number of units in the hidden layer.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the sigmoid of the single output score for ``x``."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        # The sigmoid is applied inside the model.
        return torch.sigmoid(score)


# 28*28 input features, 5 hidden units, one sigmoid output.
model = NeuralNet1(
    input_size=28 * 28,
    hidden_size=5,
)
# Binary cross-entropy on the sigmoid output produced by the model.
criterion = nn.BCELoss()