# Softmax and Cross-Entropy

## Softmax

![softmax](notes/softmax.PNG)

$$S(y_i)=\frac{e^{y_i}}{\sum_{j}e^{y_j}}$$

In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    Each entry is ``exp(x_i) / sum_j exp(x_j)``, so the result along axis 0
    is non-negative and sums to 1.

    Args:
        x: Array-like of raw scores. For a 2-D input every column is
            normalised independently, because the sum runs over axis 0.

    Returns:
        ``numpy.ndarray`` with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    # Softmax is invariant to adding a constant to every score, so shift by
    # the maximum: the largest exponent becomes exp(0) = 1 and large scores
    # no longer overflow to inf (which would turn the result into nan).
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

# Three example raw scores to normalise.
x = np.array([2.0, 1.0, 0.1])
# Map the scores to non-negative values that sum to 1.
outputs = softmax(x)

In [3]:
# The same computation using PyTorch's built-in softmax.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = x.softmax(dim=0)  # normalise over the tensor's only dimension

outputs

tensor([0.6590, 0.2424, 0.0986])

## Cross-Entropy

![cross-entropy](notes/cross_entropy.PNG)

In [4]:
def cross_entropy(actual, predicted, eps=1e-12):
    """Return the cross-entropy between a target and a predicted distribution.

    Computes ``-sum(actual * log(predicted))``. The result is the plain sum
    over all entries; it is not divided by the number of samples.

    Args:
        actual: Array-like target distribution (e.g. a one-hot vector).
        predicted: Array-like of predicted probabilities, same shape as
            ``actual``.
        eps: Lower bound applied to ``predicted`` before taking the log.

    Returns:
        The loss as a NumPy float scalar.
    """
    # log(0) is -inf, and 0 * -inf is nan, so a single zero probability
    # would poison the whole sum. Clamp from below to keep the loss finite.
    safe_predicted = np.clip(predicted, eps, None)
    return -np.sum(actual * np.log(safe_predicted))

# Target distribution: one-hot encoded, the true class is index 0.
y = np.array([1, 0, 0])

# Predictions are probabilities (each row sums to 1), not raw scores.
# The "good" one puts most mass on index 0, the "bad" one on index 2.
y_pred_good = np.array([0.7, 0.2, 0.1])
y_pred_bad = np.array([0.1, 0.3, 0.6])

# The better prediction should produce the smaller loss.
l1 = cross_entropy(y, y_pred_good)
l2 = cross_entropy(y, y_pred_bad)

print(f'Loss1 numpy: {l1}')
print(f'Loss2 numpy: {l2}')

Loss1 numpy: 0.35667494393873245
Loss2 numpy: 2.3025850929940455


In [5]:
'''
similarly in pytorch,

NOTE: nn.CrossEntropyLoss() applies nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss) already
 
-> no softmax in last layer

y has class labels, not one hot encoded
y_pred has raw scores (logits), no softmax
'''

# Criterion that is fed raw scores together with integer class labels.
loss = nn.CrossEntropyLoss()

# A single sample whose true class is index 0.
y = torch.tensor([0])

# Raw scores laid out as (nsamples, nclasses).
y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

# Score both predictions against the same target.
l1, l2 = loss(y_pred_good, y), loss(y_pred_bad, y)

# NOTE(review): the label text says "numpy" although these are PyTorch
# losses; it is left unchanged so the printed text stays the same.
print(f'Loss1 numpy: {l1}', f'Loss2 numpy: {l2}', sep='\n')

Loss1 numpy: 0.4170299470424652
Loss2 numpy: 1.840616226196289


In [6]:
# Tensor.max over dim=1 yields (values, indices); only the index of the
# highest score in each row is kept.
_, best_prediction_in_good = y_pred_good.max(dim=1)
_, best_prediction_in_bad = y_pred_bad.max(dim=1)

best_prediction_in_good, best_prediction_in_bad

(tensor([0]), tensor([1]))

## Multi-Class Neural Network

![nn](notes/nn_softmax_multi.PNG)

In [7]:
# Multiclass problem
class NeuralNet2(nn.Module):
    """Linear -> ReLU -> Linear network that returns raw class scores.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the hidden layer.
        num_classes: Number of scores produced per input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one unnormalised score per class for each row of ``x``."""
        hidden = self.relu(self.linear1(x))
        # No softmax on the output: the scores are returned as-is so that a
        # loss which normalises internally can consume them.
        return self.linear2(hidden)

In [8]:
# Multi-class model: 28*28 input features, 5 hidden units, 3 output classes.
model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
# nn.CrossEntropyLoss applies LogSoftmax followed by NLLLoss itself, so it is
# given the model's raw scores rather than softmax probabilities.
criterion = nn.CrossEntropyLoss()

## Binary-Class Neural Network

![nn](notes/nn_softmax_binary.PNG)

In [9]:
# Binary classification
class NeuralNet1(nn.Module):
    """Linear -> ReLU -> Linear -> sigmoid network with a single output.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one value in (0, 1) for each row of ``x``."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        # The sigmoid squashes the single raw score into a probability.
        return torch.sigmoid(score)

In [10]:
# Binary classifier: 28*28 input features, 5 hidden units, one sigmoid output.
model = NeuralNet1(input_size=28*28, hidden_size=5)
# nn.BCELoss takes probabilities, which the sigmoid at the end of
# NeuralNet1.forward already produces.
criterion = nn.BCELoss()