### Softmax and Cross Entropy

Softmax: squashes the outputs so that each lies between zero and one and together they sum to one. As a result, we can interpret them as probabilities.

$ S(y_{i}) = \frac{e^{y_{i}}}{\sum_{j} e^{y_{j}}} $


In [3]:
# Implementation 
import torch 
import torch.nn as nn 
import numpy as np 


def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    Each entry is mapped to ``exp(x_i) / sum_j exp(x_j)``, so the values along
    axis 0 lie in [0, 1] and sum to 1.

    Args:
        x: array-like of raw scores. For a 2-D input every column is
            normalised independently, because the reduction runs over axis 0.

    Returns:
        numpy.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    # Subtracting the per-column maximum does not change the result
    # mathematically (the factor cancels in the ratio), but it keeps the
    # largest exponent at 0 so np.exp cannot overflow to inf for large scores.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)
# Example input: three raw scores passed through the NumPy softmax above.
x = np.array([2.0 , 1.0 , 0.1])
outputs = softmax(x)
# The message has no space before the placeholder, so the array is printed
# directly after the word "softmax".
print('The result of softmax{}'.format(outputs))

The result of softmax[0.65900114 0.24243297 0.09856589]


In [5]:
# implementation in pytorch 

# The same three raw scores, this time as a torch tensor.
x = torch.tensor([2.0, 1.0, 0.1])
# dim=0 normalises along the only axis of this 1-D tensor.
outputs = x.softmax(dim=0)
print('The result of softmax {}'.format(outputs))

The result of softmax tensor([0.6590, 0.2424, 0.0986])


### Cross Entropy Loss 

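Cross-entropy is a loss function for multiclass classification problems. The better the prediction, the lower the loss, e.g. a loss of 0.35 is better than 2.30.
The true labels $y$ must be one-hot encoded.
$\hat{y}$ contains the predicted probabilities (e.g. the output of softmax).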


$ D(y,\hat{y}) = -\frac{1}{N}\sum_{i} y_{i} \log(\hat{y}_{i}) $


In [6]:
def cross_entropy(actual, predicted):
    """Return the cross-entropy between a target and predicted probabilities.

    Computes ``-sum(actual * log(predicted))``. The sum is NOT divided by the
    number of samples (no normalisation is applied).

    Args:
        actual: array-like target distribution, e.g. a one-hot vector.
        predicted: array-like of predicted probabilities, broadcastable
            against ``actual``.

    Returns:
        The loss as a NumPy float. It is ``inf`` when a class with non-zero
        target weight is given probability 0.
    """
    actual, predicted = np.broadcast_arrays(np.asarray(actual), np.asarray(predicted))
    # Only entries with a non-zero target contribute to the sum. Skipping the
    # others applies the convention 0 * log(0) = 0; evaluating them would give
    # 0 * -inf = nan and turn the loss of a perfect one-hot prediction into nan.
    contributing = actual != 0
    loss = -np.sum(actual[contributing] * np.log(predicted[contributing]))
    return loss

# One-hot target: the true class is index 0.
Y = np.array([1, 0, 0])

# Two candidate probability vectors: the first puts most of its mass on the
# true class, the second puts most of its mass on a wrong class.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

l1, l2 = cross_entropy(Y, Y_pred_good), cross_entropy(Y, Y_pred_bad)

print('Loss 1:{:4f}'.format(l1))
print('Loss 2:{:4f}'.format(l2))

Loss 1:0.356675
Loss 2:2.302585


In [8]:
# nn.CrossEntropyLoss applies nn.LogSoftmax + nn.NLLLoss, so the model must not
# end with its own softmax layer:
#   * Y holds integer class labels (NOT one-hot vectors), one label per sample.
#   * Y_pred holds raw scores (logits), no softmax applied,
#     with shape (n_samples, n_classes).
# This example uses 3 samples and 3 classes, so Y has 3 labels.

loss = nn.CrossEntropyLoss()
Y = torch.tensor([2, 0, 1])  # shape (n_samples,): true class index of each sample

# Raw scores whose largest entry sits on the true class in every row.
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
# Raw scores whose largest entry sits on a wrong class in the first two rows.
Y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [2.0, 3.0, 0.1],
])

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)

print('Loss 1:{:4f}'.format(l1))
print('Loss 2:{:4f}'.format(l2))

# torch.max along dim 1 yields (values, indices); the indices are the
# predicted class of each sample.
prediction_1 = torch.max(Y_pred_good, dim=1).indices
prediction_2 = torch.max(Y_pred_bad, dim=1).indices

print('Prediction 1:{}'.format(prediction_1))
print('Prediction 2:{}'.format(prediction_2))


Loss 1:0.301824
Loss 2:1.706927
Prediction 1:tensor([2, 0, 1])
Prediction 2:tensor([0, 2, 1])


In [9]:
# NN with Softmax 

class NeuralNet2(nn.Module):
    """Fully connected classifier with one hidden layer that returns raw scores.

    Layers: Linear(input_size -> hidden_size), ReLU,
    Linear(hidden_size -> num_classes). ``forward`` applies no softmax, so the
    output is meant to be fed to a loss that performs the softmax itself.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers and the ReLU activation."""
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # Binary-classification variant: use nn.Linear(hidden_size, 1) here
        # and apply torch.sigmoid to the result in forward().
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for the input batch ``x``."""
        hidden = self.relu(self.linear1(x))
        # Deliberately NO softmax at the end.
        return self.linear2(hidden)
# Build the network with 28*28 = 784 input features, 5 hidden units and 3 output classes.
model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss() # applies the softmax itself, so the model returns raw scores
# Binary-classification alternative: criterion = nn.BCELoss()
# In that case the model must apply the sigmoid to its output itself.

In [10]:
# Activation Functions 
# some of the functions are not available in the torch.nn, but available
# in torch.nn.functional F.leaky_relu