In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
# s(x_i) = e^{x_i} / sum_j e^{x_j}, with j running over the entries along axis 0
def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    Each entry is ``exp(x_i) / sum_j exp(x_j)``, where the sum runs over
    axis 0, so a 1-D input yields one distribution and a 2-D input yields
    one distribution per column.

    The axis-0 maximum is subtracted before exponentiating. The common
    factor ``exp(-max)`` cancels in the ratio, so the result is unchanged
    mathematically, but ``np.exp`` can no longer overflow to ``inf`` (which
    previously turned the output into ``nan`` for large scores).

    Parameters
    ----------
    x : array_like
        Raw scores.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the denominator
    return exps / np.sum(exps, axis=0, keepdims=True)

In [4]:
# Run the NumPy softmax on three raw scores and print the resulting values.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy', outputs)

softmax numpy [0.65900114 0.24243297 0.09856589]


In [6]:
# Same three scores, this time through PyTorch's built-in softmax.
# dim=0 normalises along the only dimension of this 1-D tensor.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)
# Label the result as torch; the previous 'softmax numpy' label was copied
# from the NumPy cell and misdescribed where the values came from.
print('softmax torch', outputs)

softmax numpy tensor([0.6590, 0.2424, 0.0986])


In [12]:
# D(Y_pred, Y) = -sum_i Y_i * ln(Y_pred_i)   (optionally scaled by 1/N)
def cross_entropy(actual, predicted, eps=1e-15):
    """Cross-entropy between a target and predicted probabilities.

    Computes ``-sum(actual * log(predicted))`` over all elements. The sum is
    not divided by the number of samples; divide the result by
    ``predicted.shape[0]`` if a normalised loss is wanted.

    Parameters
    ----------
    actual : array_like
        Target values (one-hot encoded in this notebook's examples).
    predicted : array_like
        Predicted probabilities, same shape as ``actual``.
    eps : float, optional
        Lower bound applied to ``predicted`` before taking the logarithm.
        Without it a probability of exactly 0 gives ``log(0) = -inf``, and
        ``0 * -inf`` is ``nan`` even where the target is 0.

    Returns
    -------
    numpy floating scalar
        The summed cross-entropy loss.
    """
    actual = np.asarray(actual)
    # Only the lower side is bounded, so values already above eps are untouched.
    safe_predicted = np.maximum(np.asarray(predicted), eps)
    loss = -np.sum(actual * np.log(safe_predicted))
    return loss

In [13]:
# The target is one-hot encoded; each position stands for one class:
#   class 0 -> [1 0 0]  (say, dog)
#   class 1 -> [0 1 0]  (cat)
#   class 2 -> [0 0 1]  (turtle)
Y = np.array([1, 0, 0])

# Predictions are probabilities: one puts most mass on the true class,
# the other puts most mass on a wrong class.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

# Score both predictions against the same target, in order.
l1, l2 = (cross_entropy(Y, guess) for guess in (Y_pred_good, Y_pred_bad))

print(f'Loss1 numpy: {l1:.4f}')  # low loss
print(f'Loss2 numpy: {l2:.4f}')  # higher loss


Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


In [14]:
# Caution: nn.CrossEntropyLoss already combines nn.LogSoftmax and nn.NLLLoss,
# so the predictions passed to it must be raw scores with no softmax applied.
loss = nn.CrossEntropyLoss()

# The target holds class indices rather than one-hot vectors:
# [0] means the single sample belongs to class 0.
Y = torch.tensor([0])
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])  # raw scores, softmax not applied
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

# Evaluate both score vectors against the same target, in order.
l1, l2 = (loss(scores, Y) for scores in (Y_pred_good, Y_pred_bad))

print(l1.item(), l2.item(), sep='\n')

0.4170299470424652
1.840616226196289


In [16]:
# torch.max along dim 1 returns (values, indices); element [1] is the index
# of the largest score in each row, used here as the predicted class.
predictions1 = torch.max(Y_pred_good, dim=1)[1]
predictions2 = torch.max(Y_pred_bad, dim=1)[1]
print(predictions1, predictions2, sep='\n')

tensor([0])
tensor([1])


In [18]:
# Three samples this time, so Y carries three class indices.
Y = torch.tensor([2, 0, 1])

# Raw scores (softmax not applied), one row per sample: 3 x 3.
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
Y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])

# Loss for each batch of scores against the same targets, in order.
l1, l2 = (loss(scores, Y) for scores in (Y_pred_good, Y_pred_bad))
print(l1.item(), l2.item(), sep='\n')

# Row-wise index of the largest score = predicted class per sample.
predictions1 = torch.max(Y_pred_good, dim=1)[1]
predictions2 = torch.max(Y_pred_bad, dim=1)[1]
print(predictions1, predictions2, sep='\n')

0.3018244206905365
1.6241613626480103
tensor([2, 0, 1])
tensor([0, 2, 1])


In [21]:
# Multiclass problem
class NeuralNet2(nn.Module):
    """Two-layer fully connected network that outputs one raw score per class.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> num_classes).

    No softmax is applied in ``forward``; the network is meant to be trained
    with ``nn.CrossEntropyLoss``, which applies log-softmax internally.

    Note: a later cell in this notebook defines another class with the same
    name for the binary case, which replaces this one once that cell runs.

    Parameters
    ----------
    input_size : int
        Number of input features per sample.
    hidden_size : int
        Width of the hidden layer.
    num_classes : int
        Number of output scores (one per class).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return raw class scores for ``x`` (last dimension ``input_size``)."""
        # Fixed: the first layer is stored as `linear1` (there is no
        # `self.linear`), and `slef` was a typo for `self`.
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # No softmax at the end: CrossEntropyLoss applies it.
        return out
    
# Cross-entropy criterion for the multiclass network.
criterion = nn.CrossEntropyLoss()

# 28*28 input features, 5 hidden units, 3 output classes.
model = NeuralNet2(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)

In [22]:
# Binary Classification
class NeuralNet2(nn.Module):
    """Two-layer fully connected network for a yes/no decision.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> 1) -> sigmoid.

    ``forward`` returns a probability in [0, 1] per sample, which is what
    ``nn.BCELoss`` (the criterion used with this model in the notebook)
    expects; ``nn.BCELoss`` does not apply a sigmoid itself.

    Note: this class reuses the name of the multiclass ``NeuralNet2`` defined
    in the previous cell and replaces it once this cell runs.

    Parameters
    ----------
    input_size : int
        Number of input features per sample.
    hidden_size : int
        Width of the hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # Yes-or-no question, so the output size is always 1.
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the predicted probability of the positive class for ``x``."""
        # Fixed: the first layer is stored as `linear1` (there is no
        # `self.linear`), and `slef` was a typo for `self`.
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # Unlike CrossEntropyLoss, BCELoss needs probabilities, so the
        # sigmoid has to be applied here.
        return torch.sigmoid(out)
    
# Binary cross-entropy criterion for the single-output network.
criterion = nn.BCELoss()

# 28*28 input features, 5 hidden units, one output.
model = NeuralNet2(
    input_size=28 * 28,
    hidden_size=5,
)