In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
# Scores/Logits -> probabilities

In [3]:
def softmax(x):
    """Convert raw scores (logits) into probabilities along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the result mathematically unchanged (the factor cancels in the ratio) but
    keeps ``np.exp`` from overflowing to ``inf`` -- and the ratio from turning
    into ``nan`` -- when the scores are large.

    Args:
        x: array-like of scores. Normalization runs over axis 0, so for a 2-D
            input every column sums to 1.

    Returns:
        numpy.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # np.max cannot reduce an empty array; an empty input simply yields
        # an empty (floating-point) output.
        return np.exp(x)
    # Exponentiate once and reuse the result for numerator and denominator.
    exp_shifted = np.exp(x - np.max(x, axis=0))
    return exp_shifted / np.sum(exp_shifted, axis=0)

# Three example scores; print what softmax() returns for them.
x = np.asarray([2.0, 1.0, 0.1], dtype=np.float64)
outputs = softmax(x)
print('softmax numpy:', outputs)

softmax numpy: [0.65900114 0.24243297 0.09856589]


In [4]:
# Same scores as a tensor, normalized over dimension 0 via the method form.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = x.softmax(dim=0)
print(outputs)

tensor([0.6590, 0.2424, 0.0986])


In [5]:
def cross_entropy(y_true, y_pred, eps=1e-15):
    """Cross-entropy between target weights and predicted probabilities.

    Computes ``-sum(y_true * log(y_pred))``. The result is a plain sum; it is
    not divided by the number of samples.

    Args:
        y_true: array-like of target weights (e.g. a one-hot vector).
        y_pred: array-like of predicted probabilities, same shape as y_true.
        eps: lower bound applied to ``y_pred`` before taking the log. Without
            it a predicted probability of exactly 0 gives ``log(0) = -inf``,
            and ``0 * -inf`` evaluates to ``nan``, poisoning the whole sum.

    Returns:
        The loss as a NumPy float scalar.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Floor the predictions so the logarithm stays finite.
    safe_pred = np.maximum(y_pred, eps)
    return -np.sum(y_true * np.log(safe_pred))

In [6]:
# Cross-entropy: D(Y_pred, Y_true) = -sum_i (Y_i_true * log(Y_i_pred))
# (cross_entropy() returns this sum as-is; it does not divide by N.)
# y_true is one-hot for class 0, so only the first predicted probability counts.
y_true = [1, 0, 0]
y_pred_good = [0.7, 0.2, 0.1]
y_pred_bad = [0.1, 0.3, 0.6]
l1, l2 = cross_entropy(y_true, y_pred_good), cross_entropy(y_true, y_pred_bad)
print(l1)
print(l2)

0.35667494393873245
2.3025850929940455


In [7]:
# nn.CrossEntropyLoss is called as loss(input, target):
#   input  - raw, unnormalized scores (logits) per class, size nSamples x nClasses;
#            do not apply softmax beforehand
#   target - class labels (here 0, 1 or 2), size nSamples; not one-hot encoded
loss = nn.CrossEntropyLoss()

# A single sample whose correct class is 0.
y_true = torch.tensor([0])

# 1 x 3 logits: the "good" row ranks class 0 highest, the "bad" row ranks class 1 highest.
y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1, l2 = loss(y_pred_good, y_true), loss(y_pred_bad, y_true)

print(f'PyTorch Loss1: {l1.item():.4f}')
print(f'PyTorch Loss2: {l2.item():.4f}')

PyTorch Loss1: 0.4170
PyTorch Loss2: 1.8406


In [8]:
# Predicted class = index of the largest logit along dimension 1 (the class axis).
_, pred1 = y_pred_good.max(dim=1)
_, pred2 = y_pred_bad.max(dim=1)
print(pred1)
print(pred2)

tensor([0])
tensor([1])


In [9]:
# Batch of 3 samples: one class label per sample, one row of logits per sample.
y_true = torch.tensor([2, 0, 1])
# In the "good" batch each row's largest logit sits at that row's label.
y_pred_good = torch.tensor([
    [2.0, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [2.0, 4.0, 0.1],
])
# The "bad" batch repeats the same logits for every sample.
y_pred_bad = torch.tensor([
    [0.5, 2.0, 0.3],
    [0.5, 2.0, 0.3],
    [0.5, 2.0, 0.3],
])
l1, l2 = loss(y_pred_good, y_true), loss(y_pred_bad, y_true)

print(f'PyTorch Loss1: {l1.item():.4f}')
print(f'PyTorch Loss2: {l2.item():.4f}')

PyTorch Loss1: 0.4557
PyTorch Loss2: 1.4073


In [10]:
# Binary Classification
class NeuralNet1(nn.Module):
    """Feed-forward net for binary classification: Linear -> ReLU -> Linear -> sigmoid.

    Args:
        input_size: number of input features of the first linear layer.
        hidden_size: number of units in the hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # A single output unit.
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run x through both layers and squash the result with a sigmoid."""
        hidden = self.relu(self.linear1(x))
        logit = self.linear2(hidden)
        # The sigmoid is applied here, inside the model.
        return torch.sigmoid(logit)
    
# NeuralNet1.forward already ends in a sigmoid; it is paired with nn.BCELoss here.
model = NeuralNet1(hidden_size=5, input_size=28 * 28)
criterion = nn.BCELoss()

In [12]:
class NeuralNet2(nn.Module):
    """Feed-forward net for multi-class classification: Linear -> ReLU -> Linear.

    Args:
        input_size: number of input features of the first linear layer.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output scores, one per class.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw output of the second linear layer."""
        hidden = self.relu(self.linear1(x))
        # No softmax here: the raw scores are returned as-is.
        return self.linear2(hidden)
    
# NeuralNet2.forward returns raw scores; nn.CrossEntropyLoss applies the softmax itself.
model = NeuralNet2(num_classes=3, hidden_size=5, input_size=28 * 28)
criterion = nn.CrossEntropyLoss()