In [None]:
import torch
import torch.nn as nn
import numpy as np

def softmax(x):
  return np.exp(x) / np.sum(np.exp(x), axis=0)

def cross_entropy(actual, predicted):
  loss = -np.sum(actual * np.log(predicted))
  return loss

Y = np.array([1, 0, 0])

Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)
print(f'Loss1 numpy {l1:.4f}')
print(f'Loss2 numpy {l2:.4f}')

Loss1 numpy 0.3567
Loss2 numpy 2.3026


In [None]:
# in Pytorch

loss = nn.CrossEntropyLoss()

Y = torch.tensor([
  2, 
  0, 
  1
])

Y_pred_good = torch.tensor([
  [0.0, 1.0, 2.1],
  [2.0, 1.0, 0.1],
  [0.1, 3.0, 0.1]
  ]) # samples x classes = 1x3
Y_pred_bad = torch.tensor([
  [2.0, 1.0, 0.1],
  [0.1, 1.0, 2.1],
  [0.1, 3.1, 0.1]
  ]) # samples x classes = 1x3

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

print(l1.item())
print(l2.item())

_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)

print(predictions1)
print(predictions2)

0.29888758063316345
1.5986647605895996
tensor([2, 0, 1])
tensor([0, 2, 1])


In [26]:
import torch
import torch.nn as nn
# Multiclass Problem
class NeuralNet2(nn.Module):
  def __init__(self, input_dim, hidden_size, output_dim):
    super(NeuralNet2, self).__init__()
    self.lin1 = nn.Linear(input_dim, hidden_size)
    self.relu = nn.ReLU()
    self.lin2 = nn.Linear(hidden_size, output_dim)
  
  def forward(self, x):
    out = self.lin1(x)
    out = self.relu(out)
    out = self.lin2(out)
    print(out)
    # No softmax at the end
    return out

img_dimension = 4*4
num_classes = 3
model = NeuralNet2(input_dim=img_dimension, hidden_size=5, output_dim=num_classes)

# loss and optimizer
criterion = nn.CrossEntropyLoss() # Applies the softmax
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Create input data: 5 samples, each with a 4x4 image (16 features)
# Shape should be [4, 4, 4] -> flatten to [4, 16]
X_train = torch.tensor([
  [[100, 2, 3, 4], [200, 3, 4, 5], [200,5,2,3], [200,5,2,3]],
  [[1, 2, 3, 400], [2, 3, 4, 500], [2,5,2,300], [2,5,2,300]],
  [[1, 2, 3, 200], [2, 3, 4, 300], [2,5,2,500], [2,5,2,600]],
  [[1, 200, 3, 4], [2, 300, 4, 5], [2,500,2,3], [2,500,2,3]],
  [[1, 150, 3, 4], [2, 582, 4, 5], [2,184,2,3], [2,294,2,3]],
], dtype=torch.float32)
X_test = torch.tensor([
  [[1, 200, 3, 4], [2, 300, 4, 5], [2,500,2,3], [2,500,2,3]],
  [[1, 2, 3, 400], [2, 3, 4, 500], [2,5,2,300], [2,5,2,300]],
  [[100, 2, 3, 4], [200, 3, 4, 5], [200,5,2,3], [200,5,2,3]],
], dtype=torch.float32)

# Flatten the 4x4 image to 16 features: [batch_size, 4, 4] -> [batch_size, 16]
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Y_train should be torch.long (not float) for CrossEntropyLoss
# Shape: [batch_size] for 5 samples
Y_train = torch.tensor([0, 2, 2, 1, 1], dtype=torch.long)
Y_test = torch.tensor([1, 2, 0], dtype=torch.long)

num_epoch = 1000
for epoch in range(num_epoch):
  Y_pred = model(X_train)
  
  # Only print loss every 100 epochs to reduce clutter
  if epoch % 1000 == 0:
    print(f'Epoch {epoch}, Loss: {criterion(Y_pred, Y_train).item():.4f}')
  
  loss = criterion(Y_pred, Y_train)

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

print(f'\nFinal Loss: {criterion(model(X_train), Y_train).item():.4f}')
# Print predictions for each sample
Y_test_list = []
Y_test_pred = model(X_test)
for i in range(len(Y_test_pred)):
  _, pred = torch.max(Y_test_pred[i], 0)
  Y_test_list.append(pred.item())

print(f'Predictions: {Y_test_list}')


tensor([[ 28.1224,  10.7516,   5.6213],
        [  2.5340,   5.3580,  -5.0613],
        [  9.6577,   9.9914,  -8.5291],
        [ 18.3072,  32.9770, -32.5518],
        [ -0.4069,   0.2084,   0.0644]], grad_fn=<AddmmBackward0>)
Epoch 0, Loss: 6.0831
tensor([[ 25.7129,   7.3501,   4.5631],
        [ -0.4169,   0.1984,   0.0744],
        [  0.8574,   2.4767,  -2.1930],
        [ 14.2972,  26.5054, -26.1069],
        [ -0.4169,   0.1984,   0.0744]], grad_fn=<AddmmBackward0>)
tensor([[ 23.6226,   4.3947,   3.2464],
        [ -0.4268,   0.1886,   0.0842],
        [ -0.4268,   0.1886,   0.0842],
        [ 10.9547,  20.8411, -20.4766],
        [ -0.4268,   0.1886,   0.0842]], grad_fn=<AddmmBackward0>)
tensor([[ 21.8236,   1.8581,   1.7697],
        [ -0.4368,   0.1800,   0.0938],
        [ -0.4368,   0.1800,   0.0938],
        [  8.5144,  16.6198, -16.2777],
        [ -0.4368,   0.1800,   0.0938]], grad_fn=<AddmmBackward0>)
tensor([[ 20.2595,  -0.3344,   0.2048],
        [ -0.4468,   0.1719,  