In [None]:
import torch
import torch.nn as nn
import numpy as np

### **1. Manual Softmax Computation & Evaluation**

In [None]:
def softmax(x):
  """Return the softmax of ``x`` taken along axis 0.

  Args:
    x: Array-like of raw scores (logits). For a 1-D input the whole vector
      is normalized; for higher-rank input each column along axis 0 is
      normalized independently.

  Returns:
    A NumPy array with the same shape as ``x`` whose entries along axis 0
    are non-negative and sum to 1.
  """
  x = np.asarray(x)
  # Softmax is invariant to adding a constant to every score, so shift by the
  # maximum: the largest exponent becomes exp(0) = 1, which prevents the
  # overflow (inf / inf = nan) that np.exp hits on large logits.
  shifted = x - np.max(x, axis=0, keepdims=True)
  # Exponentiate once and reuse for numerator and denominator.
  exps = np.exp(shifted)
  return exps / np.sum(exps, axis=0, keepdims=True)

In [None]:
# Raw scores (logits) that the hand-written softmax turns into probabilities.
x = np.array(
    [2.2, 4.6, 6.8, 1.23, 1.58]
)
outputs = softmax(x)

# The largest logit (6.8) should receive most of the probability mass.
print('Softmax Matrix In Numpy ==> ', outputs)

Softmax Matrix In Numpy ==>  [0.00889486 0.09804958 0.88489874 0.00337189 0.00478494]


### **2. Softmax Computation & Evaluation Using Tensor Functions**

In [None]:
# Same logits as the NumPy example, now as a 1-D torch tensor.
x = torch.tensor([2.2, 4.6, 6.8, 1.23, 1.58])

# Normalize over dim 0, the only dimension of this vector.
outputs = x.softmax(dim=0)

print('Softmax Matrix In Tensor ==> ', outputs)

Softmax Matrix In Tensor ==>  tensor([0.0089, 0.0980, 0.8849, 0.0034, 0.0048])


### **3. Manual Cross-Entropy Loss Computation**

In [None]:
def cross_entropy(y_act, y_pred, eps=1e-15):
  """Return the un-normalized cross-entropy between targets and predictions.

  Computes ``-sum(y_act * log(y_pred))`` over every element.

  Args:
    y_act: Array-like of target values (e.g. a one-hot vector).
    y_pred: Array-like of predicted probabilities, same shape as ``y_act``.
    eps: Lower bound applied to ``y_pred`` before taking the log.

  Returns:
    The summed loss as a NumPy float. It is NOT averaged; to get the mean
    loss over a batch, divide by the number of samples (``y_pred.shape[0]``
    for a 2-D ``(n_samples, n_classes)`` input).
  """
  y_act = np.asarray(y_act, dtype=float)
  y_pred = np.asarray(y_pred, dtype=float)
  # Floor predictions at eps: log(0) is -inf, which makes the loss inf when
  # the target is 1 and nan (0 * -inf) when the target is 0.
  safe_pred = np.clip(y_pred, eps, None)
  celoss = -np.sum(y_act * np.log(safe_pred))
  return celoss

In [None]:
# One-hot target: class 0 is the correct class.
Y = np.array([1, 0, 0])

# Two candidate predictions; only the entry at the true class (index 0)
# contributes to the loss because the other target entries are 0.
Y_pred_good = np.array([0.9, 0.1, 0.2])
Y_pred_bad = np.array([0.14, 0.7, 0.69])

ce_good, ce_bad = (
    cross_entropy(Y, candidate) for candidate in (Y_pred_good, Y_pred_bad)
)

# Lower loss for the confident-and-correct prediction, higher for the other.
print(ce_good, ce_bad, sep='\n')

0.10536051565782628
1.9661128563728327


### **4. Cross-Entropy Loss Computation & Evaluation Using Tensor Functions**

#### **Note:**

#### nn.CrossEntropyLoss applies:

#### --> nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss)

#### --> We don't need to apply softmax function in last layer

#### --> Y should hold only class indices (e.g. `0`), not one-hot vectors, when passed to nn.CrossEntropyLoss.

#### --> Similarly, Y_pred should have only raw scores(logits) and no softmax!

In [None]:
loss = nn.CrossEntropyLoss()

# Target is a class index, not a one-hot vector: the correct class is '0'.
Y = torch.tensor([0])

# Logits are laid out as nsamples x nclasses = 1x3 (classes '0', '1', '2').
# Good: class '0' has the largest raw score, so the prediction is accurate
# and the cross-entropy loss is lower.
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
# Bad: class '1' outscores class '0', so the prediction is poorer and the
# loss is higher.
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)

print("Better prediction loss ==> ", l1.item())
print("Worse prediction loss ==> ", l2.item())

Better prediction loss ==>  0.4170299470424652
Worse prediction loss ==>  1.840616226196289


In [None]:
# torch.max along dim=1 (the class axis) returns (max values, their indices);
# only the indices, i.e. the predicted classes, are kept.
_, prediction1 = torch.max(Y_pred_good, dim=1)
_, prediction2 = torch.max(Y_pred_bad, dim=1)

# Class with the highest score per sample: '0' for the good logits,
# '1' for the bad ones.
print(prediction1, prediction2, sep='\n')

tensor([0])
tensor([1])


In [None]:
# One target class index per sample: classes '2', '0' and '1' are the correct
# classes for samples 0, 1 and 2 respectively.
Y_new = torch.tensor([2, 0, 1])

# Logits are laid out as n_samples x n_classes = 3x3, one row per sample.
Y_pred_good_new = torch.tensor([
    [0.1, 1.0, 2.1],  # class '2' has the highest score -> pred = class '2' (correct)
    [2.0, 1.0, 0.1],  # class '0' has the highest score -> pred = class '0' (correct)
    [0.1, 3.0, 0.1],  # class '1' has the highest score -> pred = class '1' (correct)
])

Y_pred_bad_new = torch.tensor([
    [2.1, 1.0, 0.1],  # pred = class '0', target is '2' (wrong)
    [2.0, 1.0, 2.1],  # pred = class '2', target is '0' (wrong)
    [0.1, 3.0, 0.1],  # pred = class '1', target is '1' (correct)
])

# `loss` is the nn.CrossEntropyLoss() instance created in an earlier cell;
# with its default reduction the per-sample losses are averaged.
l1_new = loss(Y_pred_good_new, Y_new)
l2_new = loss(Y_pred_bad_new, Y_new)

# Print the losses of THIS batch. The cell previously printed l1/l2, which are
# the single-sample losses from the earlier example, not the values computed here.
print(l1_new)
print(l2_new)


tensor(0.4170)
tensor(1.8406)


In [None]:
# Index of the largest logit in each row (dim=1 is the class axis), i.e. the
# predicted class for every sample in the batch.
_, prediction1_new = torch.max(Y_pred_good_new, dim=1)
_, prediction2_new = torch.max(Y_pred_bad_new, dim=1)

print(prediction1_new, prediction2_new, sep='\n')

tensor([2, 0, 1])
tensor([0, 2, 1])
