In [2]:
#softmax = exponential(output)/sum(exponentials(outputs))
#squashes each output to a value between 0 and 1, and the outputs sum to 1

In [3]:
import torch
import torch.nn as nn
import numpy as np

def softmax(x):
    """Compute the softmax of ``x`` along axis 0.

    softmax(x)_i = exp(x_i) / sum_j exp(x_j)

    Parameters
    ----------
    x : array_like
        Raw scores (logits). Normalisation happens along the first axis.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x`` whose entries along axis 0 are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(x)
    # Subtracting the per-column maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (which would
    # otherwise yield inf/inf = nan for large scores).
    shifted = x - np.max(x, axis=0)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0)

# Raw scores for a three-class example, turned into probabilities.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy: ', outputs)

softmax numpy:  [0.65900114 0.24243297 0.09856589]


In [5]:
# Same scores as a tensor; dim=0 normalises along the first (and only) axis.
x = torch.tensor([2.0, 1.0, 0.1])
output = torch.softmax(x, dim=0)
print(output)

tensor([0.6590, 0.2424, 0.0986])


In [6]:
#softmax is often combined with cross-entropy loss
#cross-entropy is used in multi-class problems
#the loss increases as the predicted probability diverges from the actual label
#the better our prediction, the lower our loss
#bad prediction -> high cross-entropy loss

In [7]:
#cross entropy = -mean(sum(actual*log(predicted))), where the mean is taken over the samples

In [8]:
def cross_entropy(actual, predicted, eps=1e-15):
    """Cross-entropy between one-hot targets and predicted probabilities.

    Parameters
    ----------
    actual : array_like
        One-hot encoded target(s).
    predicted : array_like
        Predicted class probabilities, same shape as ``actual``.
    eps : float, optional
        Lower bound applied to ``predicted`` before taking the logarithm.

    Returns
    -------
    float
        ``-sum(actual * log(predicted))``. The sum is not divided by the
        number of samples; normalise afterwards if a mean is wanted.
    """
    actual = np.asarray(actual)
    # log(0) is -inf, and 0 * -inf is nan, so a single zero probability
    # would poison the whole sum. Clipping from below keeps the loss finite.
    predicted = np.clip(np.asarray(predicted, dtype=float), eps, None)
    return -np.sum(actual * np.log(predicted))

In [9]:
# The NumPy version expects a one-hot encoded target.
Y = np.array([1, 0, 0])
# Predictions are class probabilities, so each vector must sum to 1.
Y_pred_good = np.array([0.7, 0.1, 0.2])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


In [10]:
loss = nn.CrossEntropyLoss()
# nn.CrossEntropyLoss applies nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss),
# so there must be no softmax in the model's last layer - we must not apply it ourselves.
# Y holds class labels (class indices), not one-hot vectors.
# Y_pred holds raw scores (logits), with no softmax applied.

In [11]:
# One sample, three classes: logits are n_samples x n_classes = 1x3.
Y = torch.tensor([0])  # index of the correct class
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
print(l1.item(), l2.item(), sep='\n')

0.4170299470424652
1.840616226196289


In [12]:
# The predicted class is the index of the largest logit in each row (dim=1).
predictions1 = torch.max(Y_pred_good, dim=1).indices
predictions2 = torch.max(Y_pred_bad, dim=1).indices

In [13]:
# Show the predicted class index for each set of logits, one per line.
print(predictions1, predictions2, sep='\n')

tensor([0])
tensor([1])


In [14]:
# Batch of 3 samples: logits are n_samples x n_classes = 3x3,
# and Y holds one class index per sample.
Y = torch.tensor([2, 0, 1])
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.1, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
Y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])

In [15]:
# The good logits should give the lower loss, the bad logits the higher one.
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
print(l1.item(), l2.item(), sep='\n')

0.29082828760147095
1.6241613626480103


In [16]:
#Neural network with Softmax
#Multiclass classification problem
#input layer -> hidden layer (with an activation function in between) -> linear layer (one output per class) ->
#softmax to get the probabilities

In [17]:
class NeuralNet2(nn.Module):
    """Two-layer fully connected network for multi-class classification.

    The network returns raw scores (logits), one per class. No softmax is
    applied in ``forward`` because nn.CrossEntropyLoss applies it internally.

    Parameters
    ----------
    input_size : int
        Number of input features per sample.
    hidden_size : int
        Number of units in the hidden layer.
    num_classes : int
        Number of output classes (size of the logits vector).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()  # activation function
        # The second layer consumes the hidden activations and must emit one
        # score per class; its input size is hidden_size, not input_size.
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits of shape (..., num_classes) for input (..., input_size)."""
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # No softmax here: the loss function handles it.
        return out
    
# Multi-class setup: 28*28 input features, 5 hidden units, 3 classes.
model = NeuralNet2(input_size=28 * 28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss()  # applies Softmax

In [19]:
#for binary classification we use a sigmoid output and the binary cross-entropy loss (nn.BCELoss)
class NeuralNet1(nn.Module):
    """Two-layer fully connected network for binary classification.

    The final linear layer has a single output, which ``forward`` passes
    through a sigmoid so the result is a probability suitable for nn.BCELoss.

    Parameters
    ----------
    input_size : int
        Number of input features per sample.
    hidden_size : int
        Number of units in the hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return probabilities of shape (..., 1) for input (..., input_size)."""
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # Unlike the multi-class case, the sigmoid is applied here because
        # nn.BCELoss expects probabilities rather than raw scores.
        y_pred = torch.sigmoid(out)
        return y_pred
# Binary setup: 28*28 input features, 5 hidden units, a single sigmoid output.
model = NeuralNet1(input_size=28 * 28, hidden_size=5)
criterion = nn.BCELoss()