# The softmax activation function

In [1]:
import numpy as np

In [2]:
# Example raw outputs of a layer for a single sample (three values).
layer_outputs = [4.8, 1.21, 2.385]

In [3]:
# Exponentiate element-wise: every value becomes positive and the ordering is preserved.
exp_values = np.exp(layer_outputs)
exp_values

array([121.51041752,   3.35348465,  10.85906266])

In [4]:
# Divide by the total so the values sum to 1 and can be read as probabilities.
norm_values = exp_values / np.sum(exp_values)
norm_values

array([0.89528266, 0.02470831, 0.08000903])

In [5]:
# Sanity check: the normalized values should sum to 1 (up to floating-point rounding).
np.sum(norm_values)

0.9999999999999999

In [6]:
# Rebind layer_outputs to a 3x3 array. The per-sample code later in this
# notebook reduces over axis=1, i.e. it treats each row as one sample.
layer_outputs = np.array([[4.8, 1.21, 2.385],
                         [8.9, -1.81, 0.2],
                         [1.41, 1.051, 0.026]])

In [7]:
# With no axis argument, np.sum adds up every element and returns one scalar.
print(np.sum(layer_outputs))

18.172


In [8]:
# axis=None is the default, so this is the same grand total over all elements.
np.sum(layer_outputs, axis=None)

18.172

In [15]:
# np.exp works element-wise, so the result has the same 3x3 shape as layer_outputs.
exp_layer_outputs = np.exp(layer_outputs)
exp_layer_outputs

array([[1.21510418e+02, 3.35348465e+00, 1.08590627e+01],
       [7.33197354e+03, 1.63654137e-01, 1.22140276e+00],
       [4.09595540e+00, 2.86051020e+00, 1.02634095e+00]])

In [16]:
# Dividing by the grand total normalizes over the whole array: all nine entries
# together sum to 1, but individual rows do not, so this is not a per-sample softmax.
exp_layer_outputs/np.sum(exp_layer_outputs, axis=None)

array([[1.62510862e-02, 4.48502847e-04, 1.45231633e-03],
       [9.80595215e-01, 2.18874853e-05, 1.63353249e-04],
       [5.47802614e-04, 3.82571295e-04, 1.37265228e-04]])

In [17]:
# Check: after dividing by the grand total, the whole array sums to 1.
np.sum(exp_layer_outputs/np.sum(exp_layer_outputs, axis=None))

1.0

In [20]:
# axis=0 sums down each column; the length-3 result broadcasts across the rows,
# so every column (not every row) ends up normalized.
exp_layer_outputs/np.sum(exp_layer_outputs, axis=0)

array([[1.62935455e-02, 5.25818316e-01, 8.28505614e-01],
       [9.83157221e-01, 2.56605745e-02, 9.31884338e-02],
       [5.49233860e-04, 4.48521109e-01, 7.83059518e-02]])

In [19]:
# Check: each column sums to 1, confirming that axis=0 normalized per column.
np.sum(exp_layer_outputs/np.sum(exp_layer_outputs, axis=0), axis=0)

array([1., 1., 1.])

In [22]:
# axis=1 sums across each row, giving one total per row.
np.sum(layer_outputs, axis=1)

array([8.395, 7.29 , 2.487])

In [23]:
# Summing the first row on its own reproduces the first entry of the axis=1 sums.
np.sum(layer_outputs[0])

8.395

In [24]:
# Indexing with 0 selects the first row of the 2-D array.
layer_outputs[0]

array([4.8  , 1.21 , 2.385])

In [25]:
# axis=0 sums down each column, giving one total per column.
np.sum(layer_outputs, axis=0)

array([15.11 ,  0.451,  2.611])

In [26]:
class ActivationSoftmax:
    """Softmax activation: converts raw scores into probabilities per sample.

    After ``forward`` has been called, the probabilities are available in
    ``self.output``.
    """

    def forward(self, inputs):
        """Compute softmax probabilities and store them in ``self.output``.

        Parameters
        ----------
        inputs : array_like
            Raw scores. A batch (2-D, one sample per row) is normalized along
            axis 1, exactly as before. A 1-D vector is treated as a single
            sample and normalized over its only axis.

        Returns
        -------
        None
            The result is stored in ``self.output`` with the same shape as
            ``inputs``; each sample's probabilities sum to 1.
        """
        inputs = np.asarray(inputs)

        # A single 1-D sample has no axis 1; reduce over its only axis instead.
        # Batches keep axis=1 so existing callers see identical results.
        axis = 0 if inputs.ndim == 1 else 1

        # Subtracting the per-sample maximum leaves the softmax result unchanged
        # but makes the largest exponent exp(0) = 1, so np.exp cannot overflow.
        shifted = inputs - np.max(inputs, axis=axis, keepdims=True)

        # Get unnormalized probabilities
        exp_values = np.exp(shifted)

        # Normalize them for each sample
        probabilities = exp_values / np.sum(exp_values, axis=axis, keepdims=True)

        self.output = probabilities

In [27]:
# e**1: Euler's number, the base of the exponential.
np.exp(1)

2.718281828459045

In [28]:
# The exponential grows quickly: an input of 10 already gives a value above 22,000.
np.exp(10)

22026.465794806718

In [29]:
# An input of 100 gives a value on the order of 1e43, still representable as a float.
np.exp(100)

2.6881171418161356e+43

In [30]:
# Too large for a float64: NumPy returns inf (and emits an overflow warning).
# ActivationSoftmax.forward avoids this by subtracting the row maximum before np.exp.
np.exp(1000)

  np.exp(1000)


inf

In [32]:
# Large negative inputs underflow to exactly 0.0 instead of producing inf.
np.exp(-1000)

0.0

In [33]:
# Very small, but still a representable non-zero float.
np.exp(-100)

3.720075976020836e-44

In [34]:
# Create the activation object; its `output` attribute only exists after forward() runs.
softmax = ActivationSoftmax()

In [36]:
# forward() stores its result on the instance rather than returning it,
# so the probabilities are read from softmax.output afterwards.
softmax.forward([[1, 2, 3]])
softmax.output

array([[0.09003057, 0.24472847, 0.66524096]])

In [45]:
# Halving the inputs shrinks the gaps between them, which flattens the
# distribution: the largest probability drops from about 0.665 to about 0.506.
softmax.forward(np.array([[1, 2, 3]]) / 2.)
softmax.output 

array([[0.18632372, 0.30719589, 0.50648039]])

In [44]:
# Dividing an integer array by a float yields a float array: these are the
# halved inputs that the scaled softmax call uses.
a = np.array([[1, 2, 3]])
a/2.

array([[0.5, 1. , 1.5]])

In [39]:
# NOTE(review): the output displayed for this cell is stale. It was produced by an
# earlier execution (In [39]) when softmax.output evidently held three rows (the
# three sums add up to 3). With the single-row softmax.output produced by the
# forward() calls in this notebook, summing over axis=0 just returns that row.
# Restart the kernel and run all cells to refresh it.
np.sum(softmax.output, axis=0)

array([2.40819096, 0.38306452, 0.20874452])