# Softmax Activation Function

In [1]:
import numpy as np

In [2]:
def softmax1(Z):
    """Naive softmax taken over axis 0 (down each column of a 2-D array).

    Every entry is exponentiated and then divided by the sum of the
    exponentials that share its position along axis 0.

    No shift is applied before ``np.exp``, so large positive entries overflow
    to ``inf`` and the affected results become ``nan``.
    """
    exp_scores = np.exp(Z)
    # keepdims=True keeps the reduced axis so the totals broadcast back over exp_scores.
    return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)

In [3]:
# Softmax Example: small scores, so the naive implementation is safe here
Z = np.array([
    [0.1, -0.1, -0.2],
    [-0.2, 0.2, 0.3],
    [-0.3, 0.1, 0.2],
    [0.4, -0.3, -0.5],
])
A = softmax1(Z)

print("Z: \n{}".format(Z))
print("A: \n{}".format(A))

# softmax1 normalizes over axis 0, so summing over the rows
# (i.e. down each column) should give 1 for every column
print("Sum of A in row direction: {}".format(A.sum(axis=0)))

Z: 
[[ 0.1 -0.1 -0.2]
 [-0.2  0.2  0.3]
 [-0.3  0.1  0.2]
 [ 0.4 -0.3 -0.5]]
A: 
[[0.26588694 0.22779083 0.20486076]
 [0.19697389 0.30748546 0.3377583 ]
 [0.17822935 0.27822435 0.30561635]
 [0.35890983 0.18649936 0.15176459]]
Sum of A in row direction: [1. 1. 1.]


### Example with numerical overflow

In [4]:
# Scores large enough that np.exp(1000) exceeds the float64 range
Z = np.array([
    [1000, -1000, 0],
    [-500, 500, 250],
])
print("Z: \n{}".format(Z))

# The naive softmax overflows on this input and emits RuntimeWarnings
A = softmax1(Z)

Z: 
[[ 1000 -1000     0]
 [ -500   500   250]]


  
  after removing the cwd from sys.path.


### Adjustment to softmax to avoid overflow

In [5]:
def softmax2(Z, axis=0):
    """Numerically stable softmax along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating. The common
    factor ``exp(-max)`` cancels in the ratio, so the result is mathematically
    unchanged, but the largest exponent becomes exactly 0 and ``np.exp`` can
    no longer overflow.

    Parameters
    ----------
    Z : array_like
        Input scores. Non-floating input (integer, boolean) is promoted to
        float64 before any arithmetic; floating and complex dtypes are kept.
    axis : int, optional
        Axis over which to normalize. Defaults to 0 (down each column of a
        2-D array), matching the original behavior.

    Returns
    -------
    ndarray
        Array with the same shape as ``Z``; for finite input the entries
        along ``axis`` sum to 1 up to rounding.
    """
    Z = np.asanyarray(Z)
    if not np.issubdtype(Z.dtype, np.inexact):
        # Integer arithmetic wraps silently: with int8, -100 - 100 becomes 56,
        # which would turn the shift below into garbage. Work in float64 instead.
        Z = Z.astype(np.float64)
    Zmax = np.amax(Z, axis=axis, keepdims=True)
    numerator = np.exp(Z - Zmax)
    # keepdims=True keeps the reduced axis so the totals broadcast back over numerator.
    denominator = np.sum(numerator, axis=axis, keepdims=True)
    return numerator / denominator

In [6]:
# Re-run the large-valued Z from the overflow example through the shifted softmax
A = softmax2(Z)

print("A: \n{}".format(A))

A: 
[[1.00000000e+000 0.00000000e+000 2.66919022e-109]
 [0.00000000e+000 1.00000000e+000 1.00000000e+000]]
