In [181]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.nn import Sigmoid, Tanh, Softmax, ReLU
from torch.nn.functional import leaky_relu, relu, softmax, tanh, sigmoid

x = np.linspace(-10, +10, 1001)

In [None]:
# linear
# useful in regression [last layer]. the output must be a continuous value.
linear = x

# step
# disadvantage: same error value for all wrong samples
step = np.where(x >= 0, 1, 0)

# sign
# disadvantage: same error value for all wrong samples
sign = np.where(x < 0, -1, +1)

# sigmoid
# typically used in the last layer of binary classification
sigmoid = 1 / (1 + np.exp(-x))

# tanh : typically used in hidden layers
# typically used in the middle layers
tanh = 2 * (1 / (1 + np.exp(-2*x))) - 1 # 2 * sigmoid(2x) - 1
tanh = (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))

# relu
# typically used in the middle layers
# Use it to prevent gradient vanishing and exploding
# x<0 -> 0 gradient -> dead neurons -> no update
# it is not differentiable at exactly 0 [a subgradient of 0 is used at 0]
relu = np.where(x > 0, x, 0)

# leaky relu
# improved version of ReLU [gradient leaks for x < 0]
# it is not differentiable at exactly 0 [a subgradient of 0 is used at 0]
leacky_relu = np.where(x > 0, x, 0.01 * x)

# softmax
# typically used in the last layer of multi-class classification
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

# plot
fig, axs = plt.subplots(nrows= 2, ncols= 4, figsize= (16, 8), layout= 'compressed')
fig.suptitle("Activation Functions")

axs[0, 0].plot(x, linear)
axs[0, 0].grid(True)
axs[0, 0].set(title= 'linear')

axs[0, 1].plot(x, step)
axs[0, 1].grid(True)
axs[0, 1].set(title= 'step', xticks= [0], yticks= [-1, 0, +1])

axs[0, 2].plot(x, sign)
axs[0, 2].grid(True)
axs[0, 2].set(title= 'sign', xticks= [0], yticks= [-1, 0, +1])

axs[0, 3].plot(x, sigmoid)
axs[0, 3].grid(True)
axs[0, 3].set(title= 'sigmoid', xticks= np.arange(-10, +12, 2), yticks= np.arange(0, 1.1, .1))

axs[1, 0].plot(x, tanh)
axs[1, 0].grid(True)
axs[1, 0].set(title= 'TanH', xticks= np.arange(-10, +12, 2), yticks= np.arange(-1, 1.1, .2))

axs[1, 1].plot(x, relu)
axs[1, 1].grid(True)
axs[1, 1].set(title= 'ReLU')

axs[1, 2].plot(x, leacky_relu)
axs[1, 2].grid(True)
axs[1, 2].set(title= 'Leaky ReLU')

axs[1, 3].plot(x, softmax(x))
axs[1, 3].grid(True)
axs[1, 3].set(title= 'softmax')

plt.show()

# Create Custom Activation Function

In [183]:
torch.manual_seed(42)

<torch._C.Generator at 0x27caa3c2410>

In [184]:
def custom_softmax(x):
    exp_values = torch.exp(x - torch.max(x, dim=1, keepdim=True)[0])
    probabilities = exp_values / torch.sum(exp_values, dim=1, keepdim=True)
    return probabilities

In [185]:
class CustomActivation(torch.nn.Module):
    def __init__(self):
        super(CustomActivation, self).__init__()

    def forward(self, x):
        exp_values = torch.exp(x - torch.max(x, dim= 1, keepdim= True)[0])
        probabilities = exp_values / torch.sum(exp_values, dim= 1)
        return probabilities

In [186]:
class CustomModel1(torch.nn.Module):
    def __init__(self) -> None:
        super(CustomModel1, self).__init__()

        self.linear = torch.nn.Linear(3, 3)
        self.activtion = CustomActivation()

    def forward(self, x):
        x = self.linear(x)
        x = self.activtion(x)
        return x

model = CustomModel1()
model

CustomModel1(
  (linear): Linear(in_features=3, out_features=3, bias=True)
  (activtion): CustomActivation()
)

In [187]:
class CustomModel2(torch.nn.Module):
    def __init__(self) -> None:
        super(CustomModel2, self).__init__()

        self.linear = torch.nn.Linear(3, 3)

    def forward(self, x):
        x = self.linear(x)
        x = CustomActivation()(x)
        return x

model = CustomModel2()
model

CustomModel2(
  (linear): Linear(in_features=3, out_features=3, bias=True)
)

In [189]:
train_x = torch.tensor([[1, 2, 3], [-4, -5, -6], [7, 8, 9]], dtype= torch.float32)
train_y = torch.tensor([1, 2, 0], dtype= torch.float32)

predictions = model(train_x)

# log
print(f"predictions:\n{predictions}\n")
print(f"predictions.argmax(dim= 1): {predictions.argmax(dim= 1)}")

predictions:
tensor([[5.6504e-01, 5.8902e-01, 1.6239e-01],
        [1.1976e-03, 6.9004e-03, 9.2579e-01],
        [5.6504e-01, 7.9282e-02, 1.4385e-04]], grad_fn=<DivBackward0>)

predictions.argmax(dim= 1): tensor([1, 2, 0])
