# Possible activation functions 

### 1) ReLU

In [None]:
class CustomReLU(nn.Module):
    """Rectified linear unit: ``max(x, 0)`` applied element-wise.

    Args:
        inplace: When True, the input tensor is overwritten with the result
            instead of allocating a new tensor.
    """

    def __init__(self, inplace: bool = False):
        super().__init__()
        self.inplace = inplace

    def forward(self, x):
        """Return ReLU of ``x``; modifies ``x`` itself when ``inplace`` is set."""
        # torch.relu() has no ``inplace`` keyword (passing it raises TypeError);
        # the in-place variant is the separate function torch.relu_().
        if self.inplace:
            return torch.relu_(x)
        return torch.relu(x)

### 2) Swish(x) = x * sigmoid(x)

Better than ReLU on deeper models

In [None]:
class Swish(nn.Module):
    """Swish activation: ``x * sigmoid(x)``, applied element-wise."""

    def forward(self, x):
        """Return ``x`` scaled by its own sigmoid."""
        gate = torch.sigmoid(x)
        return x * gate

### 3) Mish(x) = x * tanh(softplus(x))

Rumored to give better performance

In [None]:
class Mish(nn.Module):
    """Mish activation: ``x * tanh(softplus(x))``, applied element-wise."""

    def forward(self, x):
        """Return ``x`` scaled by ``tanh(softplus(x))``."""
        smooth = F.softplus(x)
        return x * torch.tanh(smooth)

### 4) GELU (Gaussian Error Linear Unit)

Smooth approximation of the ReLU function (used in the BERT transformer model)

In [None]:
class GELU(nn.Module):
    """Gaussian Error Linear Unit, delegating to ``F.gelu`` with its defaults."""

    def forward(self, x):
        """Return ``F.gelu(x)``."""
        activated = F.gelu(x)
        return activated

### 5) ELU (Exponential Linear Unit)

Defined as: ELU(x) = x if x > 0, else α(exp(x) - 1).
Helps in dealing with the vanishing gradient problem.

In [None]:
class ELU(nn.Module):
    """Exponential Linear Unit: ``x`` if ``x > 0`` else ``alpha * (exp(x) - 1)``.

    Args:
        alpha: Scale applied to the negative branch; stored on the module and
            forwarded to ``F.elu`` on every call.
    """

    def __init__(self, alpha=1.0):
        super().__init__()
        self.alpha = alpha

    def forward(self, x):
        """Return ``F.elu`` of ``x`` using the configured ``alpha``."""
        return F.elu(x, alpha=self.alpha)