In [1]:
import torch.nn as nn
import torch
import numpy as np
from torch import exp, where, erf, tensor, log, argwhere, ones_like, ones, heaviside, sign
from torch import pow as POW
from numpy import pi, e
from scipy.special import expi
from numpy import log as nplog 
from torch import max as Max
import matplotlib.pyplot as plt
from scipy.special import erf as sp_erf

In [2]:
# Scratch cell (disabled): sample inputs for experimenting with torch functions and plotting.
# `c` is the on/off switch shared by this cell and the other `if c == 1` scratch cells;
# with c = 0 none of their bodies run.
c = 0
if c == 1: 
    # float64 grid from -2 (inclusive) to 1 (exclusive) in steps of 0.01
    vals = torch.arange(-2., 1., 0.01, dtype=torch.float64)
    nvals = -vals
    base = 2
    # one standard-normal sample per grid point
    svals = torch.randn(len(vals))
    print(vals.device)

In [3]:
# Scratch cell (disabled unless c == 1): plot the currently selected activation over `vals`.
# NOTE(review): `supeReluTest` is only assigned in a later cell, so enabling this on a
# fresh kernel needs that cell to have been run first.
if c == 1: 
    plt.grid()
    plt.plot(vals, supeReluTest(vals))

In [4]:
# Scratch cell (disabled unless c == 1): write the same piecewise expressions in two
# different ways and plot their pointwise difference if they are not exactly equal.
if c==1:
    datax = torch.arange(-10, 5, 0.01, dtype=torch.float64, requires_grad=True) 
    #datax = tensor([0])
    z = 1 
    
    # Variant 1: built with POW(datax/z, -k).
    # grad  -> 1 - exp(-(datax/z)^-2) for datax < 0, otherwise 1
    grad = where(datax < 0, 1-exp(-POW(datax/z, -2)), 1)

    # grad2 -> datax*grad - sqrt(pi)*(erf((datax/z)^-1) + 1) for datax < 0, otherwise datax*grad
    grad2 = datax*grad - where(datax < 0, pi**(0.5)*(erf(POW(datax/z, -1))+1), 0)

    # Variant 2: built with (z/datax) directly; here the sqrt(pi) term carries an explicit factor z.
    ddata = torch.where(datax < 0, 1-exp(-(z/datax)**2), 1)
    
    data = torch.where(datax < 0, datax*(1-exp(-(z/datax)**2))
                         - z*pi**0.5*(erf(z/datax)+1), datax)

    # Exact (bitwise) comparison; only plot the residuals when the two variants disagree.
    if not(torch.all(grad==ddata) and torch.all(grad2==data)): 
        plt.plot(datax.detach().numpy(), data.detach().numpy()-grad2.detach().numpy())
        plt.plot(datax.detach().numpy(), ddata.detach().numpy()-grad.detach().numpy())
        plt.show()

In [5]:
# Scratch cell (disabled unless c == 1): overlay the two variants computed in the
# previous scratch cell (`datax`, `data`, `grad2`, `ddata`, `grad` come from there).
if c==1:
    # Figure 1: the two function-value variants on the same axes.
    plt.plot(datax.detach().numpy(), data.detach().numpy())
    plt.plot(datax.detach().numpy(), grad2.detach().numpy())
    plt.grid()
    plt.show()

    # Figure 2: the two slope variants on the same axes.
    plt.plot(datax.detach().numpy(), ddata.detach().numpy())
    plt.plot(datax.detach().numpy(), grad.detach().numpy())
    plt.grid()
    plt.show()

In [10]:

#Softplusplus graph https://www.sciencedirect.com/science/article/pii/S0925231219317163
class softplusplus(torch.autograd.Function):
    """Activation ``log(1 + exp(x)) + x/2 - log(2)`` with a hand-written backward.

    The constant ``-log(2)`` makes the function evaluate to 0 at ``x = 0``.
    """

    @staticmethod
    def forward(ctx, data:torch.Tensor):
        """Evaluate the activation element-wise.

        Args:
            ctx: autograd context; receives the tensor needed by ``backward``.
            data: floating-point input tensor.

        Returns:
            Tensor of the same shape as ``data``.
        """
        # logaddexp(x, 0) == log(exp(x) + exp(0)) == log(1 + exp(x)), but it never
        # materialises exp(x), so large inputs stay finite instead of becoming inf.
        softplus = torch.logaddexp(data, torch.zeros_like(data))

        # sigmoid(x) == 1 - 1/(1 + exp(x)) is the derivative of the softplus term;
        # saving it directly avoids recomputing (and overflowing) exp(x) in backward.
        ctx.save_for_backward(torch.sigmoid(data))

        return softplus + data/2 - nplog(2)

    @staticmethod
    def backward(ctx, grad_output:torch.Tensor):
        """Chain rule: d/dx [log(1 + exp(x)) + x/2 - log(2)] = sigmoid(x) + 1/2."""
        (sigmoid,) = ctx.saved_tensors

        grad = sigmoid + 0.5

        return grad*grad_output

#Our custom grad function
class supeRelu(torch.autograd.Function):
    """Smooth ReLU-like activation with an analytically known slope.

    For x >= 0 the output is x. For x < 0 the output is
    ``x*(1 - exp(-1/x^2)) - sqrt(pi)*(erf(1/x) + 1)``, whose derivative is
    ``1 - exp(-1/x^2)``.
    """

    @staticmethod
    def forward(ctx, data:tensor):
        """Evaluate the activation and cache its slope for ``backward``."""
        negative = data < 0

        # Slope of the activation: 1 on the non-negative side.
        slope = where(negative, 1-exp(-POW(data, -2)), 1)

        # Correction term that only applies on the negative side.
        erf_term = where(negative, pi**(0.5)*(erf(POW(data, -1))+1), 0)

        ctx.save_for_backward(slope)

        return data*slope - erf_term

    @staticmethod
    def backward(ctx, grad_output:tensor):
        """Multiply the incoming gradient by the slope cached in ``forward``."""
        (slope,) = ctx.saved_tensors

        return slope*grad_output

#Our custom grad function with trainable parameters (note this is a helper function)
class supeRelu2(torch.autograd.Function):
    """Parametrised supeRelu with hand-written gradients for x, a, b and c.

    With ``E = exp(-(b/x)^2)`` and ``G = sqrt(pi)*(erf(b/x) + 1)`` for x < 0
    (both 0 for x >= 0), the forward value is::

        f(x; a, b, c) = a * (x*(1 - c*E) - c*b*G)

    and the partial derivatives used in ``backward`` are::

        df/dx =  a*(1 - c*E)
        df/da =  x*(1 - c*E) - c*b*G
        df/db = -a*c*G
        df/dc = -a*(x*E + b*G)
    """

    @staticmethod
    def forward(ctx, data:torch.Tensor, a, b, c):
        """Evaluate f and stash everything ``backward`` needs on ``ctx``.

        Args:
            ctx: autograd context.
            data: input tensor x.
            a, b, c: parameters of the activation (tensors or plain numbers).

        Returns:
            ``a * (x*(1 - c*E) - c*b*G)`` as described on the class.
        """
        ctx.a = a
        ctx.b = b
        ctx.c = c

        val = b*POW(data, -1)

        # E: Gaussian-like factor, only active on the negative side.
        ctx.component1 = where(data < 0, exp(-POW(val, 2)), 0)

        # G: erf-based correction, only active on the negative side.
        ctx.component2 = where(data < 0, (pi**(1/2))*(erf(val)+1), 0)

        # f / a, which is also df/da.
        ctx.component3 = data*(1-c*ctx.component1) - c*b*ctx.component2

        ctx.save_for_backward(data)

        return a*ctx.component3

    @staticmethod
    def backward(ctx, grad_output:torch.Tensor):
        """Return the gradients w.r.t. (data, a, b, c); ``None`` where not required."""
        (data, ) = ctx.saved_tensors

        # Every slot must be bound: inputs that do not need a gradient (e.g. plain
        # Python numbers) are reported as None.
        grad_x = grad_a = grad_b = grad_c = None

        if ctx.needs_input_grad[0]:
            grad_x = ctx.a*(1-ctx.c*ctx.component1)*grad_output
        if ctx.needs_input_grad[1]:
            grad_a = ctx.component3*grad_output
        if ctx.needs_input_grad[2]:
            # The b-dependence inside E and G cancels, leaving only -a*c*G.
            grad_b = -ctx.a*ctx.c*ctx.component2*grad_output
        if ctx.needs_input_grad[3]:
            # component3 is linear in c: d/dc = -(x*E + b*G).
            grad_c = -ctx.a*(data*ctx.component1 + ctx.b*ctx.component2)*grad_output

        return grad_x, grad_a, grad_b, grad_c
        
#Our custom grad function with trainable parameters
class supeRelu3(nn.Module):
    """Module wrapper around ``supeRelu2`` with three learnable scalars.

    ``a``, ``b`` and ``c`` are stored as one-element parameters and passed through
    ``exp`` before use, so the values handed to ``supeRelu2`` are always positive.
    The default of 0 for each corresponds to an effective value of ``exp(0) = 1``.
    """

    def __init__(self, a = 0, b = 0, c = 0) -> None:
        super().__init__()
        # Register a, b, c (in that order) as trainable one-element parameters.
        for name, initial in (("a", a), ("b", b), ("c", c)):
            setattr(self, name, nn.Parameter(initial*torch.ones(1), requires_grad = True))
        self.fn = supeRelu2.apply

    def forward(self, x) -> tensor:
        """Apply ``supeRelu2`` to ``x`` using the exponentiated parameters."""
        a_pos = exp(self.a)
        b_pos = exp(self.b)
        c_pos = exp(self.c)

        return self.fn(x, a_pos, b_pos, c_pos)

#Our custom grad function helper with "nice" parameters
class supeRelu4(torch.autograd.Function):
    """supeRelu variant with a fixed width ``b = pi**-0.5``.

    For x >= 0 the output is x. For x < 0 the output is
    ``x*(1 - exp(-(b/x)^2)) - b*sqrt(pi)*(erf(b/x) + 1)``, whose derivative is
    ``1 - exp(-(b/x)^2)``.
    """

    @staticmethod
    def forward(ctx, data:tensor):
        """Evaluate the activation and cache its slope for ``backward``."""
        b = (pi)**(-0.5)
        negative = data < 0

        # Slope of the activation: 1 on the non-negative side.
        slope = where(negative, 1-exp(-b**2*POW(data, -2)), 1)

        # Correction term that only applies on the negative side.
        erf_term = where(negative, b*pi**(0.5)*(erf(b*POW(data, -1))+1), 0)

        ctx.save_for_backward(slope)

        return data*slope - erf_term

    @staticmethod
    def backward(ctx, grad_output:tensor):
        """Multiply the incoming gradient by the slope cached in ``forward``."""
        (slope,) = ctx.saved_tensors

        return slope*grad_output

#Our custom grad function as a nn.Module
class supeRelu5(nn.Module):
    """Parameter-free ``nn.Module`` that applies ``supeRelu4`` to its input."""

    def __init__(self) -> None:
        super().__init__()
        self.fn = supeRelu4.apply

    def forward(self, x) -> tensor:
        """Return ``supeRelu4`` evaluated on ``x``."""
        activated = self.fn(x)
        return activated

#Another activation function (unnecessary)
class supeLeakRelu(torch.autograd.Function):
    """Piecewise activation assembled from four regions of the shifted input.

    With ``s = x + c`` the regions are ``s <= -c``, ``-c < s < 0``, ``0 <= s < c``
    and ``s >= c``. The output is constant in the first region, has slope 1 in the
    last one, and the two middle regions blend between them via ``helper_function2``.
    ``b`` mixes in a linear term ``b*s``; it is fixed to 0 here.
    """

    @staticmethod
    def forward(ctx, data:tensor):
        """Evaluate the activation; constants, masks and ``s`` are kept for ``backward``."""
        # Fixed shape constants.
        ctx.a = 1.5**(0.5)
        ctx.b = 0
        ctx.c = 0.5
        ctx.a1 = ctx.a*ctx.c*e**(ctx.a**2)*pi**(0.5)

        a, c, a1 = ctx.a, ctx.c, ctx.a1

        shifted = data + c

        # Region membership of every element of the shifted input.
        ctx.mask1 = shifted <= -c
        ctx.mask2 = ((-c < shifted ) & (shifted  < 0))
        ctx.mask3 = ((shifted < c) & (shifted  >= 0))
        ctx.mask4 = shifted >= c

        # Fill the un-normalised value region by region.
        pieces = ones_like(shifted)
        pieces[ctx.mask1] = a1
        pieces[ctx.mask2] = supeLeakRelu.helper_function2(a, c, a1, shifted[ctx.mask2] + c)
        pieces[ctx.mask3] = 2*shifted[ctx.mask3]-supeLeakRelu.helper_function2(a, c, a1, shifted[ctx.mask3] - c)
        pieces[ctx.mask4] = 2*shifted[ctx.mask4] + a1

        ctx.save_for_backward(shifted)

        # Offset subtracted from the region values (value of `pieces` at s = 0).
        a2 = c + a1*sp_erf(a)

        return ctx.b*shifted+(1-ctx.b)*0.5*(pieces-a2)

    @staticmethod
    def helper_function2(a, c, a1, data:tensor):
        """Return ``u*helper_function1(a, c, u) + a1*erf(a*c/u)`` for ``u = data``."""
        bump = supeLeakRelu.helper_function1(a, c, data)

        return data*bump + a1*erf(a*c*POW(data, -1))

    @staticmethod
    def helper_function1(a, c, data:tensor):
        """Return ``exp(a^2 * (1 - c^2/u^2))`` for ``u = data``."""
        exponent = a**2*(1 - c**2*POW(data, -2))

        return exp(exponent)

    @staticmethod
    def backward(ctx, grad_output:tensor):
        """Region-wise derivative of ``forward`` times the incoming gradient."""
        (shifted, ) = ctx.saved_tensors
        a, c = ctx.a, ctx.c

        # Starts at 1, which is already the slope in the last region (mask4).
        slope = ones_like(shifted)
        slope[ctx.mask1] = 0
        slope[ctx.mask2] = supeLeakRelu.helper_function1(a, c, shifted[ctx.mask2] + c)*0.5
        slope[ctx.mask3] = 1 - supeLeakRelu.helper_function1(a, c, shifted[ctx.mask3] - c)*0.5

        # Blend with the linear term's slope b.
        slope = ctx.b + (1-ctx.b)*slope

        return slope*grad_output

class supeLeakRelu1(nn.Module):
    """Parameter-free ``nn.Module`` that applies ``supeLeakRelu`` to its input."""

    def __init__(self) -> None:
        super().__init__()
        self.fn = supeLeakRelu.apply

    def forward(self, x) -> tensor:
        """Return ``supeLeakRelu`` evaluated on ``x``."""
        activated = self.fn(x)
        return activated

#squareplus function https://arxiv.org/abs/2112.11687
class squarePlus(torch.autograd.Function):
    """Squareplus activation ``0.5*(x + sqrt(x^2 + b))`` with ``b = 0.25``."""

    @staticmethod
    def forward(ctx, data:tensor):
        """Evaluate the activation; caches ``x`` and ``x^2 + b`` for ``backward``."""
        ctx.b = 0.25

        radicand = POW(data, 2) + ctx.b

        ctx.save_for_backward(data, radicand)

        root = POW(radicand, 0.5)

        return 0.5*(data + root)

    @staticmethod
    def backward(ctx, grad_output:tensor):
        """Chain rule with d/dx = 0.5*(1 + x/sqrt(x^2 + b))."""
        data, radicand = ctx.saved_tensors

        slope = 0.5*(1 + data*POW(radicand, -0.5))

        return slope*grad_output

class squarePlus1(nn.Module):
    """Parameter-free ``nn.Module`` that applies ``squarePlus`` to its input."""

    def __init__(self) -> None:
        super().__init__()
        self.fn = squarePlus.apply

    def forward(self, x) -> tensor:
        """Return ``squarePlus`` evaluated on ``x``."""
        activated = self.fn(x)
        return activated

# https://link.springer.com/article/10.1007/s00521-017-3210-6
#Supposedly the best activation function
class modifiedElliot(torch.autograd.Function):
    """Activation ``0.5*(sqrt(a + x^2) + x - sqrt(a))`` with ``a = 1``."""

    @staticmethod
    def forward(ctx, data:tensor):
        """Evaluate the activation; caches ``sqrt(a + x^2)`` and ``x`` for ``backward``."""
        a = 1

        root = POW((a + POW(data, 2)), 0.5)

        ctx.save_for_backward(root, data)

        return 0.5*(root + data - a**0.5)

    @staticmethod
    def backward(ctx, grad_output:tensor):
        """Chain rule with d/dx = 0.5*(x/sqrt(a + x^2) + 1)."""
        root, data = ctx.saved_tensors

        slope = 0.5*(data*POW(root, -1) + 1)

        return slope*grad_output

In [11]:
# Test setup: choose the activation to verify and build a reproducible random input.
# Seed the torch RNG so the sampled input below is the same on every run.
torch.manual_seed(2)

# Exactly one assignment should be active; the others are alternatives kept for quick switching.
#supeReluTest = softplusplus.apply 
#supeReluTest = supeRelu5()
#supeReluTest = supeLeakRelu1()
# NOTE(review): `softcross` is not defined in the cells shown here; confirm it exists before enabling.
#supeReluTest = softcross.apply 
supeReluTest = squarePlus1()
#supeReluTest = modifiedElliot.apply

# 15 float64 samples with requires_grad=True; this tensor is the input passed to gradcheck in the next cell.
data = torch.randn(15, dtype=torch.float64, requires_grad=True) 
# Bare expression so the notebook displays the sampled tensor.
data

tensor([ 0.3923, -0.2236, -0.3195, -1.2050,  1.0445, -0.6332,  0.5731,  0.5409,
        -0.3919, -1.0427,  1.3186,  0.7476, -1.3265, -1.2413, -0.1028],
       dtype=torch.float64, requires_grad=True)

In [None]:
# Finite-difference check of the selected activation's hand-written backward.
# Prints 1 when the analytical and numerical gradients agree, 0 otherwise.
# gradcheck raises on a mismatch by default, which made the print(0) branch
# unreachable; raise_exception=False turns a mismatch into a False return value.
if torch.autograd.gradcheck(supeReluTest, data, eps=1e-8, atol=1e-7, raise_exception=False):
    print(1)
else:
    print(0)