In [41]:
import math
import time
import numba
import numpy as np
from numba import guvectorize, float32, float64

In [42]:
# Factory that compiles element-wise activation kernels with numba.guvectorize; the optional `target` keyword selects the execution backend

def gu_activate(func, *args, **kwargs):
    """
    Compile a kernel with numba.guvectorize in nopython mode.

    The kernel is registered for float32 and float64 one-dimensional arrays
    with the layout '(n)->(n)', i.e. it receives an input vector and an
    output vector of the same length and fills the output in place.

    :param func: kernel with signature func(x, y) that writes its result into y
    :param args: extra positional arguments forwarded to guvectorize
    :param kwargs: extra keyword arguments forwarded to guvectorize (for example
                   target='parallel', target='cuda' or fastmath=True); entries whose
                   value is None are dropped, so target=None selects the default target
    :return: the object produced by applying the guvectorize decorator to func
    """
    # Drop unset options so that e.g. target=None falls back to the default.
    options = {}
    for key, value in kwargs.items():
        if value is not None:
            options[key] = value

    signatures = [(float32[:], float32[:]),
                  (float64[:], float64[:])]
    decorator = guvectorize(signatures, '(n)->(n)', *args, nopython=True, **options)
    return decorator(func)

In [43]:
def relu(x, y):
    """ReLU kernel: store max(0, x[i]) in y[i] for every index of x.

    The result is written into ``y``; nothing is returned.
    """
    length = x.shape[0]
    for idx in range(length):
        value = x[idx]
        y[idx] = max(0, value)

def relu_d(x, y):
    """ReLU derivative kernel: y[i] becomes 1 where x[i] > 0 and 0 otherwise.

    The result is written into ``y``; nothing is returned.
    """
    length = x.shape[0]
    for idx in range(length):
        if x[idx] > 0:
            y[idx] = 1
        else:
            y[idx] = 0

def tanh(x, y):
    """Tanh kernel: store math.tanh(x[i]) in y[i] for every index of x.

    The result is written into ``y``; nothing is returned.
    """
    length = x.shape[0]
    for idx in range(length):
        value = x[idx]
        y[idx] = math.tanh(value)

def tanh_d(x, y):
    """Tanh derivative kernel: store 1 - x[i]**2 in y[i].

    This expression is the derivative of tanh when ``x`` already holds tanh
    outputs. The result is written into ``y``; nothing is returned.
    """
    length = x.shape[0]
    for idx in range(length):
        squared = x[idx] ** 2
        y[idx] = 1 - squared

def sigmoid(x, y):
    """Logistic sigmoid kernel: store 1 / (1 + exp(-x[i])) in y[i].

    The result is written into ``y``; nothing is returned.
    """
    length = x.shape[0]
    for idx in range(length):
        denominator = 1 + np.exp(-x[idx])
        y[idx] = 1. / denominator

def sigmoid_d(x, y):
    """Sigmoid derivative kernel: store x[i] - x[i]**2 in y[i].

    This expression is the derivative of the sigmoid when ``x`` already holds
    sigmoid outputs. The result is written into ``y``; nothing is returned.
    """
    length = x.shape[0]
    for idx in range(length):
        value = x[idx]
        y[idx] = value - value ** 2

def py_relu(x):
    """NumPy reference ReLU: return a copy of ``x`` with negative entries set to 0.

    The input array is left untouched. The previous in-place version
    overwrote the caller's array, which corrupts a shared benchmark input
    for every measurement that runs afterwards.

    :param x: NumPy array of activations
    :return: new array with the same shape and dtype as ``x``
    """
    out = x.copy()
    out[out < 0] = 0
    return out
def py_d_relu(y):
    """NumPy reference ReLU derivative: 1 where ``y`` > 0, otherwise 0.

    Returns a new array and leaves ``y`` untouched. The previous version
    modified ``y`` in place and left non-positive entries unchanged, so
    negative inputs came back negative; this matches the ``relu_d`` kernel,
    which yields 0 for every entry that is not strictly positive.

    :param y: NumPy array of ReLU outputs
    :return: new array of 0/1 values with the same shape and dtype as ``y``
    """
    return (y > 0).astype(y.dtype)
def py_tanh(x):
    """NumPy reference tanh: apply ``np.tanh`` to ``x`` and return the result."""
    activated = np.tanh(x)
    return activated
def py_d_tanh(y):
    """NumPy reference tanh derivative expressed through the output: 1 - y**2."""
    squared = y ** 2
    return 1 - squared

def py_sigmoid(x):
    """NumPy reference logistic sigmoid: 1 / (1 + exp(-x))."""
    denominator = 1 + np.exp(-x)
    return 1. / denominator
def py_d_sigmoid(y):
    """NumPy reference sigmoid derivative expressed through the output: y - y**2."""
    squared = y ** 2
    return y - squared

In [44]:
# Benchmark input: 50,000,000 uniform random samples between -1 and 1, cast to float32.
# NOTE(review): no seed is set in this cell, so the values (and the outputs shown
# below) change from run to run unless a seed is set in an earlier cell - confirm.
neurons = np.random.uniform(-1, 1, 50000000).astype(np.float32)
# Bare expression so the notebook displays the abbreviated array repr.
neurons

array([-0.77226245,  0.8425378 , -0.30387393, ..., -0.2894455 ,
       -0.9846776 ,  0.17497383], dtype=float32)

In [45]:
# Build compiled variants of every activation kernel through gu_activate.
# Prefix convention used below:
#   s_ : no `target` passed (default target), fastmath=True
#   p_ : target='parallel', fastmath=True
#   c_ : target='cuda' (fastmath is not passed for these)
s_relu = gu_activate(relu, fastmath=True)
p_relu = gu_activate(relu, target='parallel', fastmath=True)
c_relu = gu_activate(relu, target='cuda')
s_relu_d = gu_activate(relu_d, fastmath=True)
p_relu_d = gu_activate(relu_d, target='parallel', fastmath=True)
c_relu_d = gu_activate(relu_d, target='cuda')

s_tanh = gu_activate(tanh, fastmath=True)
p_tanh = gu_activate(tanh, target='parallel', fastmath=True)
c_tanh = gu_activate(tanh, target='cuda')
s_tanh_d = gu_activate(tanh_d, fastmath=True)
p_tanh_d = gu_activate(tanh_d, target='parallel', fastmath=True)
c_tanh_d = gu_activate(tanh_d, target='cuda')

# Only s_/p_ variants are built for sigmoid and sigmoid_d; there are no c_ (CUDA) variants here.
# NOTE(review): presumably because the sigmoid kernel calls np.exp - confirm whether that is the reason.
s_sigmoid = gu_activate(sigmoid, fastmath=True)
p_sigmoid = gu_activate(sigmoid, target='parallel', fastmath=True)
s_sigmoid_d = gu_activate(sigmoid_d, fastmath=True)
p_sigmoid_d = gu_activate(sigmoid_d, target='parallel', fastmath=True)

In [46]:
def ntime(func1, data=None):
    """Time two consecutive calls of a compiled function and print both durations.

    The first call is reported as "Pre" and the second as "Post"; only the
    result of the second call is returned.

    :param func1: callable taking a single array argument
    :param data: input passed to ``func1``; defaults to the notebook-level
                 ``neurons`` array, so existing ``ntime(func)`` calls still work
    :return: the value returned by the second call of ``func1``
    """
    if data is None:
        data = neurons  # fall back to the shared benchmark input

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() follows the wall clock and can jump.
    start = time.perf_counter()
    func1(data)
    end = time.perf_counter()
    print("Elapsed Numba Pre = %s" % (end - start))

    start = time.perf_counter()
    f2 = func1(data)
    end = time.perf_counter()
    print("Elapsed Numba Post = %s" % (end - start))
    return f2

def ptime(func1, data=None):
    """Time a single call of a plain Python/NumPy function and print the duration.

    :param func1: callable taking a single array argument
    :param data: input passed to ``func1``; defaults to the notebook-level
                 ``neurons`` array, so existing ``ptime(func)`` calls still work
    :return: the value returned by ``func1``
    """
    if data is None:
        data = neurons  # fall back to the shared benchmark input

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() follows the wall clock and can jump.
    start = time.perf_counter()
    f1 = func1(data)
    end = time.perf_counter()
    print("Elapsed PyTime = %s" % (end - start))
    return f1

In [47]:
# Time the default-target Numba sigmoid and the NumPy sigmoid on `neurons`;
# the cell output is the tuple of both result arrays.
ntime(s_sigmoid), ptime(py_sigmoid)

Elapsed Numba Pre = 0.28424763679504395
Elapsed Numba Post = 0.2742300033569336
Elapsed PyTime = 0.3400919437408447


(array([0.3159899 , 0.69899946, 0.42461073, ..., 0.42813963, 0.27196464,
        0.5436322 ], dtype=float32),
 array([0.3159899 , 0.6989994 , 0.42461073, ..., 0.42813963, 0.27196464,
        0.5436322 ], dtype=float32))

Try the CUDA activations

In [48]:
# Select CUDA device 0, then time the CUDA-target tanh kernel with ntime.
# NOTE(review): c_tanh was already built with target='cuda' in an earlier cell,
# before this select_device call.
# NOTE(review): the recorded ~17 s per call is far slower than the CPU variants;
# possibly because a 1-D input is handled as a single '(n)->(n)' gufunc call - confirm.
from numba import cuda
cuda.select_device(0)
ntime(c_tanh)

Elapsed Numba Pre = 17.05155324935913
Elapsed Numba Post = 17.003385066986084


array([-0.64824307,  0.68715096, -0.29485375, ..., -0.28162435,
       -0.7550837 ,  0.17320979], dtype=float32)