## Introduction

In [1]:
def func_one(n):
    """Add ``n * n`` to a running total ``n`` times and return the total.

    The square is recomputed on every iteration even though its value never
    changes. ``func_two`` performs the same accumulation with the square
    computed once before the loop, so timing the two side by side shows
    what the redundant multiplication costs.

    Parameters
    ----------
    n : int
        Number of iterations, and the value being squared. A value <= 0
        means the loop body never runs and 0 is returned.

    Returns
    -------
    int
        ``n * n`` summed ``n`` times.
    """
    total = 0
    for _ in range(n):
        # Loop-invariant work, kept inside the loop for the comparison.
        n_squared = n * n
        total += n_squared
    return total

def func_two(n):
    """Add ``n * n`` to a running total ``n`` times and return the total.

    Unlike ``func_one``, the square is computed a single time before the
    loop starts, so each iteration only performs the addition.

    Parameters
    ----------
    n : int
        Number of iterations, and the value being squared. A value <= 0
        means the loop body never runs and 0 is returned.

    Returns
    -------
    int
        ``n * n`` summed ``n`` times.
    """
    n_squared = n * n  # computed once, reused by every iteration
    total = 0
    for _ in range(n):
        total += n_squared
    return total


In [2]:
# Benchmark the two loop variants with the same argument:
# func_one squares n on every iteration, func_two squares it once up front.
%timeit func_one(10000)
%timeit func_two(10000)

397 µs ± 1.49 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
268 µs ± 1.58 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [3]:
import numba as nb

@nb.njit
def func_one(n):
    """Numba-compiled loop that adds ``n * n`` to a total ``n`` times.

    The body matches the earlier pure-Python ``func_one``: the square is
    still recomputed inside the loop. The recorded timings for the two
    compiled variants are essentially equal, which suggests the optimizer
    removes the redundant multiplication on its own.

    Parameters
    ----------
    n : int
        Number of iterations, and the value being squared.

    Returns
    -------
    int
        ``n * n`` summed ``n`` times.
    """
    total = 0
    for _ in range(n):
        # Loop-invariant work, kept inside the loop for the comparison.
        n_squared = n * n
        total += n_squared
    return total

@nb.njit
def func_two(n):
    """Numba-compiled loop that adds a precomputed ``n * n`` to a total ``n`` times.

    The body matches the earlier pure-Python ``func_two``: the square is
    computed once before the loop and reused by every iteration.

    Parameters
    ----------
    n : int
        Number of iterations, and the value being squared.

    Returns
    -------
    int
        ``n * n`` summed ``n`` times.
    """
    n_squared = n * n  # computed once, reused by every iteration
    total = 0
    for _ in range(n):
        total += n_squared
    return total

# Warm-up: call each jitted function once so that compilation for integer
# arguments happens here rather than inside a later timing run.
func_one(1)
func_two(2);

In [4]:
# Same benchmark as before, now hitting the Numba-compiled definitions that
# replaced the pure-Python func_one / func_two.
%timeit func_one(10000)
%timeit func_two(10000)

99.2 ns ± 0.878 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
98.8 ns ± 0.567 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


## Parallel

In [5]:
import numpy as np

@nb.njit()
def hypot_n(x, y):
    """Return the element-wise hypotenuse ``sqrt(x**2 + y**2)``.

    Baseline Numba version: compiled with default ``njit`` options (no
    ``parallel`` or ``fastmath`` flags).

    Parameters
    ----------
    x, y
        Operands of the expression; in this notebook they are two 2-D
        float arrays of the same shape.

    Returns
    -------
    Result of ``(x**2 + y**2)**0.5``.
    """
    return (x**2 + y**2)**0.5

@nb.njit(parallel=True, fastmath=True)
def hypot_p(x, y):
    """Return the element-wise hypotenuse ``sqrt(x**2 + y**2)``.

    Same expression as ``hypot_n`` but compiled with ``parallel=True``
    (Numba may split the array expression across threads) and
    ``fastmath=True`` (allows floating-point optimizations that are not
    strictly IEEE 754 compliant, so results may differ from ``hypot_n`` in
    the last bits).

    Parameters
    ----------
    x, y
        Operands of the expression; in this notebook they are two 2-D
        float arrays of the same shape.

    Returns
    -------
    Result of ``(x**2 + y**2)**0.5``.
    """
    return (x**2 + y**2)**0.5

# Two independent 2000 x 2000 arrays of random floats used as benchmark input.
r1, r2 = (np.random.random(size=(2000, 2000)) for _ in range(2))

# Warm-up: run each jitted function once so compilation is finished before
# anything is timed. The trailing semicolon suppresses the cell output.
hypot_n(r1, r2)
hypot_p(r1, r2);

In [6]:
# Three ways to evaluate the same expression on the same inputs:
#   1. hypot_n - Numba, default njit options
#   2. the bare NumPy expression, no Numba involved
#   3. hypot_p - Numba with parallel=True and fastmath=True
%timeit hypot_n(r1, r2)
%timeit (r1 ** 2 + r2 ** 2)**0.5
%timeit hypot_p(r1, r2)

7.69 ms ± 38.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
12.9 ms ± 62.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
4.25 ms ± 392 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Types

In [7]:
from numba import float64

# Calling a Numba type with argument types builds a function signature,
# written as return_type(arg_type, ...). Here: two 2-D float64 arrays in,
# one 2-D float64 array out.
float64[:, :](float64[:, :], float64[:, :])

(array(float64, 2d, A), array(float64, 2d, A)) -> array(float64, 2d, A)

In [8]:
@nb.njit(float64[:, :](float64[:, :], float64[:, :]), parallel=True, fastmath=True)
def hypot_t(x, y):
    """Return the element-wise hypotenuse ``sqrt(x**2 + y**2)`` for 2-D float64 arrays.

    Same expression and flags as ``hypot_p``, but with an explicit type
    signature. Supplying the signature makes Numba compile the function when
    the decorator runs instead of on the first call, and restricts it to the
    declared types.

    Parameters
    ----------
    x, y : 2-D float64 arrays
        Operands of the expression, as declared in the signature.

    Returns
    -------
    2-D float64 array
        Result of ``(x**2 + y**2)**0.5``.
    """
    return (x**2 + y**2)**0.5

# Display the result of the explicitly typed version (NumPy abbreviates the
# 2000 x 2000 array in the output).
hypot_t(r1, r2)

array([[1.25252783, 0.8858779 , 1.16478573, ..., 0.68519736, 0.93683863,
        0.1157479 ],
       [0.55672638, 0.87614166, 0.8830354 , ..., 0.28155445, 0.93778775,
        0.85374986],
       [0.84842341, 0.26206338, 1.08365793, ..., 0.32440571, 0.30761915,
        1.2717025 ],
       ...,
       [1.00921872, 1.00467014, 0.90317148, ..., 0.9582827 , 1.0849355 ,
        0.38325605],
       [0.39270304, 0.76392759, 1.1773669 , ..., 1.15297935, 0.95138762,
        0.25601513],
       [0.59720513, 0.84778881, 0.2440707 , ..., 0.47936671, 0.57072272,
        0.67588545]])

In [9]:
%timeit hypot_t(r1, r2)

4.58 ms ± 424 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Vectorize

In [10]:
# 5 x 4 float64 array with every entry equal to 1.01, used as ufunc input below.
arr = np.full((5, 4), 1.01)

In [11]:
from numba import vectorize, float64, float32

@vectorize([float64(float64, float64), float32(float32, float32)], identity=1)
def cumprod(x, y):
    """Multiply two scalars; Numba turns this into an element-wise NumPy ufunc.

    The resulting ufunc exposes the standard ufunc methods, so
    ``cumprod.accumulate`` yields a cumulative product and
    ``cumprod.reduce`` yields a product along an axis.

    ``identity=1`` declares 1 as the multiplicative identity of the ufunc.
    Without it the ufunc has no identity, and ``reduce`` raises on empty
    input or when asked to reduce over more than one axis at once.
    Single-axis reductions and accumulations over non-empty data are
    unaffected.

    Parameters
    ----------
    x, y : float64 or float32
        The two factors (both of the same type, per the declared signatures).

    Returns
    -------
    float64 or float32
        ``x * y``.
    """
    return x * y

In [12]:
# Cumulative product along axis 0 and then along axis 1.
cumprod.accumulate(arr, axis=0)
cumprod.accumulate(arr, axis=1)
# Product that collapses axis 0 and then axis 1.
# NOTE: a notebook cell only echoes its final expression, so only the
# result of the last line appears in the output.
cumprod.reduce(arr, axis=0)
cumprod.reduce(arr, axis=1)

array([1.04060401, 1.04060401, 1.04060401, 1.04060401, 1.04060401])