## When is NumPy parallel?

In [None]:
import numpy as np
# One seeded Generator feeds every random array in this notebook, so the
# inputs are the same on each Restart & Run All.
rng = np.random.default_rng(seed=10)

## Is @ parallel?

In [None]:
# Two dense 15_000 x 15_000 operands for the matrix product. The generator
# expression is consumed left to right, so A is drawn first and B second.
A, B = (rng.standard_normal(size=(15_000, 15_000)) for _ in range(2))

In [None]:
# Baseline matrix product: no thread limit is applied in this cell.
C = A @ B

In [None]:
from threadpoolctl import threadpool_limits

In [None]:
# Same product as the baseline cell, but run inside a threadpool_limits block
# that asks for at most 8 threads from libraries exposing the "blas" user API.
with threadpool_limits(limits=8, user_api="blas"):
    C = A @ B

## Is svd parallel?

In [None]:
# Rebinds `A` to a 5_000 x 3_000 matrix that serves as the SVD input below;
# the name no longer refers to the 15_000 x 15_000 operand from the matmul section.
A = rng.standard_normal(size=(5_000, 3_000))

In [None]:
# Baseline SVD with no thread limit applied; the result is bound to `_`
# because only the execution itself is of interest here.
_ = np.linalg.svd(A)

In [None]:
# Same SVD, run inside a threadpool_limits block requesting at most 8 threads
# from libraries exposing the "blas" user API.
with threadpool_limits(limits=8, user_api="blas"):
    _ = np.linalg.svd(A)

## Is np.exp parallel?

In [None]:
# 1-D input of 2_000_000 samples plus a preallocated result buffer, so the
# timed np.exp call can write through `out=` instead of allocating.
A = rng.standard_normal(size=2_000_000)
out = np.empty_like(A)

In [None]:
%%timeit
# Time NumPy's elementwise exp writing into the preallocated `out` buffer.
_ = np.exp(A, out=out)

## When is NumPy parallel?

### Everything in [`numpy.linalg`](https://numpy.org/doc/stable/reference/routines.linalg.html)

## Can we make np.exp parallel?

### Cython

In [None]:
# Load the Cython IPython extension; it provides the %%cython cell magic used below.
%load_ext Cython

In [None]:
%%cython
# cython: language_level=3, boundscheck=False, initializedcheck=False, wraparound=False
# distutils: extra_compile_args=-fopenmp
# distutils: extra_link_args=-fopenmp

from cython.parallel cimport prange
from libc.math cimport exp

def cython_exp(double[::1] A, double[::1] out):
    """Write ``exp(A[i])`` into ``out[i]`` for every element of ``A`` using OpenMP.

    Parameters
    ----------
    A : contiguous 1-D float64 buffer
        Values to exponentiate.
    out : contiguous 1-D float64 buffer
        Destination; only its first ``A.shape[0]`` elements are written.

    Raises
    ------
    ValueError
        If ``out`` holds fewer elements than ``A``.
    """
    # Py_ssize_t matches the type of memoryview extents; a C int index could
    # overflow for very large inputs.
    cdef Py_ssize_t i
    cdef Py_ssize_t n = A.shape[0]

    # boundscheck is disabled for this module, so a too-short `out` would be
    # written past its end. Reject it while the GIL is still held.
    if out.shape[0] < n:
        raise ValueError("out must have at least as many elements as A")

    # prange releases the GIL and splits the iterations across OpenMP threads.
    for i in prange(n, nogil=True):
        out[i] = exp(A[i])

In [None]:
# Dedicated output buffer for the Cython implementation, same shape and dtype as `A`.
out2 = np.empty_like(A)

In [None]:
%%timeit
# Time the Cython/OpenMP implementation with no thread limit applied in this cell.
cython_exp(A, out=out2)

In [None]:
%%timeit
with threadpool_limits(limits=8, user_api="openmp"):
    _ = cython_exp(A, out=out2)

### Numba

In [None]:
from numba import njit, prange
from math import exp

@njit(parallel=True)
def numba_exp(A, out):
    """Write ``exp(A[i])`` into ``out[i]`` for every index of ``A``, in parallel.

    Parameters
    ----------
    A : 1-D array
        Values to exponentiate; each ``A[i]`` is passed to the scalar ``math.exp``.
    out : 1-D array
        Destination; only its first ``A.shape[0]`` elements are written.

    Raises
    ------
    ValueError
        If ``out`` holds fewer elements than ``A``.
    """
    n = A.shape[0]
    # Numba does not bounds-check array indexing by default, so a too-short
    # `out` would be written past its end. Fail loudly instead.
    if out.shape[0] < n:
        raise ValueError("out must have at least as many elements as A")
    # prange marks the loop as parallelisable under parallel=True.
    for i in prange(n):
        out[i] = exp(A[i])

In [None]:
# Dedicated output buffer for the Numba implementation.
out3 = np.empty_like(A)

# Warm-up call: the first invocation JIT-compiles numba_exp, so trigger it here
# to keep compilation out of the timing cells and their loop-count calibration.
numba_exp(A, out=out3)

In [None]:
%%timeit
# Time the Numba implementation before any explicit thread-count setting.
numba_exp(A, out=out3)

In [None]:
import numba

# set_num_threads raises ValueError when asked for more threads than
# NUMBA_NUM_THREADS, so clamp the requested 8 to what this machine provides.
numba.set_num_threads(min(8, numba.config.NUMBA_NUM_THREADS))

In [None]:
%%timeit
# Re-time the Numba implementation after the thread count was set in the previous cell.
numba_exp(A, out=out3)