# Numba - CPU

Numba adalah JIT (Just-In-Time) compiler yang menerjemahkan code Python ke dalam bahasa machine.

* Menggunakan special decorator pada fungsi Python, Numba meng-compile fungsi tersebut ke dalam bahasa mesin menggunakan LLVM.
* Numba compatible dengan array NumPy.
* Dapat melakukan paralelisasi yang dapat memanfaatkan semua CPU core.

In [2]:
import numpy as np

def inner_rows(C,A,B):
    for i in range(len(A)):
        for j in range(len(A)):
            C[i,j] = A[i,j] + B[i,j]

def inner_cols(C,A,B):
    for j in range(len(A)):
        for i in range(len(A)):
            C[i,j] = A[i,j] + B[i,j]


def inner_alloc(C,A,B):
    for i in range(len(A)):
        for j in range(len(A)):
            val = [A[i,j] + B[i,j]]
            C[i,j] = val[0]


In [3]:
A = np.random.rand(100,100)
B = np.random.rand(100,100)
C = np.random.rand(100,100)


In [4]:
%timeit inner_rows(C,A,B)
%timeit inner_cols(C,A,B)
%timeit inner_alloc(C,A,B)

1.99 ms ± 56.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.07 ms ± 13.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
2.36 ms ± 7.36 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [5]:
%load_ext memory_profiler

In [6]:
%memit inner_rows(C,A,B)
%memit inner_cols(C,A,B)
%memit inner_alloc(C,A,B)

peak memory: 97.94 MiB, increment: 0.41 MiB
peak memory: 97.94 MiB, increment: 0.00 MiB
peak memory: 97.95 MiB, increment: 0.00 MiB


## Menggunakan Decorator JIT

### Contoh 1

In [7]:
import math
import numpy as np
import numba
import matplotlib.pyplot as plt

**Python**

In [8]:
def prima(n):
    if n <= 1:
        raise ArithmeticError('"%s" <= 1' % n)
    if n == 2 or n == 3:
        return True
    elif n % 2 == 0:
        return False
    else:
        n_sqrt = math.ceil(math.sqrt(n))
        for i in range(3, n_sqrt+1):
            if n % 1 == 0:
                return False
    
    return True

In [9]:
n = np.random.randint(2, 1000, dtype=np.int64)
print(n, prima(n))

952 False


**numba.jit**

In [10]:
@numba.jit
def prima_numba(n):
    if n <= 1:
        raise ArithmeticError('"%s" <= 1' % n)
    if n == 2 or n == 3:
        return True
    elif n % 2 == 0:
        return False
    else:
        n_sqrt = math.ceil(math.sqrt(n))
        for i in range(3, n_sqrt+1):
            if n % i == 0:
                return False
    
    return True

In [11]:
angka = np.random.randint(2, 1000_000, dtype=np.int64, size=10000)

%time p1 = [prima(n) for i in angka]
%time p2 = [prima_numba(n) for i in angka]

CPU times: user 5.18 ms, sys: 1 µs, total: 5.19 ms
Wall time: 5.19 ms
CPU times: user 245 ms, sys: 22.3 ms, total: 268 ms
Wall time: 304 ms


Compilation is falling back to object mode WITH looplifting enabled because Internal error in pre-inference rewriting pass encountered during compilation of function "prima_numba" due to: Constant inference not possible for: $const12.1 % n

File "../../../../../var/folders/d8/_nrt2sy9567f87n08gxx0xc00000gn/T/ipykernel_1091/2505651498.py", line 4:
<source missing, REPL/exec in use?>

  @numba.jit
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "prima_numba" failed type inference due to: Unsupported constraint encountered: raise $18call_function.4

File "../../../../../var/folders/d8/_nrt2sy9567f87n08gxx0xc00000gn/T/ipykernel_1091/2505651498.py", line 4:
<source missing, REPL/exec in use?>

  @numba.jit

File "../../../../../var/folders/d8/_nrt2sy9567f87n08gxx0xc00000gn/T/ipykernel_1091/2505651498.py", line 3:
<source missing, REPL/exec in use?>

Fall-back from the nopython compilation path to the object mode compilation path has been detected, thi

**numba.njit** atau **numba.jit(nopython=True)**

In [12]:
@numba.njit
def prima_numba_njit(n):
    if n <= 1:
        raise ArithmeticError('"angka" <= 1')
    if n == 2 or n == 3:
        return True
    elif n % 2 == 0:
        return False
    else:
        n_sqrt = math.ceil(math.sqrt(n))
        for i in range(3, n_sqrt+1):
            if n % i == 0:
                return False
    
    return True

In [13]:
%time p1 = [prima(n) for i in angka]
%time p2 = [prima_numba_njit(n) for i in angka]

CPU times: user 5.41 ms, sys: 72 µs, total: 5.48 ms
Wall time: 5.47 ms
CPU times: user 39.3 ms, sys: 1.85 ms, total: 41.1 ms
Wall time: 40.6 ms


### Contoh 2

In [14]:
import numba
import numpy as np

In [15]:
def py_sum(x):
    hasil = 0
    for i in range(len(x)):
        hasil = hasil + x[i]
    return hasil

In [16]:
@numba.jit(nopython=True) # Decorator Numba
def numba_sum(x):
    hasil = 0
    for i in range(len(x)):
        hasil = hasil + x[i]
    return hasil

In [17]:
# generating data
x = np.random.randint(10, 100, 100_000)
x.shape

(100000,)

In [18]:
%timeit py_sum(x)

7.36 ms ± 49.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [19]:
%timeit numba_sum(x)

16.2 µs ± 16 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


### Benchmark Vektor Python vs NumPy vs Numba

In [20]:
import numpy as np
import numba
from time import time

In [21]:
def vekPy(a, b, c, d):
    for i in range(a.shape[0]):
        a[i] = b[i] + c[i] * d[i]

def vekNumpy(a, b, c, d):
    a[:] = b + c * d
    
@numba.njit
def vekNumba(a, b, c, d):
    for i in range(a.shape[0]):
        a[i] = b[i] + c[i] * d[i]

In [22]:
## Inisialisasi

n = 10000 # ukuran vektor
r = 100 # jumlah iterasi

a = np.zeros(n, dtype=np.float64)
b = np.empty_like(a)
b[:] = 1.0
c = np.empty_like(a)
c[:] = 1.0
d = np.empty_like(a)
d[:] = 1.0

**Python**

In [23]:
start = time()

for i in range(r):
    vekPy(a,b,c,d)
end = time()

mflops = 2.0 * r * n / ((end - start) * 1e6)
print("Python: {} MFlops/sec".format(mflops))

Python: 9.827328959700093 MFlops/sec


**Numpy**

In [24]:
start = time()

for i in range(r):
    vekNumpy(a,b,c,d)

mflops = 2.0 * r * n / ((time() - start) * 1e6)
print("Numpy: {} MFlops/sec".format(mflops))

Numpy: 1344.112802435507 MFlops/sec


**Numba**

In [25]:
start = time()

for i in range(r):
    vekNumba(a,b,c,d)

mflops = 2.0 * r * n / ((time() - start) * 1e6)
print("Numba: {} MFlops/sec".format(mflops))

Numba: 55.841036325995354 MFlops/sec


### Latihan

Buat fungsi `numba_sum(x, y)` untuk menghitung jarak-$L_1$ 

$$
L_1 = \sum_{i=0}^{N-1} |x_i - y_i|.
$$

Kemudian, buat perbandingan waktu komputasi antara **python original**, menggunakan **numpy.sum**, dan **numba**. Gunakan modul `from time import time` untuk menghitung waktu eksekusinya.

Jalankan 10 kali perhitungan di atas dan simpan hasilnya kemudian tampilkan nilai rata-rata dan standard deviasi dari perhitungan tersebut.

In [26]:
print(hasil_py_sum)
print(hasil_np_sum)
print(hasil_numba_sum)

NameError: name 'hasil_py_sum' is not defined

### Contoh 3

In [None]:
import math

@numba.njit(['boolean(int64)', 'boolean(int32)', 'boolean(int16)'])
def prima_numba_eager(n):
    if n <= 1:
        raise ArithmeticError('n <= 1')
    if n == 2 or n == 3:
        return True
    elif n % 2 == 0:
        return False
    else:
        n_sqrt = math.ceil(math.sqrt(n))
        for i in range(3, n_sqrt+1):
            if n % i == 0:
                return False
    
    return True

In [None]:
prima_numba_eager.signatures

In [None]:
rng = np.random.default_rng()

In [None]:
angka = rng.integers(2, 1000_000, size=10000)
%time p1 = [prima_numba_eager(n) for i in angka]
%time p2 = [prima_numba_eager(n) for i in angka]

In [None]:
p1 = [prima_numba_eager(n) for i in angka.astype(np.int32)]
p2 = [prima_numba_eager(n) for i in angka.astype(np.float64)]

## Parallel Computing dengan Numba

![](https://github.com/bsotomayorg/Intro_HPC_Python/blob/main/notebooks/imgs/slides_d2/046.PNG?raw=1)

### Contoh 1

In [None]:
import numpy as np
import numba

In [None]:
def deret_serial(x):
    hasil = 0
    for i in range(x.shape[0]):
        hasil = hasil + x[i]
    return hasil

In [None]:
def deret_numpy(x):
    return np.sum(x)

In [None]:
@numba.njit(parallel=True)
def deret_paralel(x):
    hasil = 0
    for i in numba.prange(x.shape[0]):
        hasil = hasil + x[i]
    return hasil

In [None]:
x = np.random.randint(1, 100, size=1000_000)

In [None]:
%timeit deret_serial(x)

In [None]:
%timeit deret_numpy(x)

In [None]:
%timeit deret_paralel(x)

### Contoh 2: Mandelbrot Set

```
for each pixel (Px, Py) on the screen do
    x0 := scaled x coordinate of pixel (scaled to lie in the Mandelbrot X scale (-2.00, 0.47))
    y0 := scaled y coordinate of pixel (scaled to lie in the Mandelbrot Y scale (-1.12, 1.12))
    x := 0.0
    y := 0.0
    iteration := 0
    max_iteration := 1000
    while (x*x + y*y ≤ 2*2 AND iteration < max_iteration) do
        xtemp := x*x - y*y + x0
        y := 2*x*y + y0
        x := xtemp
        iteration := iteration + 1
    
    color := palette[iteration]
    plot(Px, Py, color)
```

In [None]:
def mandelbrot(X, Y, maxiter):
    mandel = np.empty(shape=X.shape, dtype=np.int32)
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            it = 0
            cx = X[i, j]
            cy = Y[i, j]
            x = 0.0
            y = 0.0
            while x * x + y * y < 4.0 and it < maxiter:
                xtemp = x * x - y * y + cx
                y = 2.0 * x * y + cy
                x = xtemp
                it += 1
            mandel[i, j] = it
            
    return mandel

In [None]:
X, Y = np.meshgrid(np.linspace(-2.0, 1.0, 1000), np.linspace(-1.0, 1.0, 1000))

**Python**

In [None]:
%time m = mandelbrot(X, Y, 100)

**Fraktal Mandelbrot**

In [None]:
fig = plt.figure(figsize=(7, 8))
ax = fig.add_subplot(111)

ax.set_aspect('equal')
ax.set_ylabel('Im[c]')
ax.set_xlabel('Re[c]')
ax.imshow(m, extent=[-2.0, 1.0, -1.0, 1.0])

plt.show()

**Paralel Numba**

In [None]:
@numba.njit(parallel=True)
def mandelbrot_numba(X, Y, maxiter):
    mandel = np.empty(shape=X.shape, dtype=np.int32)
    for i in numba.prange(X.shape[0]):
        for j in numba.prange(Y.shape[1]):
            it = 0
            cx = X[i, j]
            cy = Y[i, j]
            x = 0.0
            y = 0.0
            while x * x + y * y < 4.0 and it < maxiter:
                xtemp = x * x - y * y + cx
                y = 2.0 * x * y + cy
                x = xtemp
                it += 1
            mandel[i, j] = it
            
    return mandel

In [None]:
%time m_numba = mandelbrot_numba(X, Y, 100)

In [None]:
fig = plt.figure(figsize=(7, 8))
ax = fig.add_subplot(111)

ax.set_aspect('equal')
ax.set_ylabel('Im[c]')
ax.set_xlabel('Re[c]')
ax.imshow(m_numba, extent=[-2.0, 1.0, -1.0, 1.0])

plt.show()

### Informasi tentang Paralel Numba

In [None]:
mandelbrot_numba.parallel_diagnostics(level=3)