In [None]:
!pip install numba

AUTOMATIC PARALLELIZATION EXAMPLE:

In [None]:
from numba import njit, prange
import numpy as np
import time


# Function without parallelization
def no_parallelization_example(A):
    """Sum the entries of ``A`` along its first axis in plain Python.

    Serves as the interpreted, single-threaded baseline: no JIT compilation
    and no parallelism are involved.

    Parameters
    ----------
    A : array-like exposing ``shape`` and integer indexing
        The values to accumulate.

    Returns
    -------
    The accumulated sum, or the integer ``0`` when ``A`` is empty.
    """
    length = A.shape[0]
    total = 0
    idx = 0
    while idx < length:
        total += A[idx]
        idx += 1
    return total

# Function with explicit prange for manual parallelization
@njit(parallel=True)
def manual_parallelization_example(A):
    """Sum the entries of ``A`` using an explicitly parallel ``prange`` loop.

    The accumulator is only ever updated with ``+=`` from its previous value
    inside the loop, which is the form Numba documents as an automatically
    inferred reduction for ``prange`` loops.

    Parameters
    ----------
    A : array indexed along its first axis

    Returns
    -------
    The accumulated sum of ``A[0] .. A[len-1]``.
    """
    length = A.shape[0]
    total = 0
    for idx in prange(length):
        total += A[idx]
    return total

# Function without an explicit prange: with parallel=True, Numba parallelizes
# the array reduction automatically
@njit(parallel=True)
def automatic_parallelization_example(A):
    """Sum the entries of ``A`` and let Numba parallelize the reduction.

    Plain ``@njit`` compiles the function but never parallelizes it, and even
    with ``parallel=True`` an ordinary ``range`` loop stays serial. Automatic
    parallelization applies to array expressions and NumPy reductions, so the
    sum is expressed as ``np.sum`` under ``parallel=True``.

    Parameters
    ----------
    A : numpy.ndarray
        The values to sum.

    Returns
    -------
    The sum of all elements of ``A``. The summation order differs from a
    sequential loop, so the result may differ in the last floating-point
    digits.
    """
    return np.sum(A)


# Create a 1D array with random values (seeded so every run sees the same data)
n = 20000000
A = np.random.default_rng(0).random(n)

# Warm-up: the first call of a jitted function triggers compilation, which
# would otherwise dominate the measurement. A short slice has the same array
# type as A, so the specialization compiled here is reused by the timed calls.
automatic_parallelization_example(A[:16])
manual_parallelization_example(A[:16])

# Measure the execution time (perf_counter is a monotonic, high-resolution
# clock intended for interval timing)
start_time = time.perf_counter()
result_auto = automatic_parallelization_example(A)
auto_parallel_time = round(time.perf_counter() - start_time, 3)

start_time = time.perf_counter()
result_manual = manual_parallelization_example(A)
manual_parallel_time = round(time.perf_counter() - start_time, 3)

start_time = time.perf_counter()
result_no_parallel = no_parallelization_example(A)
no_parallel_time = round(time.perf_counter() - start_time, 3)

# The three variants must agree up to floating-point summation order
assert np.isclose(result_auto, result_no_parallel)
assert np.isclose(result_manual, result_no_parallel)

# Print timing information
print("No parallelization:", no_parallel_time, "sec")
print("Explicit parallelization:", manual_parallel_time, "sec")
print("Automatic parallelization:", auto_parallel_time, "sec")

RACE CONDITION:

In [None]:
from numba import njit, prange
import numpy as np


@njit(parallel=True)
def race_condition(x):
    """
    Demonstrates a race condition by accumulating into the entire array `y`
    from different parallel iterations of the loop.

    Every iteration of the `prange` loop updates both elements of the shared
    two-element array `y` through the slice `y[:]`. The printed values are
    therefore not guaranteed to equal the sum of `x`.

    NOTE(review): Numba's parallel documentation lists this slice-assignment
    form as one that is not handled as a reduction -- confirm against the
    Numba version in use.

    Parameters: `x` is indexed along its first axis (called below with a 1-D
    array of ones).
    Returns: nothing; the accumulated array is printed.
    """
    n = x.shape[0]
    y = np.zeros(2)
    for i in prange(n):
        # Intentionally unsafe: read-modify-write of the shared array via a slice.
        y[:] += x[i]

    # Print the result after the parallel section
    print("Result with race condition:", y)


@njit(parallel=True)
def correct_version(x):
    """
    Race-free counterpart of the slice-based accumulation: every parallel
    iteration adds `x[idx]` to a two-element accumulator array.

    The update is written as an in-place `+=` on the whole array, the form
    Numba documents as a supported whole-array reduction in `prange` loops.

    Returns nothing; the accumulated array is printed.
    """
    totals = np.zeros(2)
    for idx in prange(x.shape[0]):
        totals += x[idx]

    # Report the accumulated values once the parallel loop has finished
    print("Result without race condition:", totals)

# Example usage: run both kernels on the same all-ones input so the two
# printed results can be compared directly.
x = np.ones(shape=1_000_000)
race_condition(x)
correct_version(x)

SCHEDULING OF TASKS:

In [None]:
from numba import njit, prange, set_parallel_chunksize, get_parallel_chunksize

@njit(parallel=True)
def chunk_size_exploration(n):
    """Report the parallel chunk size before, inside and after a prange loop.

    Sums the integers ``0 .. n-1`` in a ``prange`` loop and prints the value
    returned by ``get_parallel_chunksize()`` at three points: before the
    loop, inside it, and after it.

    NOTE(review): Numba's documentation describes the chunk size as reset to
    0 inside a parallel region and restored afterwards -- confirm against
    the printed output.

    Parameters: ``n`` is the number of loop iterations.
    Returns: the accumulated sum ``acc``.
    """
    acc = 0
    print(f"Chunk size before parallel region: {get_parallel_chunksize()}")
    for i in prange(n):
        # Report from iteration 0 only, so the message is printed once
        # instead of once per iteration.
        if i == 0:
            print(f"Chunk size inside parallel region: {get_parallel_chunksize()}")
        acc += i
    print(f"Chunk size after parallel region: {get_parallel_chunksize()}")
    return acc

n = 4
# set_parallel_chunksize returns the previous chunk size. Keep it so the
# global setting can be restored and does not leak into later cells.
previous_chunksize = set_parallel_chunksize(n)
try:
    result = chunk_size_exploration(n)
finally:
    set_parallel_chunksize(previous_chunksize)

PERFORM PARALLEL DIAGNOSTIC:

In [5]:
@njit(parallel=True)
def test(x):
    """Small kernel used to produce a parallel diagnostics report.

    Builds ``a = sin(x)`` and ``b = cos(a * a)``, then accumulates
    ``b[i] + b[j + 1]`` over a doubly nested ``prange`` loop
    (``i`` in ``0 .. n-3``, ``j`` in ``0 .. n-2``).

    Returns: the accumulated scalar ``acc``.
    """
    n = x.shape[0]
    # The two array expressions below appear as parallel loops #7 and #8 in
    # the diagnostics listing, where #8 is reported as fused into #7.
    a = np.sin(x)
    b = np.cos(a * a)
    acc = 0
    for i in prange(n - 2):
        # NOTE(review): per Numba's documentation only the outermost prange
        # runs in parallel and a nested prange is executed as a serial loop
        # -- confirm in the full diagnostics output.
        for j in prange(n - 1):
            acc += b[i] + b[j + 1]
    return acc

# Call the function once so it is compiled for this argument type;
# presumably diagnostics are only available after compilation.
test(np.arange(10))

# level=4 requests the most detailed diagnostics report.
test.parallel_diagnostics(level=4)

 
 Parallel Accelerator Optimizing:  Function test, <ipython-input-5-fb83a01b854e>
 (1)  


Parallel loop listing for  Function test, <ipython-input-5-fb83a01b854e> (1) 
--------------------------------------|loop #ID
@njit(parallel=True)                  | 
def test(x):                          | 
    n = x.shape[0]                    | 
    a = np.sin(x)---------------------| #7
    b = np.cos(a * a)-----------------| #8
    acc = 0                           | 
    for i in prange(n - 2):-----------| #10
        for j in prange(n - 1):-------| #9
            acc += b[i] + b[j + 1]    | 
    return acc                        | 
--------------------------------- Fusing loops ---------------------------------
Attempting fusion of parallel loops (combines loops with similar properties)...
  Trying to fuse loops #7 and #8:
    - fusion succeeded: parallel for-loop #8 is fused into for-loop #7.
  Trying to fuse loops #7 and #10:
    - fusion failed: loop dimension mismatched in axis 0. sli

REFERENCES: https://numba.readthedocs.io/en/stable/user/parallel.html