# Lab 09 - High Performance Computing
#### Module Imports

In [9]:
import multiprocessing as mproc, numba, numpy, time
from numba import njit

## Exercise 01 - Number of Processors

In [2]:
print("Number of processors:", mproc.cpu_count())

Number of processors: 8


## Exercise 02 - Array Normalization

In [3]:
def create_list(rows, cols, min, max):
    return numpy.random.randint(min, max, size=[rows, cols])

In [4]:
def normalize_row(row):
    COLS = len(row)
    norm_row = numpy.zeros(shape=(COLS))
    ROW_MIN = min(row)
    ROW_MAX = max(row)

    for col in range(COLS):
        norm_row[col] = (row[col] - ROW_MIN) / (ROW_MAX - ROW_MIN)

    return norm_row

In [5]:
def normalize_by_row(lst):
    ROWS, COLS = lst.shape
    norm_list = numpy.zeros((ROWS, COLS))

    for row in range(ROWS):
        norm_list[row] = normalize_row(lst[row][:])

    return norm_list

In [6]:
MIN = 0
MAX = 10
SIZES = [ 100, 500, 1000, 2000, 3000 ]

for size in SIZES:
    lst = create_list(size, size, MIN, MAX)
    START = time.time()
    norm_list = normalize_by_row(lst)
    print("Sequential normalization by row for size", size, "took", time.time() - START, "seconds")

Sequential normalization by row for size 100 took 0.005998849868774414 seconds
Sequential normalization by row for size 500 took 0.13302087783813477 seconds
Sequential normalization by row for size 1000 took 0.5031018257141113 seconds
Sequential normalization by row for size 2000 took 2.0174059867858887 seconds
Sequential normalization by row for size 3000 took 4.352947950363159 seconds


## Exercise 03 - Parallelization Using pool.apply
#### Constant Definitions

In [7]:
CPU_COUNT = mproc.cpu_count()

pool = mproc.Pool(CPU_COUNT)

for size in SIZES:
    START = time.time()
    results = [pool.apply(normalize_row, args=(row,)) for row in lst]
    print("Parallel normalization by row for size", size, "took", time.time() - START, "seconds")

pool.close()

## Exercise 04 - Parallelization Using pool.apply async

In [8]:
pool = mproc.Pool(CPU_COUNT)

for size in SIZES:
    lst = create_list(size, size, MIN, MAX)
    START = time.time()
    tasks = [pool.apply_async(normalize_row, args=(row,)) for row in lst]
    print("Async parallel normalization by row for size", size, "took", time.time() - START, "seconds")

pool.close()

Async parallel normalization by row for size 100 took 0.0409998893737793 seconds
Async parallel normalization by row for size 500 took 0.07799983024597168 seconds
Async parallel normalization by row for size 1000 took 0.043001651763916016 seconds
Async parallel normalization by row for size 2000 took 0.059998273849487305 seconds
Async parallel normalization by row for size 3000 took 0.024000167846679688 seconds


## Exercise 05 - Numba and JIT

In [17]:
@njit(parallel=True)
def jit_trace(lst, rows, cols):
    trace = 0

    for row in range(rows):
        for col in range(cols):
            trace += numpy.tanh(lst[row][col])

    return trace

In [18]:
for size in SIZES:
    lst = numpy.arange(size * size).reshape(size, size)
    START = time.time()
    jit_trace(lst, size, size)
    print("JITted parallel tracing for size", size, "took", time.time() - START, "seconds")

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see https://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.
[1m
File "<ipython-input-17-6ea592bbd34b>", line 2:[0m
[1m@njit(parallel=True)
[1mdef jit_trace(lst, rows, cols):
[0m[1m^[0m[0m
[0m
JITted parallel normalization by row for size 100 took 0.2651340961456299 seconds
JITted parallel normalization by row for size 500 took 0.0019996166229248047 seconds
JITted parallel normalization by row for size 1000 took 0.009998559951782227 seconds
JITted parallel normalization by row for size 2000 took 0.045000553131103516 seconds
JITted parallel normalization by row for size 3000 took 0.08295917510986328 seconds


## Exercise 06 - Normalization Using JIT

In [25]:
@njit(parallel=True)
def normalize_row_jit(row):
    COLS = len(row)
    norm_row = numpy.zeros(shape=(COLS))
    ROW_MIN = min(row)
    ROW_MAX = max(row)

    for col in numba.prange(COLS):
        norm_row[col] = (row[col] - ROW_MIN) / (ROW_MAX - ROW_MIN)

    return norm_row

In [27]:
@njit(parallel=True)
def normalize_by_row_jit(lst):
    ROWS, COLS = lst.shape
    norm_list = numpy.zeros((ROWS, COLS))

    for row in numba.prange(ROWS):
        norm_list[row] = normalize_row_jit(lst[row][:])

    return norm_list

In [28]:
for size in SIZES:
    lst = create_list(size, size, MIN, MAX)
    START = time.time()
    normalize_by_row_jit(lst)
    print("JIT parallel normalization by row for size", size, "took", time.time() - START, "seconds")

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1mNameError: name 'numba' is not defined[0m