In [1]:
import math
import numpy as np
from numba import jit, njit, prange
from joblib import Parallel, delayed
from functools import partial
import multiprocessing as mp
from multiprocessing import Pool, Value, Array
from ipyparallel import Client
%load_ext cython

**1**. (100 points)

Write a predicate function `is_prime` that efficiently checks whether a number is prime. Use this to write a second function `primes_between` that returns the prime numbers between two integers as a `numpy` array.

- (10 points) Do this in regular Python 
- (10 points) Accelerate using `numba` (serial version) 
- (15 points) Accelerate using `numba` (parallel version)
- (10 points) Accelerate using `cython` (serial version) 
- (15 points) Accelerate using `cython` (parallel version)
- (10 points) Report the speed-up multiplier as an integer of the `numba` and `cython` serial and parallel versions using `timeit` in a DataFrame for the numbers between 0 and 1,000,000
- (10 points each) Run the serial version of the python `primes_between` function in parallel using
    - `multiprocessing`
    - `joblib`
    - `ipyparallel`

- (10 points) Do this in regular Python 



*Solution from Midterm 1 is used for is_prime( ) function*

In [2]:
def is_prime(n):
    """Returns True if a given integer n is prime and false otherwise"""

    if n == 2:
        return True
    elif n < 2 or n % 2 == 0:
        return False
    else:
        for i in range(3, int(np.sqrt(n))+1, 2):
            if n % i == 0:
                return False
    return True

In [3]:
def primes_between(n1, n2):
    """Returns prime numbers between n1 and n2 (exclusive on both ends) as a numpy array"""
    
    # Check to see which argument is larger
    if n1 > n2:
        n1, n2 = n2, n1
    
    # Initialize output and loop through all numbers between n1 and n2
    primes = []
    for num in range(n1 + 1, n2):
        if is_prime(num):
            primes.append(num)
    
    # Return result
    return np.array(primes)

In [4]:
# Prove that function works
primes_between(0, 50)

array([ 2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47])

In [5]:
%%timeit
# Time function
primes_between(0, 1000)

1.29 ms ± 9.83 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


- (10 points) Accelerate using `numba` (serial version) 



In [6]:
@jit(nopython=True, cache=True)
def is_prime_numba_serial(n):
    """Returns True if a given integer n is prime and false otherwise"""

    if n == 2:
        return True
    elif n < 2 or n % 2 == 0:
        return False
    else:
        for i in range(3, int(np.sqrt(n))+1, 2):
            if n % i == 0:
                return False
    return True

In [10]:
@jit(nopython=True, cache=True)
def primes_between_numba_serial(n1, n2):
    """Returns prime numbers between n1 and n2 (exclusive on both ends) as a numpy array"""
    
    # Check to see which argument is larger
    if n1 > n2:
        n1, n2 = n2, n1
    
    # Initialize output and loop through all numbers between n1 and n2
    primes = []
    for num in range(n1 + 1, n2):
        if is_prime_numba_serial(num):
            primes.append(num)
    
    # Return result
    return np.array(primes)

In [11]:
# Prove that function works
primes_between_numba_serial(0, 50)

array([ 2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47])

In [12]:
%%timeit
# Time function
primes_between_numba_serial(0, 1000)

21.9 µs ± 273 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


- (15 points) Accelerate using `numba` (parallel version)



**Need to fix this problem**

In [9]:
@njit(parallel=True)
def is_prime_numba_parallel(n):
    """Returns True if a given integer n is prime and false otherwise"""

    if n == 2:
        return True
    elif n < 2 or n % 2 == 0:
        return False
    else:
        for i in prange(3, int(np.sqrt(n))+1, 2):
            if n % i == 0:
                return False
    return True

In [10]:
is_prime_numba_parallel(1000)

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "<ipython-input-9-43e8d3039c1b>", line 2:
@njit(parallel=True)
def is_prime_numba_parallel(n):
^

  state.func_ir.loc))


False

In [10]:
is_prime_numba_parallel.parallel_diagnostics(level=4)

 
 Parallel Accelerator Optimizing:  Function is_prime_numba_parallel, <ipython-
input-8-43e8d3039c1b> (1)  


Parallel loop listing for  Function is_prime_numba_parallel, <ipython-input-8-43e8d3039c1b> (1) 
----------------------------------------------------------------------------|loop #ID
@njit(parallel=True)                                                        | 
def is_prime_numba_parallel(n):                                             | 
    """Returns True if a given integer n is prime and false otherwise"""    | 
                                                                            | 
    if n == 2:                                                              | 
        return True                                                         | 
    elif n < 2 or n % 2 == 0:                                               | 
        return False                                                        | 
    else:                                                                 

In [17]:
@njit(parallel=True)
def primes_between_numba_parallel(n1, n2):
    """Returns prime numbers between n1 and n2 (exclusive on both ends) as a numpy array"""
    
    # Check to see which argument is larger
    if n1 > n2:
        n1, n2 = n2, n1
    
    # Initialize output and loop through all numbers between n1 and n2
    primes = []
    for num in prange(n1 + 1, n2):
        if is_prime_numba_parallel(num):
            primes.append(num)
    
    # Return result
    return np.array(primes)

In [None]:
# Prove that function works
primes_between_numba_parallel(0, 650)

In [6]:
primes_between_numba_parallel.parallel_diagnostics(level=4)

 
 Parallel Accelerator Optimizing:  Function primes_between_numba_parallel, 
<ipython-input-4-ed4a68b43478> (1)  


Parallel loop listing for  Function primes_between_numba_parallel, <ipython-input-4-ed4a68b43478> (1) 
-----------------------------------------------------------------------------------------------|loop #ID
@njit(parallel=True)                                                                           | 
def primes_between_numba_parallel(n1, n2):                                                     | 
    """Returns prime numbers between n1 and n2 (exclusive on both ends) as a numpy array"""    | 
    # Initialize output and loop through all numbers between n1 and n2                         | 
    primes = []                                                                                | 
    for num in prange(n1 + 1, n2):-------------------------------------------------------------| #0
        if is_prime_numba_parallel(num):                                             

- (10 points) Accelerate using `cython` (serial version) 



In [13]:
%%cython -a

import cython
import numpy as np

def is_prime_cython_serial(int n):
    """Returns True if a given integer n is prime and false otherwise"""
    
    cdef int i
    if n == 2:
        return True
    elif n < 2 or n % 2 == 0:
        return False
    else:
        for i in range(3, int(np.sqrt(n))+1, 2):
            if n % i == 0:
                return False
    return True

def primes_between_cython_serial(int n1, int n2, list primes):
    """Returns prime numbers between n1 and n2 (exclusive on both ends) as a numpy array"""
    
    # Check to see which argument is larger
    if n1 > n2:
        n1, n2 = n2, n1
    
    # Loop through all numbers between n1 and n2
    cdef int num
    for num in range(n1 + 1, n2):
        if is_prime_cython_serial(num):
            primes.append(num)
    
    # Return result
    return np.array(primes)

In [14]:
# Prove that function works
primes = []
primes_between_cython_serial(1, 50, primes)

array([ 2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47])

In [15]:
%%timeit
# Time function
primes = []
primes_between_cython_serial(1, 1000, primes)

727 µs ± 4.62 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


- (15 points) Accelerate using `cython` (parallel version)



**Need to fix this problem**

In [10]:
%%cython --compile-args=-fopenmp --link-args=-fopenmp --force -I /usr/local/opt/libomp/include -L /usr/local/opt/libomp/lib

import cython
from cython.parallel import parallel, prange
import math
import numpy as np

def is_prime_cython_parallel(int n):
    """Returns True if a given integer n is prime and false otherwise"""
    
    cdef int i
    with cython.nogil, parallel(): 
        if n == 2:
            return True
        elif n < 2 or n % 2 == 0:
            return False
        else:
            for i in range(3, int(np.sqrt(n))+1, 2):
                if n % i == 0:
                    return False
        return True

def primes_between_cython_parallel(int n1, int n2, list primes):
    """Returns prime numbers between n1 and n2 (exclusive on both ends) as a numpy array"""
    
    # Loop through all numbers between n1 and n2
    cdef int num
    with cython.nogil, parallel():    
        for num in prange(n1 + 1, n2):
            if is_prime_cython_parallel(num):
                primes.append(num)
    
    # Return result
    return np.array(primes)


Error compiling Cython file:
------------------------------------------------------------
...
    
    # Loop through all numbers between n1 and n2
    cdef int num
    with cython.nogil, parallel():    
        for num in prange(n1 + 1, n2):
            if is_prime_cython_parallel(num):
                                      ^
------------------------------------------------------------

/home/jovyan/.cache/ipython/cython/_cython_magic_df6a768c27cbcde6077d9f109a110b51.pyx:28:39: Truth-testing Python object not allowed without gil

Error compiling Cython file:
------------------------------------------------------------
...
    
    # Loop through all numbers between n1 and n2
    cdef int num
    with cython.nogil, parallel():    
        for num in prange(n1 + 1, n2):
            if is_prime_cython_parallel(num):
                                      ^
------------------------------------------------------------

/home/jovyan/.cache/ipython/cython/_cython_magic_df6a768c27cbcde6077d9f

- (10 points) Report the speed-up multiplier as an integer of the `numba` and `cython` serial and parallel versions using `timeit` in a DataFrame for the numbers between 0 and 1,000,000



**I think all parts below here are wrong. Rather than going from 0 to n for n in range(0, 1M), I think we just have to do 0-1M one time**

- (10 points) `multiprocessing`



In [16]:
# Fix one of the arguments of primes_between at 1, and then use parallel processing
primes_between_partial = partial(primes_between, n2 = 0)
with mp.Pool(processes=8) as pool:
    res = pool.map(primes_between_partial, range(0, 1000))
    
# Display result
res

[array([], dtype=float64),
 array([], dtype=float64),
 array([], dtype=float64),
 array([2]),
 array([2, 3]),
 array([2, 3]),
 array([2, 3, 5]),
 array([2, 3, 5]),
 array([2, 3, 5, 7]),
 array([2, 3, 5, 7]),
 array([2, 3, 5, 7]),
 array([2, 3, 5, 7]),
 array([ 2,  3,  5,  7, 11]),
 array([ 2,  3,  5,  7, 11]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13, 17]),
 array([ 2,  3,  5,  7, 11, 13, 17]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 

- (10 points) `joblib`



In [18]:
Parallel(n_jobs=8)(delayed(primes_between)(0, i) for i in range(0, 1000))

CPU times: user 424 ms, sys: 56 ms, total: 480 ms
Wall time: 431 ms


[array([], dtype=float64),
 array([], dtype=float64),
 array([], dtype=float64),
 array([2]),
 array([2, 3]),
 array([2, 3]),
 array([2, 3, 5]),
 array([2, 3, 5]),
 array([2, 3, 5, 7]),
 array([2, 3, 5, 7]),
 array([2, 3, 5, 7]),
 array([2, 3, 5, 7]),
 array([ 2,  3,  5,  7, 11]),
 array([ 2,  3,  5,  7, 11]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13, 17]),
 array([ 2,  3,  5,  7, 11, 13, 17]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 

- (10 points) `ipyparallel`

In [2]:
# Connect to cluster of remote engines
rc = Client()
dv = rc[:]

In [3]:
# Define nested primes_between function
def primes_between_ipyparallel(n1, n2):
    """Returns prime numbers between n1 and n2 (exclusive on both ends) as a numpy array"""
    
    # Import numpy
    import numpy as np
    
    # Define predicate function with "primes_between" so that it is recognized within cluser
    def is_prime_ipyparallel(n):
        """Returns True if a given integer n is prime and false otherwise"""

        # Function
        if n == 2:
            return True
        elif n < 2 or n % 2 == 0:
            return False
        else:
            for i in range(3, int(np.sqrt(n))+1, 2):
                if n % i == 0:
                    return False
        return True
    
    # Check to see which argument is larger
    if n1 > n2:
        n1, n2 = n2, n1
    
    # Initialize output and loop through all numbers between n1 and n2
    primes = []
    for num in range(n1 + 1, n2):
        if is_prime_ipyparallel(num):
            primes.append(num)
    
    # Return result
    return np.array(primes)

In [4]:
dv.map_sync(primes_between_ipyparallel, [0 for i in range(1000)], range(1000))

[array([], dtype=float64),
 array([], dtype=float64),
 array([], dtype=float64),
 array([2]),
 array([2, 3]),
 array([2, 3]),
 array([2, 3, 5]),
 array([2, 3, 5]),
 array([2, 3, 5, 7]),
 array([2, 3, 5, 7]),
 array([2, 3, 5, 7]),
 array([2, 3, 5, 7]),
 array([ 2,  3,  5,  7, 11]),
 array([ 2,  3,  5,  7, 11]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13]),
 array([ 2,  3,  5,  7, 11, 13, 17]),
 array([ 2,  3,  5,  7, 11, 13, 17]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 17, 19, 23]),
 array([ 2,  3,  5,  7, 11, 13, 

**Need to merge these lists together**

In [11]:
dv.map_sync(primes_between_ipyparallel, [0, 250001, 500001, 750001], [250000, 500000, 750000, 1000000])

[array([     2,      3,      5, ..., 249971, 249973, 249989]),
 array([250007, 250013, 250027, ..., 499969, 499973, 499979]),
 array([500009, 500029, 500041, ..., 749941, 749971, 749993]),
 array([750019, 750037, 750059, ..., 999961, 999979, 999983])]