In [8]:
import math
import numpy as np
from numba import jit, njit, prange
from joblib import Parallel, delayed
from functools import partial
import multiprocessing as mp
from multiprocessing import Pool
import ipyparallel as ipp
from ipyparallel import Client
import pandas as pd
import timeit
%load_ext cython

The cython extension is already loaded. To reload it, use:
  %reload_ext cython


**1**. (100 points)

Write a predicate function `is_prime` that efficiently checks whether a number is prime. Use this to write a second function `primes_between` that returns the prime numbers between two integers as a `numpy` array.

- (10 points) Do this in regular Python 
- (10 points) Accelerate using `numba` (serial version) 
- (15 points) Accelerate using `numba` (parallel version)
- (10 points) Accelerate using `cython` (serial version) 
- (15 points) Accelerate using `cython` (parallel version)
- (10 points) Report the speed-up multiplier as an integer of the `numba` and `cython` serial and parallel versions using `timeit` in a DataFrame for the numbers between 0 and 1,000,000
- (10 points each) Run the serial version of the python `primes_between` function in parallel using
    - `multiprocessing`
    - `joblib`
    - `ipyparallel`

- (10 points) Do this in regular Python 



*Solution from Midterm 1 is used for is_prime( ) function. primes_between( ) function is written to be compatible (and comparable) with numba and cython versions down the line. I could write a more efficient python-only version of primes_between if that was the goal here*

In [9]:
def is_prime(n):
    """Returns True if a given integer n is prime and false otherwise"""

    if n == 2:
        return True
    elif n < 2 or n % 2 == 0:
        return False
    else:
        for i in range(3, int(np.sqrt(n))+1, 2):
            if n % i == 0:
                return False
    return True

In [10]:
def primes_between(n1, n2):
    """Returns prime numbers between n1 and n2 (inclusive on both ends) as a numpy array"""
    
    # Check to see which argument is larger
    if n1 > n2:
        n1, n2 = n2, n1
    
    # Initialize range of interest
    primes = np.arange(n1, n2 + 1)
    
    # Mark all non-prime numbers
    for i in range(primes.shape[0]):
        if not is_prime(primes[i]):
            primes[i] = -999
    
    # Return all primes
    return primes[primes != -999]

In [11]:
# Prove that function works
primes_between(0, 50)

array([ 2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47])

- (10 points) Accelerate using `numba` (serial version) 



In [12]:
@jit(nopython=True, cache=True)
def is_prime_numba_serial(n):
    """Returns True if a given integer n is prime and false otherwise"""

    if n == 2:
        return True
    elif n < 2 or n % 2 == 0:
        return False
    else:
        for i in range(3, int(np.sqrt(n))+1, 2):
            if n % i == 0:
                return False
    return True

In [13]:
@jit(nopython=True, cache=True)
def primes_between_numba_serial(n1, n2):
    """Returns prime numbers between n1 and n2 (exclusive on both ends) as a numpy array"""
    
    # Check to see which argument is larger
    if n1 > n2:
        n1, n2 = n2, n1
    
    # Initialize range of interest
    primes = np.arange(n1, n2 + 1)
    
    # Mark all non-prime numbers
    for i in range(primes.shape[0]):
        if not is_prime_numba_serial(primes[i]):
            primes[i] = -999
    
    # Return all primes
    return primes[primes != -999]

In [14]:
# Prove that function works
primes_between_numba_serial(0, 50)

array([ 2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47])

- (15 points) Accelerate using `numba` (parallel version)



*Here, we parallelize the "primes_between" function but leave the "is_prime" function as serial (because it only checks one number for each function call)*

In [16]:
@njit(parallel = True)
def primes_between_numba_parallel(n1, n2):
    """Returns prime numbers between n1 and n2 (exclusive on both ends) as a numpy array"""
    
    # Check to see which argument is larger
    if n1 > n2:
        n1, n2 = n2, n1
    
    # Initialize range of interest
    primes = np.arange(n1, n2 + 1)
    
    # Mark all non-prime numbers
    for i in prange(primes.shape[0]):
        if not is_prime_numba_serial(primes[i]):
            primes[i] = -999
    
    # Return all primes
    return primes[primes != -999]

In [9]:
# Prove that function works
primes_between_numba_parallel(0, 50)

array([ 2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47])

- (10 points) Accelerate using `cython` (serial version) 



Reference on using numpy arrays in cython: https://www.futurelearn.com/info/courses/python-in-hpc/0/steps/65126

In [2]:
%%cython

import cython
import numpy as np
cimport numpy as cnp

# Primitive function
cdef is_prime_cython_serial(int n):
    """Returns True if a given integer n is prime and false otherwise"""
    
    # Declare variables
    cdef int i
    cdef int upper
    upper = int(n ** 0.5 + 1)

    # Perform checks    
    if n == 2:
        return 1
    elif n < 2 or n % 2 == 0:
        return 0
    else:
        for i in range(3, upper, 2):
            if n % i == 0:
                return 0
    return 1

def primes_between_cython_serial(int n1, int n2):
    """Returns prime numbers between n1 and n2 (exclusive on both ends) as a numpy array"""
    
    # Declare variables
    cdef int i
    cdef cnp.ndarray[cnp.int_t, ndim = 1] primes
    
    # Check to see which argument is larger
    if n1 > n2:
        n1, n2 = n2, n1
    
    # Initialize range of interest
    primes = np.arange(n1, n2 + 1)
    
    # Mark all non-prime numbers
    for i in range(primes.shape[0]):
        if not is_prime_cython_serial(primes[i]):
            primes[i] = -999
    
    # Return all primes
    return primes[primes != -999]

In [3]:
# Prove that function works
primes_between_cython_serial(0, 50)

array([ 2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47])

- (15 points) Accelerate using `cython` (parallel version)



In [6]:
%%cython --compile-args=-fopenmp --link-args=-fopenmp --force -I /usr/local/opt/libomp/include -L /usr/local/opt/libomp/lib

import cython
from cython.parallel import parallel, prange
import numpy as np
cimport numpy as cnp
from libcpp cimport bool

# Primitive function
cdef is_prime_cython_serial(int n) nogil:
    """Returns True if a given integer n is prime and false otherwise"""
    
    # Declare variables
    cdef int i
    cdef int upper
    upper = int(n ** 0.5 + 1)
    
    # Perform checks    
    if n == 2:
        return 1
    elif n < 2 or n % 2 == 0:
        return 0
    else:
        for i in range(3, upper, 2):
            if n % i == 0:
                return 0
    return 1

def primes_between_cython_parallel(int n1, int n2):
    """Returns prime numbers between n1 and n2 (exclusive on both ends) as a numpy array"""
    
    # Declare variables
    cdef int i
    cdef cnp.ndarray[cnp.int_t, ndim = 1] primes
    
    # Check to see which argument is larger
    if n1 > n2:
        n1, n2 = n2, n1
    
    # Initialize range of interest
    primes = np.arange(n1, n2 + 1)
    
    # Mark all non-prime numbers
    with cython.nogil, parallel():
        for i in prange(primes.shape[0]):
            if not is_prime_cython_serial(primes[i]):
                primes[i] = -999
    
    # Return all primes
    return primes[primes != -999]


Error compiling Cython file:
------------------------------------------------------------
...
import numpy as np
cimport numpy as cnp
from libcpp cimport bool

# Primitive function
cdef is_prime_cython_serial(int n, int check) nogil:
    ^
------------------------------------------------------------

/home/jovyan/.cache/ipython/cython/_cython_magic_663015b409367be5ea55738e982eea32.pyx:9:5: Function with Python return type cannot be declared nogil

Error compiling Cython file:
------------------------------------------------------------
...
    upper = int(n ** 0.5 + 1)
    check = 1
    
    # Perform checks    
    if n == 2:
        return check
       ^
------------------------------------------------------------

/home/jovyan/.cache/ipython/cython/_cython_magic_663015b409367be5ea55738e982eea32.pyx:20:8: Returning Python object not allowed without gil

Error compiling Cython file:
------------------------------------------------------------
...
    upper = int(n ** 0.5 + 1)
    che

- (10 points) Report the speed-up multiplier as an integer of the `numba` and `cython` serial and parallel versions using `timeit` in a DataFrame for the numbers between 0 and 1,000,000



*Values in data frame are calculated as (time for serial version in python) / (time for sped up version). A larger integer value denotes a greater speed-up*

In [15]:
# Create dataframe
df = pd.DataFrame(dict(Serial = [0, 0], Parallel = [0, 0]), index = ['numba', 'cython'])

# Set upper bound
upper = 1000000

# Calculate reference time
ref = %timeit -o -r3 -n3 primes_between(0, upper)
ref = ref.average

# numba serial
numba_serial = %timeit -o -r3 -n3 primes_between_numba_serial(0, upper)
numba_serial = numba_serial.average
df.iloc[0, 0] = int(np.round(ref / numba_serial))

# numba parallel
numba_parallel = %timeit -o -r3 -n3 primes_between_numba_parallel(0, upper)
numba_parallel = numba_parallel.average
df.iloc[0, 1] = int(np.round(ref / numba_parallel))

# cython serial
cython_serial = %timeit -o -r3 -n3 primes_between_cython_serial(0, upper)
cython_serial = cython_serial.average
df.iloc[1, 0] = int(np.round(ref / cython_serial))

# cython parallel
#cython_parallel = %timeit -o -r3 -n3 primes_between_cython_parallel(0, upper)
#cython_parallel = cython_parallel.average
#df.iloc[1, 1] = int(np.round(ref / cython_parallel))

df

20.2 s ± 12.8 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)
205 ms ± 1.07 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


NameError: name 'primes_between_numba_parallel' is not defined

- (10 points) `multiprocessing`



In [5]:
# Create array with appropriate partitions
loop_array = np.array([0, 125000, 125001, 250000, 250001, 375000, 375001, 500000, 
                       500001, 625000, 625001, 750000, 750001, 875000, 875001, 1000000])

# Parallelize using multiprocessing
with mp.Pool(processes = 8) as pool:
    res = pool.starmap(primes_between, np.array_split(loop_array, 8))
    
# Show result
np.concatenate(res)

array([     2,      3,      5, ..., 999961, 999979, 999983])

- (10 points) `joblib`



In [25]:
res = Parallel(n_jobs=8)(delayed(primes_between)(i - 125000, i) for i in range(125000, 1000001, 125000))
np.concatenate(res).astype('int')

array([     2,      3,      5, ..., 999961, 999979, 999983])

- (10 points) `ipyparallel`

In [16]:
# Connect to cluster of remote engines
rc = Client()
dv = rc[:]

# Import numpy on engine
with dv.sync_imports():
    import numpy

importing numpy on engine(s)


We copy and paste our original python functions with two minor modifications:

- Use the full name, numpy, instead of np. This change appears to be required when we use dv.sync_imports( )
- We note with a decorator for primes_between( ) that it depends on is_prime( )

In [20]:
def is_prime_ipy(n):
    """Returns True if a given integer n is prime and false otherwise"""

    if n == 2:
        return True
    elif n < 2 or n % 2 == 0:
        return False
    else:
        for i in range(3, int(numpy.sqrt(n))+1, 2):
            if n % i == 0:
                return False
    return True

In [21]:
@ipp.require(is_prime_ipy)
def primes_between_ipy(n1, n2):
    """Returns prime numbers between n1 and n2 (inclusive on both ends) as a numpy array"""
    
    # Check to see which argument is larger
    if n1 > n2:
        n1, n2 = n2, n1
    
    # Initialize range of interest
    primes = numpy.arange(n1, n2 + 1)
    
    # Mark all non-prime numbers
    for i in range(primes.shape[0]):
        if not is_prime(primes[i]):
            primes[i] = -999
    
    # Return all primes
    return primes[primes != -999]

In [22]:
res = dv.map_sync(primes_between_ipy, [0, 250001, 500001, 750001], [250000, 500000, 750000, 1000000])
np.concatenate(res)

array([     2,      3,      5, ..., 999961, 999979, 999983])