In [1]:
import numpy as np
from itertools import product
from joblib import Parallel, delayed
from functools import reduce
from operator import mul
from numba import jit, prange

In [16]:
# Benchmark inputs shared by every cell below.
lst = [
    [1.0, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
arr = np.array(lst)  # float64: the leading 1.0 promotes the integer entries
n = 4  # number of Kronecker factors (arr ⊗ arr ⊗ ... ⊗ arr, n times)

In [17]:
# Using mul with parallelisation
def test1(arr, n):
    flat = np.ravel(arr).tolist()
    gen = (list(a) for a in product(flat, repeat=n))

    results = np.array(Parallel(n_jobs=-1)(delayed(reduce)(mul, x) for x in gen))

    nrows = arr.shape[0]
    ncols = arr.shape[1]

    arr3d = results.reshape((nrows, ncols)*n)

    arrparmeth1 = arr3d
    for i in range(0,n-1):
        arrparmeth1 = np.concatenate(np.concatenate(arrparmeth1 , axis=1), axis=1)
    
    return arrparmeth1

In [18]:
# Using numba (including Intel SVML) with parallelisation
def test2(arr, n):
    flat = np.ravel(arr).tolist()
    gen = np.array([list(a) for a in product(flat, repeat=n)])
    
    @jit(nopython=True, parallel=True)
    def mtp(gen):
        nrows_gen = gen.shape[0]
        results = np.empty(nrows_gen)
#        def op(x, y):                     # numba with parallel=True does not work with mul
#            return mul(x, y)
        for i in prange(0,nrows_gen):
            results[i] = np.prod(gen[i])
        return results
    
    nrows = arr.shape[0]
    ncols = arr.shape[1]

    arr3d = mtp(gen).reshape((nrows, ncols)*n)

#    @jit(nopython=True, parallel=True)    # numba with parallel=True does not work with nested concatenate
#    @jit                                  # numba does not work with the code below
    def con(arr3d):
        arrparmeth1 = arr3d
        for i in prange(0,n-1):
            arrparmeth1 = np.concatenate(np.concatenate(arrparmeth1 , axis=1), axis=1)
        return arrparmeth1
    return con(arr3d)

In [19]:
# Using numba (including Intel SVML) with np.kron
def test3(arr, n):
#    @jit(nopython=True, parallel=True)    # numba with parallel=True does not work with kron
    @jit
    def testkron(arr, n):
        arr_copy = arr
        for j in prange(0,n-1):
            arr = np.kron(arr, arr_copy)
        arrkronmeth = arr
        return arrkronmeth
    return testkron(arr, n)

In [20]:
# Using np.kron
def testkron(arr, n):
    arr_copy = arr
    for j in range(0, n-1):
        arr = np.kron(arr, arr_copy)
    arrkronmeth = arr
    return arrkronmeth

In [21]:
# Check that test1 reproduces the np.kron reference exactly
# (np.array_equal: same shape and element-wise ==, no tolerance)
np.array_equal(test1(arr, n), testkron(arr, n))

True

In [22]:
# Check that test2 reproduces the np.kron reference exactly
# (np.array_equal: same shape and element-wise ==, no tolerance)
np.array_equal(test2(arr, n), testkron(arr, n))

True

In [23]:
# Check that test3 reproduces the np.kron reference exactly
# (np.array_equal: same shape and element-wise ==, no tolerance)
np.array_equal(test3(arr, n), testkron(arr, n))

True

In [24]:
# Time test1 (joblib Parallel + operator.mul) on the arr and n defined above
%timeit test1(arr, n)

708 ms ± 47.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [25]:
# Time test2 (numba prange product kernel) on the arr and n defined above
%timeit test2(arr, n)

1.16 s ± 129 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [26]:
# Time test3 (np.kron under numba jit) on the arr and n defined above
%timeit test3(arr, n)

621 ms ± 39 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [27]:
# Time testkron (plain NumPy np.kron, the reference baseline) on the arr and n defined above
%timeit testkron(arr, n)

398 µs ± 31.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
