In [46]:
## Sample optmization problem to move away from loop logic in numpy
## https://softwareengineering.stackexchange.com/questions/254475/how-do-i-move-away-from-the-for-loop-school-of-thought

In [4]:
import numpy as np

In [5]:
# Optimization Problem 1
def sumproducts(x, y):
    """Return the sum of x[i] * y[j] for all pairs of indices i, j.

    >>> sumproducts(np.arange(3000), np.arange(3000))
    20236502250000

    """
    result = 0
    for i in range(len(x)):
        for j in range(len(y)):
            result += x[i] * y[j]
    return result

In [44]:
sumproducts(np.arange(3000), np.arange(3000)) == 20236502250000

True

In [29]:
%%timeit
sumproducts(np.arange(3000), np.arange(3000)) == 20236502250000

2.63 s ± 117 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [26]:
# Optimization solution 1

def optimized_sumproducts(x, y):
    result = [(x * y1).sum() for y1 in y]
    return np.array(result).sum()

In [45]:
optimized_sumproducts(np.arange(3000), np.arange(3000)) == 20236502250000

True

In [28]:
%%timeit
optimized_sumproducts(np.arange(3000), np.arange(3000)) == 20236502250000

15.8 ms ± 321 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [34]:
# Optimization problem 2
def countlower(x, y):
    """Return the number of pairs i, j such that x[i] < y[j].

    >>> countlower(np.arange(0, 200, 2), np.arange(40, 140))
    4500

    """
    result = 0
    for i in range(len(x)):
        for j in range(len(y)):
            if x[i] < y[j]:
                result += 1
    return result


In [None]:
countlower(np.arange(0, 200, 2), np.arange(40, 140)) == 4500

In [43]:
%%timeit
countlower(np.arange(0, 200, 2), np.arange(40, 140)) == 4500

2.27 ms ± 258 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [39]:
# Optimization solution 2
def optimized_countlower(x, y):
    result = [(x1 < y) for x1 in x]
    return np.array(result).sum()

In [None]:
optimized_countlower(np.arange(0, 200, 2), np.arange(40, 140)) == 4500

In [42]:
%%timeit
optimized_countlower(np.arange(0, 200, 2), np.arange(40, 140)) == 4500

154 µs ± 22.2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [47]:
# Optimization problem 1
def cleanup(x, missing=-1, value=0):
    """Return an array that's the same as x, except that where x ==
    missing, it has value instead.

    >>> cleanup(np.arange(-3, 3), value=10)
    ... # doctest: +NORMALIZE_WHITESPACE
    array([-3, -2, 10, 0, 1, 2])

    """
    result = []
    for i in range(len(x)):
        if x[i] == missing:
            result.append(value)
        else:
            result.append(x[i])
    return np.array(result)

In [48]:
cleanup(np.arange(-3, 3), value=10)

array([-3, -2, 10,  0,  1,  2])

In [49]:
# Optimization solution 1
def optimized_cleanup(x, missing=-1, value=0):
    return x

In [50]:
optimized_cleanup(np.arange(-3, 3), value=10)

array([-3, -2, -1,  0,  1,  2])