integrate и integrate_async (ЛР 1 и 2)

In [15]:
def integrate(f, a: float, b: float, *, n_iter: int = 1000):
    """Approximate the integral of ``f`` over ``[a, b]`` with the trapezoidal rule.

    The interval is split into ``n_iter`` equal sub-intervals; interior
    points contribute ``f(x)`` and the two endpoints contribute half of
    their value.

    Args:
        f: Callable taking one float and returning a number.
        a: Lower integration limit.
        b: Upper integration limit. May be smaller than ``a``, in which
            case the result is the sign-reversed integral.
        n_iter: Number of sub-intervals; must be at least 1.

    Returns:
        The approximation rounded to 8 decimal places, or the integer ``0``
        when ``a == b``.

    Raises:
        ValueError: If ``n_iter`` is less than 1.
    """
    if b == a:
        return 0
    if n_iter < 1:
        raise ValueError("n_iter must be a positive integer")

    h = (b - a) / n_iter

    # Sample points are computed from the index rather than by repeatedly
    # adding ``h``: accumulated rounding error could otherwise drop or add
    # the last interior point, and reversed limits would be skipped entirely.
    interior = sum(f(a + i * h) for i in range(1, n_iter))
    endpoints = (f(a) + f(b)) / 2

    return round(h * (interior + endpoints), 8)

In [16]:
import math

%timeit -n100 -r10 integrate(math.sin, math.pi / 2, math.pi, n_iter=10**4)
%timeit -n100 -r10 integrate(math.sin, math.pi / 2, math.pi, n_iter=10**5)
%timeit -n100 -r10 integrate(math.sin, math.pi / 2, math.pi, n_iter=10**6)

1.32 ms ± 210 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
11.4 ms ± 368 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
115 ms ± 3.95 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [17]:
import concurrent.futures as ftres

def integrate_async(f, a: float, b: float, *, n_jobs: int = 2, n_iter: int = 1000):
    """Integrate ``f`` over ``[a, b]`` by splitting the work across threads.

    The interval is cut into ``n_jobs`` equal pieces; each piece is handed
    to ``integrate`` on a ``ThreadPoolExecutor`` worker and the partial
    results are summed.

    Args:
        f: Callable taking one float and returning a number.
        a: Lower integration limit.
        b: Upper integration limit.
        n_jobs: Number of sub-intervals / worker threads; must be >= 1.
        n_iter: Total number of trapezoid steps, divided evenly (floor)
            between the jobs; must be >= 1.

    Returns:
        Sum of the per-piece results returned by ``integrate``.

    Raises:
        ValueError: If ``n_jobs`` or ``n_iter`` is less than 1.
    """
    if n_jobs < 1:
        raise ValueError("n_jobs must be a positive integer")
    if n_iter < 1:
        raise ValueError("n_iter must be a positive integer")

    step = (b - a) / n_jobs
    # Every piece needs at least one step, even when n_jobs > n_iter.
    per_job = max(1, n_iter // n_jobs)
    bounds = [(a + i * step, a + (i + 1) * step) for i in range(n_jobs)]

    # The context manager shuts the pool down so worker threads are not
    # leaked on every call.
    with ftres.ThreadPoolExecutor(max_workers=n_jobs) as executor:
        futures = [
            executor.submit(integrate, f, lo, hi, n_iter=per_job)
            for lo, hi in bounds
        ]
        # Collect in submission order so the floating-point sum is
        # deterministic regardless of which thread finishes first.
        return sum(future.result() for future in futures)

In [18]:
# n_jobs = 2

%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_iter=10**4)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_iter=10**5)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_iter=10**6)

2.24 ms ± 65.2 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
15.9 ms ± 419 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
204 ms ± 4.88 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [19]:
# n_jobs = 4

%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**4)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**5)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**6)

2.75 ms ± 83.2 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
17 ms ± 432 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
228 ms ± 8.08 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [20]:
# n_jobs = 6  (FIXME: the calls below pass n_jobs=4, so the recorded timings are for 4 workers, not 6)

%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**4)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**5)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**6)

2.81 ms ± 98.7 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
17.5 ms ± 128 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
238 ms ± 3.7 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


integrate (Cython, ЛР 3)

In [21]:
%load_ext Cython

In [22]:
%%cython

from cython.parallel import prange

from libc.math cimport sin


ctypedef double (*func)(double x) nogil


cdef double integrate(func f, double a, double b, int n_iter = 1000) nogil:
    # Trapezoidal-rule approximation of the integral of ``f`` over [a, b]
    # using ``n_iter`` equal sub-intervals. Returns 0 when a == b.
    #
    # The limits are taken as ``double`` (not ``float``) so that callers'
    # values are not truncated to single precision before integrating.
    # ``n_iter`` is expected to be positive.
    cdef double h
    cdef double acc = 0
    cdef double edge
    cdef int i

    if b == a:
        return 0

    h = (b - a) / n_iter

    # Interior points are derived from the loop index instead of repeatedly
    # adding ``h``, so accumulated rounding error cannot drop or add a sample.
    for i in range(1, n_iter):
        acc = acc + f(a + i * h)

    # Endpoints contribute with half weight.
    edge = (f(a) + f(b)) / 2

    return h * (acc + edge)


cpdef integrate_f(a, b, n_iter):
    """Integrate the C library ``sin`` over ``[a, b]`` using ``n_iter`` steps.

    Python-callable entry point that forwards to the C-level ``integrate``.
    """
    result = integrate(sin, a, b, n_iter)
    return result


In [23]:
%timeit -n100 -r10 integrate_f(math.pi / 2, math.pi, n_iter=10**4)
%timeit -n100 -r10 integrate_f(math.pi / 2, math.pi, n_iter=10**5)
%timeit -n100 -r10 integrate_f(math.pi / 2, math.pi, n_iter=10**6)

137 µs ± 2.92 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
1.32 ms ± 5.61 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
13.2 ms ± 64.3 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


integrate_async (Joblib, ЛР 3)

In [24]:
from joblib import Parallel, delayed

def integrate(f, a: float, b: float, *, n_iter: int = 1000):
    """Approximate the integral of ``f`` over ``[a, b]`` with the trapezoidal rule.

    The interval is split into ``n_iter`` equal sub-intervals; interior
    points contribute ``f(x)`` and the two endpoints contribute half of
    their value.

    Args:
        f: Callable taking one float and returning a number.
        a: Lower integration limit.
        b: Upper integration limit. May be smaller than ``a``, in which
            case the result is the sign-reversed integral.
        n_iter: Number of sub-intervals; must be at least 1.

    Returns:
        The approximation rounded to 8 decimal places, or the integer ``0``
        when ``a == b``.

    Raises:
        ValueError: If ``n_iter`` is less than 1.
    """
    if b == a:
        return 0
    if n_iter < 1:
        raise ValueError("n_iter must be a positive integer")

    h = (b - a) / n_iter

    # Sample points are computed from the index rather than by repeatedly
    # adding ``h``: accumulated rounding error could otherwise drop or add
    # the last interior point, and reversed limits would be skipped entirely.
    interior = sum(f(a + i * h) for i in range(1, n_iter))
    endpoints = (f(a) + f(b)) / 2

    return round(h * (interior + endpoints), 8)


def integrate_async(f, a: float, b: float, *, n_jobs: int = 2, n_iter: int = 1000, backend='threading'):
    """Integrate ``f`` over ``[a, b]`` by splitting the work across joblib workers.

    The interval is cut into ``n_jobs`` equal pieces; each piece is passed
    to ``integrate`` through ``joblib.Parallel`` and the partial results are
    summed.

    Args:
        f: Callable taking one float and returning a number.
        a: Lower integration limit.
        b: Upper integration limit.
        n_jobs: Number of sub-intervals / joblib workers; must be >= 1.
        n_iter: Total number of trapezoid steps, divided evenly (floor)
            between the jobs; must be >= 1.
        backend: Name of the joblib backend forwarded to ``Parallel``.

    Returns:
        Sum of the per-piece results returned by ``integrate``.

    Raises:
        ValueError: If ``n_jobs`` or ``n_iter`` is less than 1.
    """
    if n_jobs < 1:
        # A negative value would otherwise yield an empty task list and a
        # silent result of 0.
        raise ValueError("n_jobs must be a positive integer")
    if n_iter < 1:
        raise ValueError("n_iter must be a positive integer")

    step = (b - a) / n_jobs
    # Every piece needs at least one step, even when n_jobs > n_iter.
    per_job = max(1, n_iter // n_jobs)

    tasks = (
        delayed(integrate)(f, a + i * step, a + (i + 1) * step, n_iter=per_job)
        for i in range(n_jobs)
    )
    with Parallel(n_jobs=n_jobs, backend=backend) as pool:
        return sum(pool(tasks))


Тесты с потоками (backend=threading)

In [25]:
# n_jobs = 2

%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_iter=10**4)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_iter=10**5)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_iter=10**6)

4.05 ms ± 607 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
20.4 ms ± 196 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
238 ms ± 2.62 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [26]:
# n_jobs = 4

%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**4)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**5)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**6)

4.18 ms ± 672 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
21.2 ms ± 187 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
257 ms ± 2 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [27]:
# n_jobs = 6

%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=6, n_iter=10**4)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=6, n_iter=10**5)
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=6, n_iter=10**6)

4.26 ms ± 267 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
21.4 ms ± 314 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
259 ms ± 1.09 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


Тесты с процессами (backend=multiprocessing)

In [28]:
# n_jobs = 2

%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=2, n_iter=10**4, backend='multiprocessing')
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=2, n_iter=10**5, backend='multiprocessing')
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=2, n_iter=10**6, backend='multiprocessing')

57.7 ms ± 1.17 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)
67.2 ms ± 1.89 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)
155 ms ± 4.49 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [29]:
# n_jobs = 4

%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**4, backend='multiprocessing')
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**5, backend='multiprocessing')
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=4, n_iter=10**6, backend='multiprocessing')

66.4 ms ± 774 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)
74.6 ms ± 4.33 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)
128 ms ± 5.28 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [30]:
# n_jobs = 6

%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=6, n_iter=10**4, backend='multiprocessing')
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=6, n_iter=10**5, backend='multiprocessing')
%timeit -n100 -r10 integrate_async(math.sin, math.pi / 2, math.pi, n_jobs=6, n_iter=10**6, backend='multiprocessing')

75 ms ± 2.05 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)
87.7 ms ± 3.96 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)
123 ms ± 1.1 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)
