### Parallel processing

In [10]:
from math import sqrt
import typing

In [11]:
# Sequential baseline: square 0..9, then take the square root of each square.
list(map(sqrt, (n ** 2 for n in range(10))))

[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]

In [12]:
from joblib import Parallel, delayed

In [13]:
# Same ten sqrt calls as the sequential comprehension, but handed to joblib:
# delayed(sqrt)(...) describes each call and Parallel runs them with at most
# 2 concurrent jobs.
Parallel(n_jobs=2)(delayed(sqrt)(i ** 2) for i in range(10))

[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]

In [14]:
# Keep the Parallel instance in a variable so it can be inspected and called
# directly in the following cells.
p = Parallel(n_jobs=2)

In [16]:
# A Parallel instance is itself callable. callable() is the built-in way to
# check that at runtime; typing.Callable is intended for type annotations.
callable(p)

True

In [18]:
# IPython help syntax: show the signature and docstring of the Parallel instance.
p?

Signature:      p(iterable)
Type:           Parallel
String form:    Parallel(n_jobs=2)
File:           ~/code/miku/pyflow/.venv/lib/python3.12/site-packages/joblib/parallel.py
Docstring:
Helper class for readable parallel mapping.

Read more in the :ref:`User Guide <parallel>`.

Parameters
----------
n_jobs: int, default=None
    The maximum number of concurrently running jobs, such as the number
    of Python worker processes when ``backend="loky"`` or the size of
    the thread-pool when ``backend="threading"``.
    This argument is converted to an integer, rounded below for float.
    If -1 is given, `joblib` tries to use all CPUs. The number of CPUs
    ``n_cpus`` is obtained with :func:`~cpu_count`.
    For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. For instance,
    using ``n_jobs=-2`` will result in all CPUs but one being used.
    This argument can 

In [19]:
# IPython help syntax: show the signature and docstring of joblib's delayed.
delayed?

Signature: delayed(function)
Docstring: Decorator used to capture the arguments of a function.
File:      ~/code/miku/pyflow/.venv/lib/python3.12/site-packages/joblib/parallel.py
Type:      function

In [21]:
# Wrap sqrt with delayed: calling f(...) captures the call's arguments
# instead of computing the square root.
f = delayed(sqrt)

In [28]:
# Each f(...) call produces a (function, args, kwargs) tuple, not a result.
[f(i**2) for i in range(10)]

[(<function math.sqrt(x, /)>, (0,), {}),
 (<function math.sqrt(x, /)>, (1,), {}),
 (<function math.sqrt(x, /)>, (4,), {}),
 (<function math.sqrt(x, /)>, (9,), {}),
 (<function math.sqrt(x, /)>, (16,), {}),
 (<function math.sqrt(x, /)>, (25,), {}),
 (<function math.sqrt(x, /)>, (36,), {}),
 (<function math.sqrt(x, /)>, (49,), {}),
 (<function math.sqrt(x, /)>, (64,), {}),
 (<function math.sqrt(x, /)>, (81,), {})]

In [33]:
# A hand-built (function, args, kwargs) tuple is accepted just like the ones
# produced by delayed: here the lambda is applied to 1.
p([(lambda x: x + 1, (1,), {})])

[2]

### Memoization

In [3]:
from joblib import Memory
import time
import numpy as np


def costly_compute(data, column_index=0, delay=5):
    """Simulate an expensive computation.

    Sleeps for ``delay`` seconds, then returns ``data[column_index]``.

    Parameters
    ----------
    data : indexable
        Any object supporting ``data[column_index]``.
    column_index : int, default=0
        Index passed straight to ``data[...]``. For a 2-D NumPy array this
        selects along the first axis, i.e. it returns a row despite the
        parameter's name.
    delay : float, default=5
        Number of seconds to sleep before returning. Pass ``0`` to skip the
        wait, e.g. for quick re-runs of the notebook.

    Returns
    -------
    The element ``data[column_index]``.
    """
    time.sleep(delay)
    return data[column_index]

In [5]:
# Fixed seed so the printed values are the same on every run.
rng = np.random.RandomState(42)
data = rng.randn(int(1e5), 10)

# perf_counter() is a monotonic clock intended for measuring elapsed time;
# time.time() follows the wall clock, which may be adjusted mid-measurement.
start = time.perf_counter()
data_trans = costly_compute(data)
end = time.perf_counter()

print('\nThe function took {:.2f} s to compute.'.format(end - start))
print('\nThe transformed data are:\n {}'.format(data_trans))


The function took 5.00 s to compute.

The transformed data are:
 [ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337 -0.23413696
  1.57921282  0.76743473 -0.46947439  0.54256004]


In [6]:
from joblib import Memory
# Directory for joblib's on-disk cache, relative to the notebook's working
# directory; verbose=0 is passed to keep the cache's own logging quiet.
location = './cachedir'
memory = Memory(location, verbose=0)


def costly_compute_cached(data, column_index=0, delay=5):
    """Simulate an expensive computation (the variant that gets cached).

    Sleeps for ``delay`` seconds, then returns ``data[column_index]``.

    Parameters
    ----------
    data : indexable
        Any object supporting ``data[column_index]``.
    column_index : int, default=0
        Index passed straight to ``data[...]``. For a 2-D NumPy array this
        selects along the first axis, i.e. it returns a row despite the
        parameter's name.
    delay : float, default=5
        Number of seconds to sleep before returning. Pass ``0`` to skip the
        wait, e.g. for quick re-runs of the notebook.

    Returns
    -------
    The element ``data[column_index]``.
    """
    time.sleep(delay)
    return data[column_index]


# Rebind the name to the wrapper returned by memory.cache, so calls go
# through the on-disk cache configured in `memory`.
costly_compute_cached = memory.cache(costly_compute_cached)

# perf_counter() is a monotonic clock intended for measuring elapsed time;
# time.time() follows the wall clock, which may be adjusted mid-measurement.
# In the recorded run this call was a cache miss, so it still took ~5 s.
start = time.perf_counter()
data_trans = costly_compute_cached(data)
end = time.perf_counter()

print('\nThe function took {:.2f} s to compute.'.format(end - start))
print('\nThe transformed data are:\n {}'.format(data_trans))



The function took 5.03 s to compute.

The transformed data are:
 [ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337 -0.23413696
  1.57921282  0.76743473 -0.46947439  0.54256004]


In [7]:
# Flush the whole cache so no stored results are left behind on disk.
memory.clear()

[Memory(location=./cachedir/joblib)]: Flushing completely the cache


### Pickling

In [8]:
import os
from tempfile import mkdtemp

# mkdtemp() creates a fresh, uniquely named directory and does not remove it
# afterwards, so deleting it when done is up to the caller.
savedir = mkdtemp()

# Target file for the dump/load round-trip.
filename = os.path.join(savedir, 'test.joblib')

In [9]:
import numpy as np

# Mixed payload: a plain Python list next to a NumPy array.
to_persist = [
    ('a', [1, 2, 3]),
    ('b', np.arange(10)),
]

In [10]:
import joblib

# dump() returns the list of file paths it wrote (a single file here).
joblib.dump(to_persist, filename)

['/tmp/tmpm4wdura4/test.joblib']

In [11]:
# joblib.load is pickle-based and can execute arbitrary code while loading,
# so only use it on trusted files (here: the file written by this notebook).
joblib.load(filename)

[('a', [1, 2, 3]), ('b', array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))]