In [1]:
import concurrent.futures
from multiprocessing import cpu_count, shared_memory

import numpy as np

In [10]:
# Create a large array of uniform random values in [0, 1). It is the input for
# both the shared-memory computation and the serial baseline further down.
# The generator is seeded so that "Restart Kernel -> Run All" reproduces the
# exact same data on every run.
RANDOM_SEED = 42
ARRAY_SHAPE = (400, 400, 1200)
random = np.random.default_rng(RANDOM_SEED).random(ARRAY_SHAPE)

In [11]:
# Allocate two shared-memory blocks, each large enough to hold `random`:
# one receives a copy of the input data, the other collects the results.
# NOTE(review): no close()/unlink() for these blocks is visible in this
# notebook -- confirm they are released once the results are no longer needed.
shm_random = shared_memory.SharedMemory(create=True, size=random.nbytes)
shm_result = shared_memory.SharedMemory(create=True, size=random.nbytes)

# The block names are what the worker processes use to attach to the same memory.
shm_name_random = shm_random.name
shm_name_result = shm_result.name

# Wrap each raw buffer in an ndarray view with the input's shape and dtype.
shared_random = np.ndarray(shape=random.shape, dtype=random.dtype, buffer=shm_random.buf)
shared_result = np.ndarray(shape=random.shape, dtype=random.dtype, buffer=shm_result.buf)

# Fill the input view with the original data; the result view is written later
# by the worker function.
shared_random[...] = random

In [12]:
def add_average(ary: np.ndarray) -> np.ndarray:
    """Return a new array equal to ``ary`` with its overall mean added to every element.

    The mean is taken over all elements of ``ary`` (no axis is given), so a
    single scalar is broadcast across the whole input.
    """
    average = np.mean(ary)
    shifted = ary + average
    return shifted

In [13]:
def shm_add_average(
    arguments: dict,
):
    """Apply ``add_average`` to one ``[lat_index, lon_index]`` slice held in shared memory.

    Attaches by name to the existing input and result shared-memory blocks,
    reads the selected slice from the input block and writes ``add_average``
    of that slice to the same position in the result block. Both attachments
    are closed again before returning, so repeated calls inside a long-lived
    worker process do not accumulate open handles/mappings.

    Parameters
    ----------
    arguments : dict
        Must provide the keys ``"shm_name_input"``, ``"shm_name_result"``,
        ``"shape"``, ``"dtype"``, ``"lat_index"`` and ``"lon_index"``.
        ``shape`` and ``dtype`` describe the full array stored in both blocks.

    Returns
    -------
    None
        The result is written into the shared result block as a side effect.
    """
    attached = []
    shm_ary_input = shm_ary_result = None
    try:
        existing_shm_input = shared_memory.SharedMemory(name=arguments["shm_name_input"])
        attached.append(existing_shm_input)
        shm_ary_input = np.ndarray(
            shape=arguments["shape"],
            dtype=arguments["dtype"],
            buffer=existing_shm_input.buf,
        )
        existing_shm_result = shared_memory.SharedMemory(name=arguments["shm_name_result"])
        attached.append(existing_shm_result)
        shm_ary_result = np.ndarray(
            shape=arguments["shape"],
            dtype=arguments["dtype"],
            buffer=existing_shm_result.buf,
        )
        index = (arguments["lat_index"], arguments["lon_index"])
        shm_ary_result[index] = add_average(shm_ary_input[index])
    finally:
        # Drop the ndarray views first: SharedMemory.close() raises BufferError
        # while an exported view of its buffer is still alive.
        shm_ary_input = shm_ary_result = None
        for handle in attached:
            try:
                # close() only detaches this process; it does not unlink the block.
                handle.close()
            except BufferError:
                # A view is still referenced somewhere (e.g. by an in-flight
                # traceback). Best effort: leave it to process teardown rather
                # than masking the original error.
                pass

In [14]:
%%time
arguments_list = []
for lat_index in range(shared_random.shape[0]):
    for lon_index in range(shared_random.shape[1]):
        arguments = {
            "lat_index": lat_index,
            "lon_index": lon_index,
            "shm_name_input": shm_name_random,
            "shm_name_result": shm_name_result,
            "dtype": shared_random.dtype,
            "shape": shared_random.shape,
        }
        arguments_list.append(arguments)

# use a ProcessPoolExecutor to download the images in parallel
with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count()) as executor:
    # use the executor to map the download function to the iterable of arguments
    executor.map(shm_add_average, arguments_list)

CPU times: user 23.9 s, sys: 2.69 s, total: 26.6 s
Wall time: 1min 41s


In [19]:
%%time
added_ary = random.copy()
arguments_list = []
for lat_index in range(random.shape[0]):
    for lon_index in range(random.shape[1]):
        added_ary[lat_index, lon_index] = add_average(random[lat_index, lon_index])

CPU times: user 1.61 s, sys: 69.9 ms, total: 1.68 s
Wall time: 1.68 s


In [20]:
# Check that the shared-memory result matches the serial baseline
# (element-wise, within np.allclose's default tolerances).
np.allclose(shared_result, added_ary)

True

In [21]:
# Shape of the result array backed by shared memory.
shared_result.shape

(400, 400, 1200)

In [22]:
# Shape of the serially computed baseline array.
added_ary.shape

(400, 400, 1200)