In [30]:
"""
https://research.wmz.ninja/articles/2018/03/on-sharing-large-arrays-when-using-pythons-multiprocessing.html

multiprocess returns a list, can be list of tuples or list of arrays

"""


import numpy as np
import time
from multiprocessing import Pool, RawArray

# A global dictionary storing the variables passed from the initializer.
var_dict = {}

def init_worker(X, X_shape):
    # Using a dictionary is not strictly necessary. You can also
    # use global variables.
    var_dict['X'] = X
    var_dict['X_shape'] = X_shape

def worker_func(i):
    # Simply computes the sum of the i-th row of the input matrix X
    X_np = np.frombuffer(var_dict['X']).reshape(var_dict['X_shape'])
    time.sleep(1) # Some heavy computations
    print(f"worker {i}")
    return np.sum(X_np[i,:]).item(), np.full(15,i)

# We need this check for Windows to prevent infinitely spawning new child
# processes.
if __name__ == '__main__':
    X_shape = (16, 1000000)
    # Randomly generate some data
    data = np.random.randn(*X_shape)
    X = RawArray('d', X_shape[0] * X_shape[1])
    # Wrap X as an numpy array so we can easily manipulates its data.
    X_np = np.frombuffer(X).reshape(X_shape)
    # Copy data to our shared array.
    np.copyto(X_np, data)
    # Start the process pool and do the computation.
    # Here we pass X and X_shape to the initializer of each worker.
    # (Because X_shape is not a shared variable, it will be copied to each
    # child process.)
    resarray=np.zeros(X_shape[0]) #
    print(resarray) #

    with Pool(processes=4, initializer=init_worker, initargs=(X, X_shape)) as pool:
        print(X_shape) #
        inarray=range(X_shape[0])
        #result = pool.map(worker_func, inarray)
        result = pool.map(worker_func, inarray)
        print('raw result:\n', result)
        print(type(result[0]))
        print('Results (pool):\n', np.array(result[:][1]))
    # Should print the same results.
    print('Results (numpy):\n', np.sum(X_np, 1))

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
(16, 1000000)
worker 2worker 1

worker 3worker 0

worker 4
worker 5
worker 6worker 7

worker 8
worker 9
worker 10worker 11

worker 12
worker 13
worker 15worker 14

raw result:
 [(-806.1063658982544, array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), (267.4069129089456, array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])), (648.9563063060658, array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])), (311.7159939927641, array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])), (-72.42718171263712, array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])), (-989.0265197260484, array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5])), (8.894800388171184, array([6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6])), (-437.9956110372471, array([7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7])), (502.40349600884144, array([8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8])), (-984.0816754695275, array([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9])), (-254.3717254145



In [36]:
info = [ ( 1, 2), (3, 4), (5, 6) ]

print(info[0][:])
info[0][1] == 2
info[1][0] == 3
info[1][1] == 4
info[2][0] == 5
info[2][1] == 6

(1, 2)


True