## Normal queue speed in multiprocessing

In [1]:
import multiprocessing as mp
import numpy as np
import time

def foo(q):
    """Worker: enqueue a large uint8 array, then how long that ``put`` call took.

    Two items are placed on ``q``, in this order:

    1. a zero-filled ``(16, 2048, 2048, 3)`` ``uint8`` NumPy array;
    2. the wall-clock duration, in seconds, of the ``put`` call for that array.

    Only the ``put`` call itself is timed; the array is allocated beforehand.

    Args:
        q: Queue-like object exposing ``put(item)``.
    """
    payload = np.zeros(shape=(16, 2048, 2048, 3), dtype=np.uint8)

    began = time.time()
    q.put(payload)
    elapsed = time.time() - began

    q.put(elapsed)

# Benchmark: time how long the parent needs to `get` a large NumPy array
# that a child process placed on a standard multiprocessing queue.
q = mp.Queue()
p = mp.Process(target=foo, args=(q,))
p.start()

# Make sure the process has put something on the queue, so the timed `get`
# below does not also measure process start-up.
time.sleep(3)

# Timed section: receive the array itself.
start = time.time()
r = q.get(timeout=5)
end = time.time()
diff = end - start

# Second item is the child's own measurement of its `put` call.
# Drain the queue completely BEFORE joining: a process that has put items on
# a queue does not terminate until they have been flushed, so joining while
# items are still pending risks a deadlock.
r = q.get(timeout=5)
p.join()

print('put time', '{}s'.format(round(r, 4)))
print('get time', '{}s'.format(round(diff, 4)))

put time 0.0011s
get time 0.4693s


In [None]:
# Restart the kernel before running the cells below: the next section rebinds
# `mp` and `foo`, so it is meant to start from a fresh kernel.

# When using JupyterLab, run these two lines instead of the HTML snippet:
# import os
# os._exit(00)

# When using Jupyter Notebook: ask the front end to restart the kernel.
# NOTE(review): this relies on the `Jupyter` JavaScript object being available
# in the page -- confirm for the front end in use.
# `HTML(...)` must stay the last expression so the cell displays (and thereby
# executes) the script.
from IPython.display import HTML
HTML("<script>Jupyter.notebook.kernel.restart()</script>")

## Torch queue speed (shared memory)

In [1]:
import torch.multiprocessing as mp
from torchelastic import events
import torchelastic
import sys, importlib
import inspect
import time
import numpy as np

def parallal_task(func, e):
    """Run ``func`` in one spawned worker process; return its handle and queue.

    The source of ``func`` is written to ``./tmp_func_file.py`` with every
    occurrence of its name replaced by ``func``, then imported from that file
    (presumably so that a spawned child can import the target by module name --
    confirm). Because only the function's own source is copied, ``func`` must
    import everything it needs inside its body.

    Args:
        func: Function whose source ``inspect.getsource`` can retrieve. The
            worker is started through ``mp.spawn``, which calls it as
            ``func(i, q, e)`` with ``i`` the process index.
        e: Object forwarded to ``func`` as its last argument.

    Returns:
        Tuple ``(p, q)``: the value returned by ``mp.spawn(..., join=False)``
        and the spawn-context queue that was handed to the worker.

    Raises:
        RuntimeError: If the defining module is not running as ``__main__``.
    """
    # Check the precondition first so nothing is written or imported when the
    # call is going to be rejected anyway. A real exception type is required
    # here: raising a plain string is itself a TypeError in Python 3.
    if __name__ != '__main__':
        raise RuntimeError("Not in Jupyter Notebook")

    # Create a temp file for "func"
    with open('./tmp_func_file.py', 'w') as file:
        file.write(inspect.getsource(func).replace(func.__name__, "func"))

    # The file may have just been created, so make the import system rescan
    # the directory before looking it up.
    importlib.invalidate_caches()

    # Reload tmp_func_file when it was imported before (so edits to the target
    # function take effect without restarting the kernel); import it otherwise.
    cached = sys.modules.get('tmp_func_file')
    if cached is None:
        module = importlib.import_module('tmp_func_file')
    else:
        module = importlib.reload(cached)

    smp = mp.get_context('spawn')
    q = smp.Queue()
    p = mp.spawn(fn=module.func, args=(q, e,), join=False, nprocs=1)

    return p, q

def foo(i, q, e):
    """Worker: enqueue a large byte tensor, then the time that took, then wait.

    Two items are placed on ``q``, in this order:

    1. a ``torch.ByteTensor`` of shape ``(16, 2048, 2048, 3)`` (contents are
       not initialised);
    2. the wall-clock duration, in seconds, of creating and enqueueing it.

    The function then blocks on ``e.wait()`` before returning.

    Imports are kept inside the body because only this function's source is
    copied into a separate module before it is run.

    Args:
        i: Unused here; first positional argument supplied by the launcher.
        q: Queue-like object exposing ``put(item)``.
        e: Event-like object exposing ``wait()``.
    """
    import time
    import torch

    # The timed region covers both the tensor allocation and the `put` call.
    began = time.time()
    q.put(torch.ByteTensor(16, 2048, 2048, 3))
    elapsed = time.time() - began

    q.put(elapsed)
    e.wait()

# Benchmark: time how long the parent needs to `get` a large tensor that a
# spawned worker placed on a torch.multiprocessing queue.
smp = mp.get_context('spawn')
e = smp.Event()
p, q = parallal_task(foo, e)

try:
    # Make sure the process has put something on the queue, so the timed
    # `get` below does not also measure process start-up.
    time.sleep(3)

    # Timed section: receive the tensor itself.
    start = time.time()
    r = q.get(timeout=10)
    end = time.time()

    # Second item is the worker's own measurement of its `put` call.
    r = q.get(timeout=10)
    diff = end - start
    print('put time', '{}s'.format(round(r, 4)))
    print('get time', '{}s'.format(round(diff, 4)))
finally:
    # Always release the worker, even when a `get` above timed out; it blocks
    # on `e.wait()` and would otherwise never exit.
    e.set()
    # Wait for the worker to finish. `join()` returns True once every spawned
    # process has been joined, and re-raises a failure from the worker.
    while not p.join():
        pass

put time 0.0006s
get time 0.0041s


In [3]:
# Ratio of the two "get time" values printed above (multiprocessing queue
# time divided by torch queue time). Timings vary from run to run; in this
# experiment the torch queue was more than 100x faster.
0.4693/0.0041

114.46341463414633