In [None]:
%load_ext autoreload
%autoreload 2

import h5py as h5
import hepfile as hf
from hepfile import dict_tools, awkward_tools

import numpy as np
import os

import multiprocessing

import time

# Make the file and add some data

In [None]:
# Seed NumPy's global RNG so the generated file -- and every value the later
# cells print from it -- is identical on each Restart & Run All.
np.random.seed(0)

# Start from an empty hepfile data container.
data = hf.initialize()

# Declare one group, 'obj', with 'nobj' as its counter, then attach one
# dataset per value type (float, int, str) to that group.
hf.create_group(data, 'obj', counter='nobj')
hf.create_dataset(data, ['myfloat'], group='obj', dtype=float)
hf.create_dataset(data, ['myint'], group='obj', dtype=int)
hf.create_dataset(data, ['mystr'], group='obj', dtype=str)

bucket = hf.create_single_bucket(data)

# Normal packing test

nevents = 100000
for n in range(nevents):
    # randint's upper bound is exclusive, so each event holds 0-9 objects.
    nobj = np.random.randint(0, 10)
    for i in range(nobj):
        bucket['obj/myfloat'].append(np.random.random())
        bucket['obj/myint'].append(np.random.randint(5, 10))
        bucket['obj/mystr'].append(str(np.random.random()))
    bucket['obj/nobj'] = nobj
    # NOTE(review): presumably pack() moves the bucket contents into `data`
    # and returns a status code -- confirm against the hepfile docs.
    test = hf.pack(data, bucket)

# Write everything to disk with gzip compression (comp_opts=9).
hdfile = hf.write_to_file("FOR_TESTS.hdf5", data, comp_type="gzip", comp_opts=9)



In [None]:
# Load the complete file back from disk.
data, bucket = hf.load('FOR_TESTS.hdf5')

# Show how many buckets the loaded data contains.
nevents = hf.get_nbuckets_in_data(data)
print(nevents)

In [None]:
# Load only part of the file: the bounds (250, 300) go to hf.load via `subset`.
data, bucket = hf.load('FOR_TESTS.hdf5', subset=(250, 300))

# Peek at the first float in the loaded slice.
first_float = data['obj/myfloat'][0]
print(first_float)

# Show how many buckets the partial load produced.
nevents = hf.get_nbuckets_in_data(data)
print(nevents)

In [None]:
# Report how many CPUs the multiprocessing module detects on this machine.
cpu_total = multiprocessing.cpu_count()
print("Number of cpu : ", cpu_total)


https://www.digitalocean.com/community/tutorials/python-multiprocessing-example

In [None]:
from multiprocessing import Process


def print_func(continent='Asia'):
    """Print a one-line message naming ``continent``.

    Args:
        continent: Value to show after the label; defaults to ``'Asia'``.
    """
    label = 'The name of continent is : '
    print(label, continent)

if __name__ == "__main__":  # confirms that the code is under main function
    names = ['America', 'Europe', 'Africa']

    # The first process gets no arguments, so print_func falls back to its
    # default; after that comes one process per entry in `names`.
    arg_tuples = [()] + [(name,) for name in names]

    procs = []
    for arg_tuple in arg_tuples:
        proc = Process(target=print_func, args=arg_tuple)
        procs.append(proc)
        proc.start()

    # Block until every child process has finished.
    for proc in procs:
        proc.join()

https://docs.python.org/3/library/multiprocessing.html

In [None]:
from multiprocessing import Pool

def f(x):
    """Return ``x`` multiplied by itself."""
    squared = x * x
    return squared

if __name__ == '__main__':
    # Map f over a small list using a pool of 5 worker processes.
    with Pool(5) as p:
        squares = p.map(f, [1, 2, 3])
        print(squares)

In [None]:
from multiprocessing import Process, Lock

def f(l, i):
    """Print a greeting tagged with ``i`` while holding the lock ``l``.

    Args:
        l: Lock held for the duration of the print.
        i: Value appended to the greeting.
    """
    # The context manager acquires on entry and always releases on exit.
    with l:
        print('hello world ', i)

if __name__ == '__main__':
    # A single lock shared by every child process.
    lock = Lock()

    # Start one process per number, keeping a handle on each so it can be joined.
    procs = []
    for num in range(10):
        proc = Process(target=f, args=(lock, num))
        proc.start()
        procs.append(proc)

    # Wait for all children, so the cell only finishes once their output is
    # complete and no unjoined processes are left behind.
    for proc in procs:
        proc.join()

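What does the lock do? Is it just making everything run in series? (Only one process can hold the lock at a time, and in the cell above the whole body of `f` runs while holding it, so the guarded work does execute one process at a time.)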

In [None]:
from multiprocessing import Process, Lock

def f(l, i):
    """Load the ``i``-th 100-wide subset of the test file and print a summary.

    While holding the lock ``l``, this loads ``FOR_TESTS.hdf5`` with
    ``subset=(i*100, (i+1)*100)`` and prints ``i``, the first
    ``obj/myfloat`` value and the sum of all ``obj/myfloat`` values.

    Args:
        l: Lock held while the file is read and the summary is printed.
        i: Zero-based index of the subset to load.
    """
    # The context manager acquires on entry and always releases on exit.
    with l:
        first, last = i * 100, (i + 1) * 100
        loaded, _bucket = hf.load('FOR_TESTS.hdf5', subset=(first, last), verbose=False)
        floats = loaded['obj/myfloat']
        print(i, floats[0], np.sum(floats))

if __name__ == '__main__':
    # A single lock shared by every child process.
    lock = Lock()

    # Start one reader process per subset index, keeping handles for joining.
    procs = []
    for num in range(7):
        proc = Process(target=f, args=(lock, num))
        proc.start()
        procs.append(proc)

    # Wait for all children, so the cell only finishes once their output is
    # complete and no unjoined processes are left behind.
    for proc in procs:
        proc.join()

In [None]:
from multiprocessing import Pool

def my_function(x):
    """Return ``x`` multiplied by itself."""
    product = x * x
    return product

if __name__ == "__main__":
    # Create a pool of 4 processes. The context manager shuts the workers
    # down when the block exits, so none are left running after the cell.
    with Pool(4) as pool:
        # Map the function to the list of arguments; map() blocks until
        # every result is available.
        results = pool.map(my_function, [1, 2, 3, 4])

    # Print the results
    for result in results:
        print(result)



In [None]:
# Serial run: load buckets lo..hi in a single call and time the load plus
# the reduction over 'obj/myfloat'.
lo, hi = 0, 70000

start = time.time()
data, bucket = hf.load('FOR_TESTS.hdf5', subset=(lo, hi), verbose=False)
total = np.sum(np.cos(np.sin(np.log(data['obj/myfloat'])**5)))
# Label the output with this cell's own bounds. The previous `i` was never
# defined here and only resolved to a value left over from an earlier cell.
print((lo, hi), data['obj/myfloat'][0], total)

print(f"time to run: {time.time()-start}")

In [None]:
from multiprocessing import Pool

# Module-level lock used by my_function below.
lock = Lock()

def my_function(i):
    """Load the ``i``-th 10000-wide subset and reduce its floats to one number.

    While holding the module-level ``lock``, this loads ``FOR_TESTS.hdf5``
    with ``subset=(i*10000, (i+1)*10000)``, computes
    ``sum(cos(sin(log(x)**5)))`` over ``obj/myfloat`` and prints ``i``, the
    first float and the result.

    Args:
        i: Zero-based index of the subset to process.

    Returns:
        The reduced value for this subset.
    """
    total = 0
    # The context manager acquires on entry and always releases on exit.
    with lock:
        first, last = i * 10000, (i + 1) * 10000
        loaded, _bucket = hf.load('FOR_TESTS.hdf5', subset=(first, last), verbose=False)
        floats = loaded['obj/myfloat']
        # Alternative workload kept for reference: np.sum(np.log(floats)**5)
        powered = np.log(floats)**5
        total = np.sum(np.cos(np.sin(powered)))
        print(i, floats[0], total)

    return total

if __name__ == "__main__":

    nprocesses = 7

    start = time.time()
    # Create a pool of nprocesses. The context manager shuts the workers
    # down when the block exits, so none are left running after the cell.
    with Pool(nprocesses) as pool:
        # Map the function to the list of arguments (one subset index per
        # worker); map() blocks until every result is available.
        results = pool.map(my_function, np.arange(nprocesses))

    # Print the per-subset results, then their grand total.
    for result in results:
        print(result)
    print(sum(results))

    print(f"time to run: {time.time()-start}")

