In [3]:
!ls /gpfs/soma_interim/cne/watkins/xcorr_dump_macaque_3d_iorder3517

ls: cannot access /gpfs/soma_interim/cne/watkins/xcorr_dump_macaque_3d_iorder3517: Permission denied


In [1]:
!ls /gpfs/soma_fs/scratch/valerio/xcorr_dump_macaque_3d_iorder3517 | wc -l

5614


In [2]:
import os
import re
import tifffile
import numpy as np
import multiprocessing as mp

In [3]:
# return a dictionary of files matching filename regex pattern
# the regex also defines the key to use for the dictionary
# The filename_regex has the form: r'FILE_PREFIX(FILE_KEY)\.EXT'
# Example:  r'image([0-9]+)\.tif'
def search_files(file_path, filename_regex):
    files = {}
    for f in os.listdir(file_path):
        file_match = re.match(filename_regex, f)
        if file_match:
            file_id = int(file_match.group(1))
            file_name = os.path.join(file_path, f)
            files[file_id] = file_name
    return files

In [3]:
def read_files_parallel(files, num_procs=mp.cpu_count()):
    with mp.Pool(num_procs) as pool:
        return {file_id:file_data for file_id, file_data in
                zip(files.keys(),
                    pool.map(tifffile.imread, files.values()))}

In [8]:
import tqdm
import concurrent.futures as cf
from multiprocessing.pool import ThreadPool

def read_files_parallel_thread(files, num_procs=mp.cpu_count()):
    #with cf.ThreadPoolExecutor(num_procs) as pool:
    with ThreadPool(num_procs) as pool:
        return {file_id:file_data for file_id, file_data in
                zip(files.keys(),
                    tqdm.tqdm(pool.imap(tifffile.imread, files.values()), total=len(files)))}

In [9]:
import resource
import time
import tifffile
import dill
import os
import re
import numpy as np

#from rcc import BatchXCorr
#import xcorr_util as xcu

export_xcorr_comps_path = '/gpfs/soma_fs/scratch/valerio/xcorr_dump_macaque_3d_iorder3517'
#export_xcorr_comps_path = '/gpfs/soma_local/cne/watkins/xcorr_dump_macaque_3d_iorder3517'
plot_input_data = False
plot_statistics = False
normalize_inputs = False
group_correlations = False
use_gpu = True

fn = os.path.join(export_xcorr_comps_path, 'comps.dill')
with open(fn, 'rb') as f: d = dill.load(f)

correlations = d['comps']
Cmax_test = d['Cmax']
Camax_test = d['Camax']

# Gathering the file names of images and templates
image_files = search_files(export_xcorr_comps_path, r'image([0-9]+)\.tif')
templ_files = search_files(export_xcorr_comps_path, r'templ([0-9]+)\.tif')

# NOTE: Using dictionaries for testing. The final version will support a numpy array
# images = np.empty(NUM_IMAGES)
print(f'[BATCH_XCORR] Reading files using process pool of size: {mp.cpu_count()}')
start_time = time.time()
images = read_files_parallel_thread(image_files)
templates = read_files_parallel_thread(templ_files)
stop_time = time.time()
print(f'[BATCH_XCORR] Elapsed time reading files: {stop_time - start_time} seconds')

# Sampling memory use. (maximum resident set size in kilobytes)
usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
print(f'[BATCH_XCORR] Current memory usage is {usage / 10 ** 3} MB')


[BATCH_XCORR] Reading files using process pool of size: 96


100%|██████████████████████████████████████▉| 2805/2806 [00:43<00:00, 64.23it/s]
100%|█████████████████████████████████████▉| 2805/2806 [00:07<00:00, 379.75it/s]

[BATCH_XCORR] Elapsed time reading files: 51.15959858894348 seconds
[BATCH_XCORR] Current memory usage is 156776.424 MB



