# Merge parallel scans
Merge scans collected in parallel with a common *master.h5* file, as implemented in `hula`.  
Parallel scans are typically measured using a multi-sample holder mounted on the xy-stage, such as the *AMPIX* or battery setup.  
  
The notebook reads metadata and scan indices from the *master.h5* file, followed by the azimuthally integrated data from the parallel *\*pilatus_integrated.h5* files, and writes everything to a *master_pilatus_integrated.h5* file.
  
**Parameters:**  
`raw_path` : Path to the *raw* files. Can be modified to specify a subdirectory  
`embed_meta` : Toggle whether to embed metadata from the *master.h5* into the *master_pilatus_integrated.h5* file  
`only_new` : Toggle whether to only merge new files where a master_pilatus_integrated.h5 file does not exist  
`delete_redundant_files` : Toggle whether to delete the *_pilatus_integrated.h5* files after merging to the *master_pilatus_integrated.h5* file  

In [None]:
import os
import h5py as h5
import glob
import sys
# extend the module search path with the parent directory before importing DanMAX
# (presumably the DanMAX package lives one level above this notebook - TODO confirm)
sys.path.append('../')
import DanMAX as DM
# show the current working directory; the default raw_path is derived from it
print(os.getcwd())

In [None]:
# raw_path = '/data/visitors/danmax/proposal/visit/raw/'
# default: derive the raw directory from the working directory by swapping
# '/scripts/batteries' for '/raw'; append a subfolder to narrow the search
raw_path = os.getcwd().replace('/scripts/batteries','/raw')  # + '/[subfolder]'
# NOTE: embed_meta is left commented out - it is only referenced by the
# commented-out writer code in the merge cell
#embed_meta = False              # embed meta data in the master_pilatus_integrated.h5 file
only_new   = False              # only merge files where a master_pilatus_integrated.h5 file does not exist
delete_redundant_files = False  # delete the redundant _pilatus_integrated.h5 files after merging

#### loop through *master.h5* files

In [None]:
# Merge the per-scan *_pilatus_integrated.h5 files belonging to each master.h5
# into a single master_pilatus_integrated.h5 file in the process/azint tree.
#
# Reads:  raw_path, only_new, delete_redundant_files (set in the parameter cell)
# Writes: <azint dir>/master_pilatus_integrated.h5 for every master.h5 found
# Side effects: optionally deletes the merged per-scan files afterwards


def _azint_path(raw_file):
    """Return the process/azint counterpart of a path below the raw directory.

    Only the first path component that is exactly 'raw' is swapped, so names
    that merely contain the letters 'raw' are left untouched. The path is
    returned unchanged if it has no 'raw' component.
    """
    parts = raw_file.split('/')
    if 'raw' in parts:
        parts[parts.index('raw')] = 'process/azint'
    return '/'.join(parts)


# find all master.h5 files in the raw_path, most recently changed first
files = sorted(glob.glob(f'{raw_path}/**/master.h5', recursive=True), key = os.path.getctime, reverse=True)
if only_new:
    # remove file names from the list if a master_pilatus_integrated.h5 file already exists
    files = [f for f in files
             if not os.path.isfile(os.path.join(os.path.dirname(_azint_path(f)),
                                                'master_pilatus_integrated.h5'))]

for i,fname in enumerate(files):
    print(f'{i+1} of {len(files)} - {"raw"+fname.split("/raw")[-1]:<150s}')#,end='\r')
    # read scan ids and metadata from the master.h5 file
    try:
        print('   Reading data...',end='\r')
        scan_ids, metadata, start_pos = DM.parallel.parallelMetaMaster(fname)
    except (OSError, KeyError):
        # if an OSError or KeyError is raised, skip to the next file
        # (the exception types must be given as a tuple - a list raises TypeError)
        print(f'Skipping {fname}')
        continue
    # make a list of all relevant _pilatus_integrated.h5 files
    azi_path = os.path.dirname(_azint_path(fname))
    scan_list = [f'{azi_path}/{scan}_pilatus_integrated.h5' for scan in scan_ids]
    scan_list = [scan for scan in scan_list if os.path.isfile(scan)]
    if not scan_list:
        # nothing to merge (e.g. not integrated yet, or already merged and deleted);
        # skip so that an existing merged file is not overwritten with no data
        print(f'Skipping {fname} - no _pilatus_integrated.h5 files found')
        continue
    # read the integrated data
    azidata, azimeta = DM.parallel.getParallelAzintData(scan_list)

    # write to output file
    print('   Writing to file...',end='\r')
    azi_master_path = azi_path + '/master_pilatus_integrated.h5'

    DM.integration.writeAzintFile(azi_master_path,azidata,azimeta)

    # # excluded keys
    # excl_keys = ['date', 'input', 'program', 'version']
    # with h5.File(azi_master_path,'w') as f:
    #     for key in azidata:
    #         if not azidata[key] is None and not key in excl_keys:
    #             f.create_dataset(key, data=azidata[key])
    #     if embed_meta:
    #         for key in metadata:
    #             if not metadata[key] is None:
    #                 f.create_dataset(f'meta/{key}', data=metadata[key])
    #         for key in start_pos:
    #             if not start_pos[key] is None:
    #                 f.create_dataset(f'meta/start_positioners/{key}', data=start_pos[key])

    if delete_redundant_files:
        print('   Deleting files... ',end='\r')
        for scan in scan_list:
            try:
                os.remove(scan)
            except PermissionError:
                # stop at the first file that cannot be removed
                print('    Unable to delete files')
                break

print(f'{"Done!":25s}')