# Find corrupted files in proposal folder
Running this notebook will scan a proposal folder for corrupt .h5 files and output found files as a list.<br>
The proposal number is supplied as a string; leave it empty to use the proposal of the current working directory.

In [None]:
proposal = '' # If no proposal number is provided, the one from the current work directory is used

#------------------------------------------------
import glob
import os
import h5py

# Start from an empty list so that later cells iterating over `fnames` still
# run (and simply report zero files) when the proposal folder is not found,
# and so that a stale list from a previous run is never reused.
fnames = []

if not proposal:
    # Infer proposal and visit from the working directory: they are the two
    # path components that directly follow 'danmax'.
    cwd_parts = os.getcwd().split('/')
    idx = cwd_parts.index('danmax')
    proposal, visit = cwd_parts[idx+1:idx+3]

if not os.path.isdir('/data/visitors/danmax/{0}/'.format(proposal)):
    print('Proposal number {} was never meassured at DanMAX!'.format(proposal))
else:
    # Every .h5 file located anywhere below a 'raw' folder of the proposal
    fnames = glob.glob('/data/visitors/danmax/{0}/**/raw/**/*.h5'.format(proposal), recursive=True)

    print("The following files in proposal {0} seems to be corrupted:".format(proposal))
    for fname in fnames:
        try:
            # Opening is the whole test; the context manager closes the handle
            # again right away so that scanning many files does not leak
            # open file handles.
            with h5py.File(fname, 'r'):
                pass
        except OSError:
            # The file could not be opened -> report it as corrupted
            print(fname)
    print('Done!')

## Check integrity of the integrated files <a id='check-integrity'></a>
Pairwise compare the number of frames in each raw data file with that of its integrated file. Scans for which a corresponding *_integrated.h5* file does not exist are skipped.

In [None]:
print('The following file pairs have mismatching lengths:')
pairs = 0
mismatch = []

# Only raw scan files are candidates; anything that already is an integrated
# output file is filtered away.
raw_candidates = filter(lambda path: '_integrated.h5' not in path, fnames)

for fname in raw_candidates:
    # Expected location of the integrated counterpart of this raw file
    fname_azint = fname.replace('.h5','_integrated.h5').replace('raw/','process/azint/')
    try:
        # Length of the first axis of the raw detector data
        with h5py.File(fname,'r') as raw_fh:
            shape_raw = raw_fh['/entry/instrument/pilatus/data'].shape[0]
        # Length of the first axis of the integrated intensities
        with h5py.File(fname_azint,'r') as azint_fh:
            shape_azint = azint_fh['I'].shape[0]

        lengths_differ = shape_raw != shape_azint
        if lengths_differ:
            # Report both paths relative to the proposal folder
            print(f'{fname.split(proposal)[-1]} != {fname_azint.split(proposal)[-1]}')
            print('Number of points:',shape_raw,' vs ',shape_azint,'\n')
            mismatch.append(fname)
        pairs = pairs + 1
    except (OSError, KeyError):
        # A file of the pair could not be opened, or an expected dataset is
        # absent: the pair is skipped and not counted.
        continue

print('Integrity check complete')
print(f'Pairs checked: {pairs}, mismatches: {len(mismatch)}')

## Reintegrate mismatched files
Reintegrate the mismatched pairs using the .poni settings extracted from the metadata stored in the existing integrated files.  
Based on the [`azint-integrate.ipynb`](azint-integrate.ipynb) script.

In [None]:
# initialize relevant modules and method

%matplotlib widget
import fabio
import numpy as np
from multiprocessing import Process
from azint import AzimuthalIntegrator
import ipywidgets
from ipywidgets import Text, IntProgress
from IPython.display import display, clear_output

# HDF5 path of the detector image stack inside a raw scan file
dset_name = '/entry/instrument/pilatus/data'

def integrate_file(ai, fname, config, mask_file):
    """Integrate every image of a raw scan file and write the result to HDF5.

    The output file is placed next to the raw data with 'raw/' replaced by
    'process/azint/' in the path and '_integrated.h5' appended to the file
    name root. An existing output file is overwritten.

    Parameters
    ----------
    ai : integrator object
        Must provide ``output_shape``, ``error_model``, ``unit``,
        ``radial_axis`` and ``integrate(img)`` returning ``(I, sigma)``.
    fname : str
        Path of the raw HDF5 file; images are read from ``dset_name``.
    config : dict
        Integration settings. The keys 'poni_file' (path of a readable PONI
        file) and 'polarization_factor' are read here.
    mask_file
        Value stored as-is in the 'mask_file' dataset of the output file.

    Raises
    ------
    OSError
        If the raw file, the PONI file or the output file cannot be opened.
    KeyError
        If ``dset_name`` is missing from the raw file.
    """
    # Use the same raw -> process/azint mapping ('raw/' including the slash)
    # as the integrity check and the reintegration loop, so that the file
    # written here is the file those cells compare against. Replacing a bare
    # 'raw' would also rewrite sample names that merely contain "raw".
    output_fname = fname.replace('raw/', 'process/azint/')
    root = os.path.splitext(output_fname)[0]
    output_fname = '%s_integrated.h5' %root
    output_folder = os.path.split(output_fname)[0]

    # Context managers guarantee that both files are closed, also when the
    # integration fails half-way through.
    with h5py.File(fname, 'r') as fh:
        images = fh[dset_name]
        n_images = len(images)

        if output_folder:
            # exist_ok avoids the check-then-create race of a separate exists() test
            os.makedirs(output_folder, exist_ok=True)

        with h5py.File(output_fname, 'w') as output_fh:
            shape = (n_images, *ai.output_shape)
            I_dset = output_fh.create_dataset('I', shape=shape, dtype=np.float32)
            # The sigma dataset only exists for the poisson error model
            sigma_dset = None
            if ai.error_model == 'poisson':
                sigma_dset = output_fh.create_dataset('sigma', shape=shape, dtype=np.float32)
            # Radial axis, stored under the name of its unit
            output_fh.create_dataset(ai.unit, data=ai.radial_axis)
            # Keep a full copy of the PONI file content plus the path it was read from
            with open(config['poni_file'], 'r') as poni:
                p = output_fh.create_dataset('poni_file', data=poni.read())
                p.attrs['filename'] = config['poni_file']
            output_fh.create_dataset('mask_file', data=mask_file)
            # A polarization factor of None is stored as 0
            polarization_factor = config['polarization_factor']
            data = polarization_factor if polarization_factor is not None else 0
            output_fh.create_dataset('polarization_factor', data=data)

            fname_widget = Text(value = 'Integrating file: %s' %os.path.split(fname)[1])
            progress = IntProgress(min=0, max=n_images)
            display(fname_widget)
            display(progress)

            for i, img in enumerate(images):
                # Update the widget only every 10th image
                if i % 10 == 0:
                    progress.value = i
                I, sigma = ai.integrate(img)
                I_dset[i] = I
                if sigma is not None and sigma_dset is not None:
                    sigma_dset[i] = sigma

            # Show the bar as complete once every image has been processed
            progress.value = n_images

In [None]:
# Iterate through mismatching files
for i,fname in enumerate(mismatch):
    print(f'Reintegrating {i+1} of {len(mismatch)}: {fname}')
    fname_azint = fname.replace('.h5','_integrated.h5').replace('raw/','process/azint/')
    
    #fname = '/data/visitors/danmax/PROPOSAL/VISIT/raw/SAMPLE/scan-XXXX_pilatus.h5'
    
    # temporary poni file path
    temp_poni = os.path.join(os.path.dirname(fname_azint),'temp_poni.poni')
    
    # Extract relevant integration settings
    with h5py.File(fname_azint,'r') as f:
        if 'q' in f.keys():
            x_unit = 'q'
        elif '2th' in f.keys():
            x_unit = '2th'
        poni = f['poni_file'][()]
        
        config = {'poni_file': temp_poni,
                  'mask': f['mask_file'][()],
                  'radial_bins': f['I'].shape[-1],
                  'azimuth_bins': None,
                  'unit': x_unit,
                  'n_splitting': 15,
                  'error_model': None, # DO NOT CHANGE!
                  'polarization_factor': f['polarization_factor'][()],
                  'pixel_size': 172.0e-6, # DO NOT CHANGE!
                  'shape' : (1679, 1475)} # DO NOT CHANGE!
    
    # Create/overwrite temporary PONI file
    with open(temp_poni,'bw') as pf:
        pf.write(poni)
    
    if True:
        mask_file = config['mask']
        if config['mask']:
            mask_fname = config['mask']
            ending = os.path.splitext(mask_fname)[1]
            if ending == b'.npy':
                config['mask'] = np.load(mask_fname)
            else:
                config['mask'] = fabio.open(mask_fname).data 
        ai = AzimuthalIntegrator(**config)

        integrate_file(ai, fname, config,mask_file)
    
    # delete temporary PONI file
    os.remove(temp_poni)
print('Done')

Run the [`Check integrity`](#check-integrity) cell again.