In [1]:
import dask.array as da
from glob import glob
import re
import h5py
import os
from tqdm.notebook import tqdm
from datetime import datetime
import numpy as np

def convert_to_real_photon_maps(photon_maps_old, gain=0.2):
    """
    Convert legacy "*_photon.npy" maps into discrete photon-count maps.

    Background: during the Lingjia/Zoey meeting on 10/18/2024 the low-q photon
    intensity looked off. On further investigation, the photon maps saved in
    scratch/LS/Reduced_Data/reduced_3-17-23/ turned out to need an additional
    gain multiplication and rounding before they represent actual photon
    arrival events. All data processed before 10/18/2024 using the photon
    statistics method should not be trusted.

    The input is multiplied by ``gain``, rounded to the nearest integer and
    only then saturated to the ``uint8`` range. Clipping *after* scaling
    matters: clipping the raw values first would cap the output at
    ``gain * 255`` (51 photons for the default gain) and, for ``gain > 1``,
    would let the scaled values wrap around when cast to ``uint8``.

    Parameters:
    photon_maps_old (array-like): The old photon maps. NumPy arrays, dask
        arrays and plain sequences are accepted; only ufuncs are applied, so
        a dask input stays lazy.
    gain (float, optional): The gain factor to apply to the photon data. Defaults to 0.2.

    Returns:
    discrete_photon (uint8 array): The discrete photon map with values in the
        range [0, 255], of the same array type as the ufunc result for the input.
    """
    # Scale and round first so that saturation happens in photon units.
    photon = np.rint(np.multiply(photon_maps_old, gain))

    # Saturate to the representable uint8 range before casting; negative
    # values become 0 and anything above 255 becomes 255.
    photon = np.maximum(photon, 0)
    photon = np.minimum(photon, 255)
    return photon.astype(np.uint8)


def write_photon_map_h5_output_path(file_list, chunk_size, run, module):
    """
    Create an HDF5 file for storing photon maps and initialize its metadata.

    The file is written to ``./data/photon_maps/run{run:03d}_module{module:02d}.h5``
    (the directory is created if needed) and any existing file at that path is
    truncated. It contains an empty ``uint8`` dataset named ``photon_maps``
    sized to hold every train from ``file_list``, plus run-level attributes.

    Parameters:
    file_list (list): List of files containing photon data, in train order.
    chunk_size (tuple): Chunk size for the HDF5 dataset.
    run (int): Run number of the experiment. Must be one of the runs in the
        metadata table below.
    module (int): Module number of the detector.

    Raises:
    ValueError: If ``file_list`` is empty or ``run`` is not a known run.
    """
    # Local import: the file-level import only brings in `datetime` itself.
    from datetime import timezone

    if len(file_list) == 0:
        raise ValueError('file_list is empty: no photon files to size the dataset from.')

    # Experimental parameters for saving metadata (parallel lists indexed by run)
    current = [0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]
    dark_runNB = [69,  72,  72,  75,  75,  78,  78,  81,  81]
    runNB =      [70,  71,  73,  74,  76,  77,  79,  80,  82]
    if run not in runNB:
        raise ValueError(f'Unknown run {run}; expected one of {runNB}.')
    index = runNB.index(run)

    # Define the output file path and make sure its directory exists
    output_file = f'./data/photon_maps/run{run:03d}_module{module:02d}.h5'
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    # Get the shape of the last file in the list (memory-mapped, so only the
    # header is actually read)
    last_file_shape = np.load(file_list[-1], 'r').shape

    # Calculate the total number of trains. This formula assumes every file
    # except the last one holds exactly 300 trains.
    trains_per_file = 300
    n_trains = ((len(file_list)-1)*trains_per_file) + last_file_shape[0]

    # Define the new shape for the HDF5 dataset: all trains along axis 0,
    # followed by the trailing three axes of a single file.
    new_shape = (n_trains, *last_file_shape[-3:])

    # Create the HDF5 file and initialize its metadata
    with h5py.File(output_file, 'w') as f:
        # Create the dataset for storing photon maps
        f.create_dataset('photon_maps', new_shape, chunks=chunk_size, dtype=np.uint8)

        # Set the metadata attributes
        f.attrs['run'] = run
        f.attrs['dark_run'] = dark_runNB[index]
        f.attrs['module'] = module
        f.attrs['proposal'] = 2884
        f.attrs['field'] = current[index]
        # Same UTC timestamp string as before, without the deprecated utcnow()
        f.attrs['date_processed'] = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    return


def update_photon_maps(run, module, chunk_size=(10, 200, 128, 512), overwrite_file=False):
    """
    Convert the legacy "*_photon.npy" maps of one run/module into an HDF5 file.

    Background: during the Lingjia/Zoey meeting on 10/18/2024 the low-q photon
    intensity looked off. On further investigation, the photon maps saved in
    scratch/LS/Reduced_Data/reduced_3-17-23/ turned out to need an additional
    gain multiplication and rounding before they represent actual photon
    arrival events. All data processed before 10/18/2024 using the photon
    statistics method should not be trusted.

    Each matching file is converted with ``convert_to_real_photon_maps`` and
    written into the ``photon_maps`` dataset of
    ``./data/photon_maps/run{run:03d}_module{module:02d}.h5``.

    Parameters:
    run (int): Run number of the experiment.
    module (int): Module number of the detector.
    chunk_size (tuple, optional): Chunk size for the HDF5 dataset.
        Defaults to (10, 200, 128, 512).
    overwrite_file (bool, optional): Whether to recreate the output file if it
        already exists. Defaults to False, in which case an existing file is
        reused and its ``photon_maps`` dataset is rewritten in place.

    Raises:
    FileNotFoundError: If no input files match the run/module.
    """
    # Define the folder path containing the photon data files
    folder = '/gpfs/exfel/u/scratch/SCS/202201/p002884/LS/Reduced_Data/reduced_3-17-23/'

    # Define the output file path up front: it is needed by the existence
    # check below (previously it was only assigned inside the loop, so the
    # check raised UnboundLocalError). Must match the path used by
    # write_photon_map_h5_output_path.
    output_file = f'./data/photon_maps/run{run:03d}_module{module:02d}.h5'

    # Get the list of files containing photon data, ordered by the last
    # number that appears in each path (the file index)
    files = sorted(glob(folder+f'r{run:0d}m{module:0d}_*_photon.npy'),
                   key=lambda s: int(re.findall(r'\d+', s)[-1]))
    if not files:
        raise FileNotFoundError(
            f'No photon files found for run {run}, module {module} in {folder}')

    # Check if the output file already exists
    if not overwrite_file and os.path.exists(output_file):
        # NOTE(review): the existing file is kept but the loop below still
        # writes into it — confirm this is the intended meaning of
        # overwrite_file=False.
        print('Output file already exists.')
    else:
        # Create the HDF5 file and initialize its metadata
        write_photon_map_h5_output_path(files, chunk_size, run, module)

    # Every file except the last is assumed to hold exactly this many trains;
    # the same value is used to size the dataset when the file is created.
    trains_per_file = 300

    # Iterate over the files and update the photon maps in the HDF5 file
    for i, file in tqdm(enumerate(files), total=len(files)):
        # Load the photon data from the file (memory-mapped, wrapped lazily)
        data = da.from_array(np.load(file, 'r'))

        # Rechunk along the train axis only; the remaining axes stay whole
        data = data.rechunk(chunks=(10, -1, -1, -1))

        # Convert the photon data to real photon maps
        photon_maps = convert_to_real_photon_maps(data)

        photon_maps = photon_maps.reshape((-1, *photon_maps.shape[-3:]))
        photon_maps = photon_maps.compute()

        # Calculate the start and stop indices for storing the photon maps
        start_index = i*trains_per_file
        stop_index = start_index + data.shape[0]

        # Store the photon maps in the HDF5 file. The file is reopened for
        # each input so the handle is closed after every slab is written.
        with h5py.File(output_file, 'a') as f:
            f['photon_maps'][start_index:stop_index] = photon_maps
    return

In [None]:
# Convert run 70, module 0; overwrite_file=True makes update_photon_maps
# recreate the output HDF5 file before filling it.
update_photon_maps(run=70, module=0, overwrite_file=True)

  0%|          | 0/61 [00:00<?, ?it/s]