In [None]:
# Develop pulling per-cell statistics from each channel image in a nucleus dataset

In [None]:
import pandas as pd
import numpy as np
import h5py

from tqdm.auto import tqdm

%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
# Open the dataset in read/write mode ("r+") so that the statistics computed
# further down can be stored back into the same file.
h5f = h5py.File(name="/dev/shm/dataset.hdf5", mode="r+")

In [None]:
# Show the file layout: the top-level keys first, then the keys inside the
# 'cells' and 'meta' groups.
print(h5f.keys())
for top_level_group in ('cells', 'meta'):
    print(h5f[top_level_group].keys())

In [None]:
def post_process_channel_stats(h5f, group_name='intensity', return_values=True):
    """
    Compute the mean intensity of every channel for every cell of an hdf5
    cell image dataset and store the result in the file.

    - if 'meta/nuclear_masks' is present in h5f, each cell's mean is restricted
      to the pixels selected by that cell's mask (non-zero mask entries count
      as selected).
    - otherwise, the mean is taken over the whole image of each cell.

    The means are always written to h5f as '{group_name}/{channel}', whatever
    the value of return_values, so h5f must be open in w or r+ mode. A dataset
    of the same name left by an earlier run is replaced.

    Args:
        h5f (h5py.File object): must provide 'meta/Cell_IDs',
            'meta/channel_names' and one 'cells/{channel}' dataset per
            channel name.
        group_name (str): Group to place the means (default: intensity)
        return_values (bool): If true, also return the computed arrays;
            if false, return None.
    Returns:
        vals (dict): keys: channel names, values: float32 np.array holding one
            mean per cell (NaN for a cell whose mask selects no pixel).
            None when return_values is false.
    """
    n_cells = len(h5f['meta/Cell_IDs'])
    # Names are decoded when stored as bytes; already-decoded names pass through.
    channel_names = [
        b.decode('UTF-8') if isinstance(b, bytes) else str(b)
        for b in h5f['meta/channel_names'][:]
    ]
    vals = {k: np.zeros(n_cells, dtype=np.float32) for k in channel_names}

    # The mask dataset is optional: without it the mean covers the whole image.
    masks = h5f['meta/nuclear_masks'][:] if 'meta/nuclear_masks' in h5f else None

    for channel in channel_names:
        data_stack = h5f[f'cells/{channel}'][:]
        channel_vals = vals[channel]
        pbar = tqdm(range(n_cells))
        pbar.set_description(f'Channel {channel}')
        for i in pbar:
            data = data_stack[i]
            if masks is not None:
                mask = masks[i]
                if mask.dtype != np.bool_:
                    # An integer mask would be interpreted as fancy indices by
                    # data[mask]; turn it into a real boolean selection.
                    # NOTE(review): assumes non-zero means "inside" - confirm
                    # against how the masks were produced.
                    mask = mask != 0
                data = data[mask]
            # An empty selection has no mean; store NaN without the numpy warning.
            channel_vals[i] = np.mean(data) if data.size else np.nan
            if i % 50000 == 0:
                # Average only the cells processed so far; the rest of the
                # array is still the zero initialisation.
                pbar.set_description(f'Channel {channel} running mean: {np.mean(channel_vals[:i + 1]):3.4e}')

    for channel in channel_names:
        path = f'{group_name}/{channel}'
        if path in h5f:
            # create_dataset refuses to overwrite; drop the stale result so the
            # function can be re-run on the same file.
            del h5f[path]
        d = h5f.create_dataset(path, data=vals[channel])
        d.attrs['description'] = f'mean intensity of {channel} channel'
    h5f.flush()

    if return_values:
        return vals
    return None

# vals = post_process_channel_stats(h5f)

In [None]:
# Run the per-channel mean computation; the results are stored in the file
# under the 'intensity' group and nothing is returned to the notebook.
post_process_channel_stats(h5f, group_name='intensity', return_values=False)

In [None]:
# Show the layout again, now including the 'intensity' group.
print(h5f.keys())
for top_level_group in ('cells', 'meta', 'intensity'):
    print(h5f[top_level_group].keys())

In [None]:
# Close the file handle that was opened at the top of the notebook.
h5f.close()