# Prepare dummy data

In [66]:
import pathlib
from contextlib import ExitStack

import h5py
import pandas as pd
import xarray as xr
from merfishing import Merfish

In [2]:
# Open the MERFISH region that the dummy data in this notebook is built from.
region_dir = '../../../202205231554_MouseSagittalM3S1_VMSC01101/output/region_0/'
merfish = Merfish(region_dir)

MERFISH Experiment Region
Region data located at /gale/netapp/cemba3c/merfish/dummy_experiment/output/region_0/../../../202205231554_MouseSagittalM3S1_VMSC01101/output/region_0
Experiment dir located at /gale/netapp/cemba3c/merfish/dummy_experiment/output/region_0/../../../202205231554_MouseSagittalM3S1_VMSC01101
/gale/netapp/cemba3c/merfish/dummy_experiment/output/region_0/../../../202205231554_MouseSagittalM3S1_VMSC01101/output/region_0 does not contain cellpose results or the results are incomplete,using watershed results from vizgen pipeline instead.


In [3]:
# Load the per-cell metadata and the detected transcripts for a single FOV
# (index 0); the cells below split this one FOV into four dummy FOVs.
source_fov = 0
cell_meta = merfish.get_cell_metadata(fov=source_fov)
transcripts = merfish.get_transcripts(fov=source_fov)

## Save the dummy dataset files

In [4]:
# Turn FOV 0 into four dummy FOVs by cutting the transcripts at the median
# global x and the median global y.
xmid, ymid = transcripts[['global_x', 'global_y']].median().tolist()
judge = pd.DataFrame({
    'xjudge': transcripts['global_x'].lt(xmid),
    'yjudge': transcripts['global_y'].lt(ymid),
})
# The FOV id encodes the quadrant: +1 when x < xmid, +2 when y < ymid,
# giving ids 0..3.
transcripts['fov'] = judge['yjudge'] * 2 + judge['xjudge'] * 1
sorted_transcripts = transcripts.sort_index()
sorted_transcripts.to_csv('detected_transcripts.csv.gz')

In [5]:
# Assign each cell to one of the four dummy FOVs using the same cut points
# (xmid, ymid) that were computed from the transcripts, so cells and
# transcripts in the same quadrant share a FOV id.
judge = pd.DataFrame({
    'xjudge': cell_meta['center_x'].lt(xmid),
    'yjudge': cell_meta['center_y'].lt(ymid),
})
# Quadrant encoding: +1 when x < xmid, +2 when y < ymid (ids 0..3).
cell_meta['fov'] = judge['yjudge'] * 2 + judge['xjudge'] * 1
sorted_cell_meta = cell_meta.sort_index()
sorted_cell_meta.to_csv('cell_metadata.csv.gz')

In [12]:
# Read the cell-by-gene table from the path exposed by the Merfish object,
# keep only the rows whose index appears in cell_meta, cast the counts to
# int and write them out.
df = pd.read_csv(merfish.cell_by_gene_path, index_col=0)
selected_counts = df.loc[cell_meta.index]
selected_counts.astype(int).to_csv('cell_by_gene.csv.gz')

In [17]:
# Export the cell boundaries of FOV 0 into four HDF5 files, one per dummy FOV.
boundries = merfish.get_cell_boundaries(0)
pathlib.Path('cell_boundaries').mkdir(exist_ok=True)

# ExitStack closes every opened file when the block exits, including when the
# loop raises part-way through; the previous version only closed the handles
# after a fully successful loop and leaked them on any error.
with ExitStack() as stack:
    # Index i in this list is the dummy FOV id (0..3) stored in cell_meta['fov'].
    hdf_handles = [
        stack.enter_context(
            h5py.File(f'cell_boundaries/feature_data_{i}.hdf5', 'w')
        )
        for i in range(4)
    ]
    for cell, row in cell_meta.iterrows():
        fov = int(row['fov'])
        handle = hdf_handles[fov]
        handle[f'/featuredata/{cell}/z_coordinates'] = boundries[cell].z_coords
        # A group is created for each z index 0..6, even when no boundary
        # exists for that index.
        for z in range(7):
            handle.create_group(f'/featuredata/{cell}/zIndex_{z}')
            try:
                data = boundries[cell][z]
            except KeyError:
                # No boundary stored for this z index: leave the group empty.
                # Only the lookup is guarded, so a failure while writing to
                # the HDF5 file is not silently swallowed.
                continue
            handle[f'/featuredata/{cell}/zIndex_{z}/p_0/coordinates'] = data

In [86]:
# Pixel extent of FOV 0 derived from its transcripts. The second argument
# (300) is passed through as-is; presumably a padding in pixels, TODO confirm
# against the merfishing API.
xmin, ymin, xmax, ymax = merfish.get_fov_pixel_extent_from_transcripts(0, 300)
image_dir = '../../../test/output/region_0/images/'

images = ['DAPI', 'PolyT', 'Snap25', 'Mbp']

for image_name in images:
    # Name shared by the zarr store and the data variable inside it.
    var_name = f'mosaic_{image_name}'
    image_path = f'{image_dir}/{var_name}.zarr'
    # Keep only the region from the origin up to the FOV's max corner ...
    image = xr.open_zarr(image_path).sel(x=slice(0, xmax), y=slice(0, ymax))
    # ... and blank out everything before the FOV's min corner, so only
    # values inside this FOV remain non-zero.
    image[var_name][:, :ymin, :] = 0
    image[var_name][:, :, :xmin] = 0
    image[var_name].encoding['chunks'] = (1, 5000, 5000)
    image.to_zarr(f'images/{var_name}.zarr', mode='w')


In [87]:
# Inspect the dataset left in `image` by the final iteration of the export
# loop (the last entry of `images`) to check its shape and chunking.
image

Unnamed: 0,Array,Chunk
Bytes,785.40 MiB,150.74 MiB
Shape,"(7, 26052, 2258)","(7, 5000, 2258)"
Count,420 Tasks,6 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 785.40 MiB 150.74 MiB Shape (7, 26052, 2258) (7, 5000, 2258) Count 420 Tasks 6 Chunks Type uint16 numpy.ndarray",2258  26052  7,

Unnamed: 0,Array,Chunk
Bytes,785.40 MiB,150.74 MiB
Shape,"(7, 26052, 2258)","(7, 5000, 2258)"
Count,420 Tasks,6 Chunks
Type,uint16,numpy.ndarray


In [88]:
# Convert the pixel corners of the cropped mosaic to micron coordinates.
# 2258 and 26052 are the x and y sizes of the dataset displayed above;
# the corners are assumed to be (x, y) pairs, TODO confirm the expected order.
corner_pixels = [[0, 0], [2258, 26052]]
merfish.transform.pixel_to_micron_transform(corner_pixels)

array([[  94.269966, -253.84212 ],
       [ 338.1303  , 2559.733   ]], dtype=float32)