In [None]:
# Show the path of the Python interpreter this kernel is running on.
import sys
sys.executable

In [None]:
%load_ext autoreload
%autoreload 2
from micron2.data import pull_nuclei
from micron2.data import load_as_anndata
import pandas as pd
import pytiff
import h5py
import cv2
import glob
import os

# Root of the raw data and the sample processed here. Every path below is
# derived from these two values so that changing `datahome` relocates all of them.
datahome = '/home/ingn/tmp/micron2-data/rawdata'
sample_id = '201021_BreastFFPE_Final'
sample_dir = f'{datahome}/{sample_id}'

# Per-cell table; the first CSV column is used as the index.
cells = pd.read_csv(f'{sample_dir}/{sample_id}_3_cells.csv',
                    index_col=0, header=0)
nuclei_img = f'{sample_dir}/{sample_id}_2_nuclei.tif'

# Discover the per-channel TIFFs in pure Python (no shell dependency).
# Sorting makes the channel order deterministic across runs.
imagefs = sorted(glob.glob(f'{sample_dir}/images/*.tif'))
if not imagefs:
    raise FileNotFoundError(f'No .tif images found in {sample_dir}/images')

dapi_images = [f for f in imagefs if 'DAPI' in f]
if not dapi_images:
    raise FileNotFoundError(f'No DAPI image found in {sample_dir}/images')

# Keep every image whose path mentions none of the excluded tags.
excluded_tags = ('DAPI', 'Blank', 'Empty')
non_dapi_images = [f for f in imagefs
                   if not any(tag in f for tag in excluded_tags)]

# Channel name = second-to-last underscore-separated token of the file stem
# (presumably the marker name -- confirm against the file naming scheme).
channel_names = ["DAPI"] + [
    os.path.splitext(os.path.basename(f))[0].split('_')[-2]
    for f in non_dapi_images
]
print(len(channel_names))

# A single DAPI image goes first, matching the leading "DAPI" channel name.
image_paths = [dapi_images[0]] + non_dapi_images
print(len(image_paths))

# Options forwarded to pull_nuclei, gathered in one place so they are easy to
# tweak. Their exact meaning is defined by micron2.data.pull_nuclei.
nuclei_options = dict(
    out_file='/home/ingn/tmp/micron2-data/dataset_v2.hdf5',
    nuclei_img=nuclei_img,
    size=64,
    min_area=50,
    tile_size=128,
    channel_names=channel_names,
    overlap=0.3,
    tile_scale_factor=1.,
    debug=False,
)

# Build the dataset file from the cell table and the ordered channel images.
pull_nuclei(cells, image_paths, **nuclei_options)

In [None]:
# List the HDF5 file(s) in the data directory with human-readable sizes.
!ls -lha /home/ingn/tmp/micron2-data/*.hdf5

# Open up the dataset as two AnnData objects: cells and tiles

In [None]:
# With no extra arguments the loader uses its defaults, which read the "cell" information.
adata_cells = load_as_anndata(
    '/home/ingn/tmp/micron2-data/dataset_v2.hdf5'
)
adata_cells

In [None]:
# Override the loader's keys so that it reads the "tile" information instead.
tile_keys = {
    'obs_names': 'meta/Tile_IDs',
    'featurekey': 'tile_intensity',
    'coordkey': 'meta/tile_coordinates',
}
adata_tiles = load_as_anndata('/home/ingn/tmp/micron2-data/dataset_v2.hdf5', **tile_keys)
adata_tiles

# Test streaming from a dataset with tensorflow-io

In [None]:
from micron2.data import stream_dataset
import tqdm.auto as tqdm

In [None]:
# Stream from the 'cells' group
dataset = stream_dataset('/home/ingn/tmp/micron2-data/dataset_v2.hdf5', 
                         group_name='cells')

# Fetch only the first element as a smoke test. Using next() with a default
# means an empty stream is reported explicitly instead of leaving `sample`
# unbound (or silently reusing a value left over from another cell).
sample = next(iter(dataset), None)
if sample is None:
    raise RuntimeError("stream_dataset yielded no samples for group 'cells'")

print(sample.shape)

In [None]:
# Stream from the 'images' group
dataset = stream_dataset('/home/ingn/tmp/micron2-data/dataset_v2.hdf5', 
                         group_name='images')

# Fetch only the first element as a smoke test. Using next() with a default
# means an empty stream is reported explicitly instead of silently printing
# a `sample` left over from an earlier cell.
sample = next(iter(dataset), None)
if sample is None:
    raise RuntimeError("stream_dataset yielded no samples for group 'images'")

print(sample.shape)