# Create hybridization.json from a directory of image files

This notebook creates the starfish JSON files (`hybridization.json`, `nuclei.json`, `experiment.json` and `codebook.json`) from a directory of image files and shows how to load the resulting data using starfish.

In [1]:
import os
import re
import json
import numpy as np
import glob
from skimage.io import imread, imsave
from collections import Counter, OrderedDict
from typing import Mapping, Dict, List, Generator, Tuple

from starfish.constants import Indices
import hashlib
from itertools import product

In [2]:
def file_hash(filename: str) -> str:
    """Compute the SHA-256 hex digest of a file, reading it in 128 KiB chunks."""
    chunk_size = 128*1024
    digest = hashlib.sha256()
    with open(filename, 'rb', buffering=0) as stream:
        while True:
            chunk = stream.read(chunk_size)
            # an empty read marks the end of the file
            if chunk == b'':
                break
            digest.update(chunk)
    return digest.hexdigest()

In [3]:
# minimal experiment description: a version string plus the names of the json
# files that describe the hybridization images and the auxiliary nuclei images
experiment_metadata = {}
experiment_metadata["version"] = "0.0.0"
experiment_metadata["hybridization_images"] = "hybridization.json"
experiment_metadata["auxiliary_images"] = {"nuclei": "nuclei.json"}

## Write a few methods to generate JSON data

The actual execution block is in the next section

In [4]:
def dypfish_files_to_indices(glob_pattern: str) -> Generator[Tuple[str, Dict[Indices, int]], None, None]:
    """Yield tile metadata parsed from the names of the files matched by a glob pattern.

    Parameters
    ----------
    glob_pattern : str
        glob pattern selecting the image files. Each base name must look like
        ``img_000000000_Lamp <CHANNEL> low_<ZZZ>.tif`` where ``<CHANNEL>`` is one of
        DAPI, CY3 or CY5 and ``<ZZZ>`` is a three-digit z-plane number.

    Notes
    -----
    - For DypFISH, hybridization round is not used, so it is always 0.
    - DAPI and CY3 are both mapped to channel 0, CY5 to channel 1.
    - Files are processed in sorted order so that the output is reproducible
      (``glob.glob`` itself returns matches in arbitrary order).

    Yields
    ------
    Tuple[str, Dict[Indices, int]] :
        tuple of filename and a dictionary that contains the tile metadata (channel, hybridization round, and z-plane)

    Raises
    ------
    ValueError
        if a matched file's base name does not follow the expected naming scheme
    KeyError
        if the channel name parsed from a file is not DAPI, CY3 or CY5
    """
    # the dot before the extension is escaped so it only matches a literal '.'
    regex = re.compile(r'img_000000000_Lamp ([A-Z0-9]+?) low_([0-9]{3})\.tif')

    channel_map = {
        'DAPI': 0,
        'CY3': 0,
        'CY5': 1
    }

    for f in sorted(glob.glob(glob_pattern)):
        basename = os.path.basename(f)
        match = regex.match(basename)
        if match is None:
            # fail with the offending name instead of an AttributeError on None
            raise ValueError(
                f"file name {basename!r} does not match the expected pattern {regex.pattern!r}")
        raw_channel, raw_z = match.groups()
        yield f, {Indices.CH: channel_map[raw_channel], Indices.HYB: 0, Indices.Z: int(raw_z)}

def create_hybridization_json(
        files_to_indices_map: Generator[Tuple[str, Dict[Indices, int]], None, None],
        default_tile_shape=[512, 512],
        default_tile_format='TIFF',
) -> dict:
    """Creates a hybridization json document that specifies how the TIFF files construct a 5-d image tensor

    Parameters
    ----------
    files_to_indices_map : Generator[Tuple[str, Dict[Indices, int]], None, None]
        (file name, indices) pairs, one per tile; any iterable of such pairs works.
        Each file is read once to compute its sha256 hash.
    default_tile_shape : list
        tile shape written to the "default_tile_shape" field (copied, never shared)
    default_tile_format : str
        tile format written to the "default_tile_format" field

    Returns
    -------
    dict :
        hybridization json document in starfish v0.0.0 format

    Raises
    ------
    ValueError
        if ``files_to_indices_map`` yields no tiles
    """
    tiles = []

    for file_name, tile_indices in files_to_indices_map:
        tiles.append(
            {
                # every tile gets the same hard-coded coordinate ranges
                "coordinates": {
                    "x": [0, 0.0001],
                    "y": [0, 0.0001],
                    "z": [0, 0.0001],
                },
                "indices": {k.value: v for (k, v) in tile_indices.items()},
                "file": os.path.basename(file_name),
                "sha256": file_hash(file_name)
            }
        )

    if not tiles:
        raise ValueError(
            "files_to_indices_map yielded no tiles; check that the glob pattern matches any files")

    # size of each axis is the largest index seen plus one; the tile indices are
    # keyed by the enum *values*, so look them up the same way
    hybs = 1 + max(t["indices"][Indices.HYB.value] for t in tiles)
    channels = 1 + max(t["indices"][Indices.CH.value] for t in tiles)
    z_planes = 1 + max(t["indices"][Indices.Z.value] for t in tiles)

    return {
        "version": "0.0.0",
        # index names are taken from the last tile, in the order they were given
        "dimensions": ["x", "y"] + list(tiles[-1]["indices"]),
        # copy so that callers mutating the result cannot alter the shared default
        "default_tile_shape": list(default_tile_shape),
        "default_tile_format": default_tile_format,
        "shape": {
            f"{Indices.HYB.value}": hybs,
            f"{Indices.CH.value}": channels,
            f"{Indices.Z.value}": z_planes
        },
        "tiles": tiles
    }

# placeholder codebook: one gene per channel index, each with a single
# codeword entry at hybridization round 0 and value 1
dypfish_codebook = [
    {"codeword": {'c': channel, 'h': 0, 'v': 1}, "gene_name": gene_name}
    for channel, gene_name in enumerate(("gene_1", "gene_2"))
]

# Create the starfish specification files

In [5]:
# substrings that distinguish nuclei images from gene images in the file names
NUCLEI_WILDCARD = "DAPI"
GENE_WILDCARD = "CY"
# directory containing the image files; the json files are written here too
DIRECTORY = os.path.expanduser("~/Desktop/dypfish/")

# generate maps of filenames to tile metadata
nuclei_indices = dypfish_files_to_indices(os.path.join(DIRECTORY, f'*{NUCLEI_WILDCARD}*.tif'))
gene_indices = dypfish_files_to_indices(os.path.join(DIRECTORY, f'*{GENE_WILDCARD}*.tif'))

# create the hybridization json
hyb_json = create_hybridization_json(gene_indices)

# ... and the nuclei json
nuclei_json = create_hybridization_json(nuclei_indices)

# write everything to disk, one json file per document
json_documents = {
    'hybridization.json': hyb_json,
    'nuclei.json': nuclei_json,
    'experiment.json': experiment_metadata,
    'codebook.json': dypfish_codebook,
}
for json_name, document in json_documents.items():
    with open(os.path.join(DIRECTORY, json_name), 'w') as f:
        json.dump(document, f)

Now we can read the data into starfish. Below, we display the first channel (index 0, CY3).

In [6]:
from starfish.io import Stack

# load the experiment through the experiment.json written to DIRECTORY above
s = Stack.from_experiment_json(os.path.join(DIRECTORY, 'experiment.json'))

In [7]:
# display the loaded image stack for channel index 0; rescale=False is passed explicitly
s.image.show_stack({Indices.CH: 0}, rescale=False)

Rescaling ...


interactive(children=(IntSlider(value=0, description='plane_index', max=13), Output()), _dom_classes=('widget-…

<function starfish.image._stack.ImageStack.show_stack.<locals>.display_slice(plane_index=0)>