

# Star-TREX preprocessing pipeline

### Transform your images into spaceTx format, create the codebook and validate your results

Load required packages

In [1]:
# Load packages
import sys
import os
from pathlib import Path
# Put the parent directory on the module search path so that the local `src`
# package imported by later cells (presumably located there) can be found.
sys.path.insert(1, os.path.abspath('..'))

Define your working directory and the location of the schema files in the starfish package (`scheme_path`). If starfish was installed within a conda environment, your path should be similar to the one below.

In [8]:
# Define working directory:
# Root folder of the dataset. Later cells read "input_images" from it and
# write the converted data into its "spacetx" subfolder.
work_dir = "/Users/leonievb/Library/CloudStorage/OneDrive-Personal/Postdoc/Data/02_4_Gene_Test2/OME-TIFF_MaxIP/"
#work_dir = "/Users/leonievb/Library/CloudStorage/OneDrive-Personal/Postdoc/Data/02_4_Gene_Test2/4genepanel_dapi-488-568-657-750/OME-TIFF/"
# Folder with the starfish SpaceTx JSON schema files; the validation cells
# below join schema file names onto this path. Adjust it to your environment.
scheme_path = "/Users/leonievb/anaconda3/envs/starfish/lib/python3.7/site-packages/starfish/spacetx_format/schema/"

## Bring image files into spaceTx format

Let's start by transforming the image folder into structured data. The images need to be provided as one OME-TIFF file (`.ome.tiff`) per round, containing all channels and z-planes (if present). Make sure that the number of created image files matches your expectations.

In [32]:
# Convert images into structured files

# --- Settings --------------------------------------------------------------
# Source folder with the per-round image files and target folder for the
# structured output, both located inside the working directory.
input_path = os.path.join(work_dir, "input_images")
output_structured = os.path.join(work_dir, "spacetx")

# Values passed straight through to process_tiff.
# NOTE(review): `nuclei` is presumably the index of the nuclear-stain channel;
# confirm against src.spacetxer.process_tiff.
nuclei = 4
extension = None
fov = 0  # or "multi" if there are several folders, each holding one FOV

# --- Conversion ------------------------------------------------------------
# Reload the helper module first so that edits made to src/spacetxer.py are
# picked up without restarting the kernel.
from importlib import reload
from src import spacetxer
reload(spacetxer)
from src.spacetxer import process_tiff

process_tiff(input_path, output_structured, extension, fov, nuclei)

Saved: /Users/leonievb/Library/CloudStorage/OneDrive-Personal/Postdoc/Data/02_4_Gene_Test2/4genepanel_dapi-488-568-657-750/OME-TIFF/spacetx/primary/primary-f0-r0-c0-z0.tiff
Saved: /Users/leonievb/Library/CloudStorage/OneDrive-Personal/Postdoc/Data/02_4_Gene_Test2/4genepanel_dapi-488-568-657-750/OME-TIFF/spacetx/primary/primary-f0-r0-c0-z1.tiff
Saved: /Users/leonievb/Library/CloudStorage/OneDrive-Personal/Postdoc/Data/02_4_Gene_Test2/4genepanel_dapi-488-568-657-750/OME-TIFF/spacetx/primary/primary-f0-r0-c0-z2.tiff
Saved: /Users/leonievb/Library/CloudStorage/OneDrive-Personal/Postdoc/Data/02_4_Gene_Test2/4genepanel_dapi-488-568-657-750/OME-TIFF/spacetx/primary/primary-f0-r0-c0-z3.tiff
Saved: /Users/leonievb/Library/CloudStorage/OneDrive-Personal/Postdoc/Data/02_4_Gene_Test2/4genepanel_dapi-488-568-657-750/OME-TIFF/spacetx/primary/primary-f0-r0-c0-z4.tiff
Saved: /Users/leonievb/Library/CloudStorage/OneDrive-Personal/Postdoc/Data/02_4_Gene_Test2/4genepanel_dapi-488-568-657-750/OME-TIFF/spa

KeyboardInterrupt: 

Convert structured files into SpaceTx format

In [11]:
from slicedimage import ImageFormat
from starfish.experiment.builder import format_structured_dataset

# Convert both image sets in place. For each one, the structured TIFF folder
# is the input directory as well as the output directory, and the physical
# coordinates are read from the "coordinates.csv" file inside that folder.
for image_type in ("primary", "nuclei"):
    image_dir = os.path.join(output_structured, image_type)
    format_structured_dataset(
        image_dir,
        os.path.join(image_dir, "coordinates.csv"),
        image_dir,
        ImageFormat.TIFF,
        in_place=True,
    )

## Create the codebook

This code will create the codebook from your geneID and key information. Make sure you have the required files in place.
1. A csv file mapping gene symbols to geneIDs: <br>
gene1,geneid1 <br>
gene2,geneid2 <br>
2. A key mapping bases (or combinations of bases) to channel numbers (in the order in which the channels appear in the image): <br>
A,1 <br>
C,2<br>
G,3<br>
T,4<br>

Make sure you go through the settings below and adjust them to your data, especially the parameters 'n_channels', 'n_rounds', 'two_base_encoding' and 'border_base'.


In [1]:
# NOTE: this cell depends on `os` and `work_dir` from the cells above. Running
# it in a fresh kernel without them fails with a NameError.

# Reload the helper module so that edits made to src/codebooker.py are picked
# up without restarting the kernel.
from importlib import reload
from src import codebooker
reload(codebooker)
from src.codebooker import create_codebook

# --- Input and output files -------------------------------------------------
code_path = os.path.join(work_dir, "../../meta/geneIDs.csv")    # gene symbol -> geneID table
key_path = os.path.join(work_dir, "../../meta/key_dapi.csv")    # base -> channel number key
codebook_path = os.path.join(work_dir, "spacetx/primary/codebook.json")  # or None

# --- Experiment layout and decoding options (adjust to your data) -----------
n_channels = 4
n_rounds = 6
two_base_encoding = True
inverse = True
complementary = True
border_base = "G"
trim = None

# --- Build the codebook ------------------------------------------------------
codebook = create_codebook(
    code_path,
    key_path,
    n_channels,
    n_rounds,
    codebook_path,
    two_bases_code=two_base_encoding,
    inverse=inverse,
    complementary=complementary,
    border_base=border_base,
    trim=trim,
)

NameError: name 'os' is not defined

Alternatively, load an already existing codebook

In [None]:
#codebook_path = os.path.join(work_dir, "structured_out/codebook.json")
#from src.codebooker import load_codebook
#codebook = load_codebook(codebook_path)

Check out the codebook

In [13]:
# Display the value returned by create_codebook as this cell's output.
# NOTE(review): the recorded output appears to be a pair of the codebook array
# and a target_name/target_id table - confirm against src.codebooker.
codebook

(<xarray.Codebook (target: 4, r: 6, c: 4)>
 array([[[0., 1., 0., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.],
         [0., 1., 0., 0.],
         [0., 0., 0., 1.]],
 
        [[0., 0., 1., 0.],
         [0., 0., 0., 1.],
         [0., 1., 0., 0.],
         [0., 1., 0., 0.],
         [1., 0., 0., 0.],
         [0., 1., 0., 0.]],
 
        [[0., 0., 0., 1.],
         [0., 1., 0., 0.],
         [1., 0., 0., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.]],
 
        [[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [0., 0., 0., 1.],
         [1., 0., 0., 0.],
         [0., 0., 0., 1.],
         [1., 0., 0., 0.]]])
 Coordinates:
   * target   (target) object 'Actb' 'Calm1' 'Malat1' 'Snap25'
   * r        (r) int64 0 1 2 3 4 5
   * c        (c) int64 0 1 2 3,
   target_name target_id
 0        Actb     CACCG
 1       Calm1     TTATC
 2      Malat1     GCTGT
 3      Snap25     AGGAA)

Now we wish to validate the created codebook. Starfish provides functions for this: the codebook is validated against a codebook schema, which ships with the starfish package and can be found at the following location within your venv or conda environment: "/[environment_name]/lib/python3.7/site-packages/starfish/spacetx_format/schema/codebook_0.0.0/codebook.json"

In [14]:
# Validate codebook
from pkg_resources import resource_filename
from starfish.core.spacetx_format.util import SpaceTxValidator

# Schema that the codebook file is checked against.
codebook_schema = os.path.join(scheme_path, "codebook_0.0.0/codebook.json")
validator = SpaceTxValidator(codebook_schema)

# The previously defined codebook_path is validated. To check a different
# codebook, point codebook_path at it here:
#codebook_path = "path/to/codebook.json"

codebook_is_valid = validator.validate_file(codebook_path)
if codebook_is_valid:
    print("Your codebook looks good!")
else:
    raise Exception("Your codebook is invalid")

Your codebook looks good!


Now we need to validate the new experiment.json file

In [15]:
# Validate SpaceTx files
from pkg_resources import resource_filename
from starfish.core.spacetx_format.util import SpaceTxValidator

# Pick the schema that matches the "version" field of your experiment.json
# (5.0.0 for the file shown below; older data may need the 4.0.0 schema).
experiment_schema = os.path.join(scheme_path, "experiment_5.0.0.json")
experiment_json = os.path.join(output_structured, "primary/experiment.json")

validator = SpaceTxValidator(experiment_schema)
if validator.validate_file(experiment_json):
    print("Your experiment files look good!")
else:
    raise Exception("Your experiment files are invalid")

Your experiment files look good!


Make sure your experiment data also includes information about your nuclei images. Run this cell only once per experiment to avoid duplicate entries.

In [16]:
# Register the nuclei image manifest in primary/experiment.json.
#
# The file is parsed as JSON rather than patched by line number, so the update
# does not depend on how the file is laid out. Assigning a dictionary key also
# makes the cell safe to re-run: it can never produce a duplicate "nuclei"
# entry or a stray comma.
import json

experiment_path = os.path.join(output_structured, "primary/experiment.json")

with open(experiment_path, "r+", encoding="utf-8") as fh:
    original_text = fh.read()
    print("original experiment.json\n")
    print(original_text)

    experiment = json.loads(original_text)
    # Path is relative to the folder containing experiment.json.
    experiment.setdefault("images", {})["nuclei"] = "../nuclei/nuclei.json"

    fh.seek(0)  # reading moved the file position to the end; rewind to overwrite
    json.dump(experiment, fh, indent=4)
    fh.write("\n")
    fh.truncate()  # drop leftover bytes in case the new content is shorter

    fh.seek(0)
    print("\nmodified experiment.json\n")
    print(fh.read())

original experiment.json

{
    "version": "5.0.0",
    "images": {
        "primary": "primary.json"
    },
    "extras": {},
    "codebook": "codebook.json"
}

modified experiment.json

{
    "version": "5.0.0",
    "images": {
        "primary": "primary.json",
	"nuclei": "../nuclei/nuclei.json"
    },
    "extras": {},
    "codebook": "codebook.json"
}


Confirm that the experiment was correctly created by loading it and looking at its shape. If it looks good, well done! The preprocessing steps are completed.

In [None]:
from starfish import Experiment
exp = Experiment.from_json(os.path.join(work_dir, "spacetx", "primary", "experiment.json"))
print(exp)