# Starfish BaristaSeq Processing Example

In [1]:
%gui qt

import os
from copy import deepcopy
from itertools import product

import numpy as np
import pandas as pd
import skimage.filters
import skimage.morphology
from skimage.transform import SimilarityTransform, warp
from tqdm import tqdm

import starfish
import starfish.data
from starfish.spots import SpotFinder
from starfish.types import Axes

Select data for a single field of view. 

In [5]:
# Load the experiment through starfish's SeqFISH data loader with use_test_data=True.
# NOTE(review): the notebook is titled "BaristaSeq" but this call loads the SeqFISH
# dataset -- confirm which dataset is intended.
exp = starfish.data.SeqFISH(use_test_data=True)

In [6]:
# Fetch the 'primary' image of field of view 'fov_000'; this is the input to every
# processing step below.
img = exp['fov_000'].get_image('primary')

100%|██████████| 1740/1740 [10:14<00:00,  5.83it/s]


The first step in BaristaSeq is to do some rough registration. For this data, the rough registration has been done for us by the authors, so it is omitted from this notebook.

## Remove image background

To remove image background, BaristaSeq uses a White Tophat filter, which measures the background with a rolling disk morphological element and subtracts it from the image. 

In [7]:
from skimage.morphology import opening, dilation, disk
from functools import partial

If desired, the background that is being subtracted can be visualized by uncommenting the cell below.

In [12]:
# opening = partial(opening, selem=disk(3))

# background = img.apply(
#     opening,
#     group_by={Axes.ROUND, Axes.CH, Axes.ZPLANE}, verbose=False, in_place=False
# )

# starfish.display(background)

<napari.components._viewer.model.Viewer at 0x1954c0198>

In [16]:
# Build a white top-hat filter with masking_radius=3 and run it on the primary image.
# in_place=False asks for the result as a new object, bound to `background_corrected`
# (presumably leaving `img` unmodified -- confirm against the starfish docs).
wth = starfish.image.Filter.WhiteTophat(masking_radius=3)
background_corrected = wth.run(img, in_place=False)
# Open the background-corrected stack in the viewer.
starfish.display(background_corrected)

<napari.components._viewer.model.Viewer at 0x195b46748>

## Scale images to equalize spot intensities across channels

The number of peaks is not uniform across rounds and channels, which prevents histogram matching across channels. Instead, a percentile value is identified and set as the maximum across channels, and the dynamic range is extended to equalize the channel intensities.

In [24]:
def scale_by_percentile(data, p=99.9):
    """Rescale intensities so the ``p``-th percentile maps to 1, capping at 1.

    Parameters
    ----------
    data : array_like
        Intensity values. Converted with ``np.asarray``; the input itself is
        never modified because a new array is always returned.
    p : float
        Percentile (0-100) of ``data`` used as the scaling reference.

    Returns
    -------
    np.ndarray
        ``data / percentile`` with every value above 1 set to 1. When the
        percentile is exactly 0, positive values saturate to 1 and all other
        values become 0 instead of turning into NaN/inf.
    """
    data = np.asarray(data)
    cval = np.percentile(data, p)

    if cval == 0:
        # Dividing by a zero percentile would give NaN for 0/0 and +/-inf
        # elsewhere. Values above the (zero) reference saturate to 1, exactly
        # as they would after clipping; everything else is treated as empty.
        return (data > 0).astype(np.result_type(data, cval))

    # Normalise by the reference value and cap anything brighter than it.
    return np.minimum(data / cval, 1)

# Apply the percentile scaling with grouping over the round and channel axes;
# the result is returned as a new object rather than written back in place.
scaled = background_corrected.apply(
    scale_by_percentile,
    group_by={Axes.CH, Axes.ROUND},
    in_place=False,
    verbose=False,
)

In [25]:
# Open the intensity-scaled stack in the viewer to check the equalized channels.
starfish.display(scaled)

<napari.components._viewer.model.Viewer at 0x1b8125630>

## Remove residual background

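The background is fairly uniformly present below intensity=0.5. However, starfish's clip method currently only supports percentiles. To solve this problem, the intensities can be directly edited in the underlying numpy array. Note that the cell below uses a cutoff of 0.7 rather than 0.5, so it also zeroes intensities between 0.5 and 0.7.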

In [26]:
from copy import deepcopy
# Work on a deep copy so `scaled` keeps its values for comparison.
clipped = deepcopy(scaled)
# Zero every intensity below 0.7 by writing straight into the array behind the xarray.
# NOTE(review): the accompanying text describes background below 0.5, but the cutoff
# used here is 0.7 -- confirm which value is intended.
clipped.xarray.values[clipped.xarray.values < 0.7] = 0

In [27]:
# Open the clipped stack in the viewer to confirm the residual background is gone.
starfish.display(clipped)

<napari.components._viewer.model.Viewer at 0x1b81b0978>

## Detect Spots

Detect spots with a local search blob detector that identifies spots in all rounds and channels and matches them using a local search method. The local search starts in an anchor round (default round=1) and identifies the nearest spot in all subsequent imaging rounds. 

In [47]:
# Detection threshold handed to the blob detector below.
threshold = 0.5

# NOTE(review): the detector is reached through a private module path
# (starfish.spots._detector...). `SpotFinder` is imported at the top of the notebook
# and may expose the same class -- confirm and prefer the public path if so.
lsbd = starfish.spots._detector.local_search_blob_detector.LocalSearchBlobDetector(
    min_sigma=(1.5, 1.5, 1.5),
    max_sigma=(8, 8, 8),
    num_sigma=10,
    threshold=threshold,
    search_radius=7
)
# Run the detector on the clipped stack.
intensities = lsbd.run(clipped)
# Replace missing intensities with 0 before decoding against the experiment codebook.
decoded = exp.codebook.decode_per_round_max(intensities.fillna(0))

In [50]:
# Show the clipped stack together with the detected intensities for visual inspection.
starfish.display(clipped, intensities)

<napari.components._viewer.model.Viewer at 0x1508e1cf8>

Based on visual inspection, it looks like the spot correspondence across rounds isn't being detected well. Try the PixelSpotDecoder.

In [28]:
# Configure the pixel-based decoder with the experiment codebook, a euclidean metric,
# distance/magnitude thresholds, and an accepted area range of 7 to 50.
psd = starfish.spots.PixelSpotDecoder.PixelSpotDecoder(
    codebook=exp.codebook, metric='euclidean', distance_threshold=0.5, 
    magnitude_threshold=0.1, min_area=7, max_area=50
)
# run() returns two objects: the decoded result and `ccdr`, whose `label_image`
# attribute is inspected in the following cells.
pixel_decoded, ccdr = psd.run(clipped)

100%|██████████| 971/971 [00:15<00:00, 63.36it/s]


In [30]:
# Inspect the dimensions of the label image produced by the pixel decoder.
ccdr.label_image.shape

(29, 280, 280)

In [31]:
# Look at the label image in napari. The label image is 3-D, while the array handed to
# ImageStack.from_numpy_array here is 5-D, so two singleton axes are prepended.
# The target shape is derived from the array itself rather than hard-coded, so the
# cell keeps working when the label image has a different size.
label_image = starfish.ImageStack.from_numpy_array(
    np.reshape(ccdr.label_image, (1, 1) + tuple(ccdr.label_image.shape))
)
starfish.display(label_image)

  .format(dtypeobj_in, dtypeobj_out))
100%|██████████| 29/29 [00:00<00:00, 256.36it/s]


<napari.components._viewer.model.Viewer at 0x1b90868d0>

In [43]:
# List the distinct label values present at index 19 along the first axis of the label image.
np.unique(ccdr.label_image[19])

array([  0, 911, 923, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941,
       942, 943, 944, 945, 946, 947, 948, 949, 950])

Well, this definitely does not work. It's decoding pixels in subsequent rounds as new unique gene types...

Compare the number of spots being detected by the two spot finders

In [51]:
# For each method, count the entries whose 'target' is not the string 'nan'.
print("pixel_decoder spots detected", int(np.sum(pixel_decoded['target'] != 'nan')))
print("local search spot detector spots detected", int(np.sum(decoded['target'] != 'nan')))

pixel_decoder spots detected 950
local search spot detector spots detected 53


Report the correlation between the two methods

In [60]:
from scipy.stats import pearsonr

# Get the total counts for each gene from each spot detector.
# np.unique(..., return_counts=True) returns (values, counts); reversing the tuple gives
# (counts, values), so each Series holds the counts indexed by target name.
pixel_decoded_gene_counts = pd.Series(*np.unique(pixel_decoded['target'], return_counts=True)[::-1])
decoded_gene_counts = pd.Series(*np.unique(decoded['target'], return_counts=True)[::-1])

# Get the genes that are detected by both spot finders, excluding the 'nan' target.
# errors='ignore' avoids a KeyError when 'nan' is not among the shared targets
# (for example when one method decodes every feature).
codetected = (
    pixel_decoded_gene_counts.index
    .intersection(decoded_gene_counts.index)
    .drop('nan', errors='ignore')
)

# Report the correlation over the co-detected genes.
# NOTE: pearsonr is undefined (nan) when either count vector has zero variance, so a
# nan result means there are too few distinct counts to compare, not a decoding failure.
pearsonr(pixel_decoded_gene_counts[codetected], decoded_gene_counts[codetected])

  r = r_num / r_den


(nan, 1.0)