In [1]:
import os
import re
import glob
import yaml
import argparse

import numpy as np
import pandas as pd

from math import ceil

from matplotlib import cm

import tifffile

import zarr
import napari
import dask.array as da

from utils.utility_functions import single_channel_pyramid

In [2]:
# specify which clustering(s) to visualize; each entry is used as a column name
# of the single-cell table (`main`) when the per-cluster centroid layers are built
clusterings = ['VAE9_ROT_VIG5']  # other options: 'VAE20_ROT_res2.0', 'VAE9_ROT_VIG18', 'VAE20_Leiden', 'seg'

In [3]:
# I/O
sample = 'CRC-097'

# every input file is resolved relative to the current working directory
input_dir = os.path.join(os.getcwd(), 'input')

# read single-cell data (provides the clustering columns and the
# X_centroid / Y_centroid columns used when adding point layers)
main = pd.read_csv(os.path.join(input_dir, 'main4.csv'))

# test VAE20_ROT for now
# patches = pd.read_csv('/Users/greg/projects/vae-paper/src/input/VAE20_ROT/6_latent_space_LD850/clustering_full/VAE20_ROT_encodings-patches.csv') 
# df = pd.read_csv('/Users/greg/projects/vae-paper/src/input/VAE20_ROT/6_latent_space_LD850/clustering_full/centroids.csv') 
# main = pd.concat([df, patches['Cluster']], axis=1)

# paths to the OME-TIFF, segmentation outlines, and H&E image
# (only the paths are built here; the images are opened in later cells)
tif_path = os.path.join(input_dir, f'{sample}_image.ome.tif')
seg_path = os.path.join(input_dir, f'{sample}_seg_outlines.ome.tif')
he_path = os.path.join(input_dir, 'hema_eosin.ome.tif')

# import markers.csv (maps 'marker_name' to 1-based 'channel_number')
markers = pd.read_csv(os.path.join(input_dir, f'{sample}_mcmicro_markers.csv'))

# get markers excluded from analysis
# NOTE(review): this file name uses 'CRC-97' while every other input uses
# 'CRC-097'; kept as-is because the file on disk may really be named this
# way -- confirm before deriving it from `sample`.
with open(os.path.join(input_dir, 'CRC-97_cylinter_config.yml')) as f:
    config = yaml.safe_load(f)
# a key that is present but left blank in the YAML loads as None; treat it as
# "nothing excluded" so the membership test below does not raise
markers_to_exclude = config['markersToExclude'] or []

# import image contrast settings (indexed by marker name in a later cell)
with open(os.path.join(input_dir, f'{sample}_cylinter_contrast_limits.yml')) as f:
    contrast_limits = yaml.safe_load(f)

# The parquet file at the path below is being read because "main.csv"
# uses trimmed marker channel names as column headers that differ from the
# raw channel names used in the markers.csv file, which is itself used to
# index channels in the OME-TIFF image.
for_channels = pd.read_parquet(
    os.path.join(input_dir, f'{sample}_clean_cylinter_clustering_3d_leiden.parquet')
)

# isolate antibodies of interest: nuclear-ring-mask columns that are neither
# Hoechst channels nor listed for exclusion
abx_channels = [
    col for col in for_channels.columns
    if 'nucleiRingMask' in col
    and 'Hoechst' not in col
    and col not in markers_to_exclude
]

# get name of first DNA channel
# .iloc[0] takes the first matching row by position; indexing the filtered
# Series with [0] is a label lookup and raises KeyError unless that row
# happens to carry index label 0
dna1 = markers.loc[markers['channel_number'] == 1, 'marker_name'].iloc[0]

# first run of word characters that are not digits in the DNA channel name
dna_moniker = str(re.search(r'[^\W\d]+', dna1).group())

In [4]:
# add H&E image to Napari viewer as separate RGB channels

# resolve the H&E file once; raises IndexError if the file does not exist
he_file = glob.glob(he_path)[0]

for channel, color in enumerate(['red', 'green', 'blue']):

    # the helper's 2nd and 3rd return values are used as the layer's contrast
    # limits; they are not unpacked into `min`/`max`, which would shadow the
    # builtins for the rest of the kernel session
    img, lower, upper = single_channel_pyramid(he_file, channel=channel)

    layer_kwargs = dict(
        rgb=False, colormap=color, blending='additive',
        visible=False, name=f'H&E_{color}', contrast_limits=(lower, upper)
    )

    if channel == 0:
        # the first channel creates the viewer that all later cells add to
        viewer = napari.view_image(img, **layer_kwargs)
    else:
        viewer.add_image(img, **layer_kwargs)

In [5]:
# OPTIONAL: add H&E image to Napari viewer as a single channel image

# from lazy_ops import DatasetView
# tiff = tifffile.TiffFile(he_path, is_ome=False)
# pyramid = [
#     zarr.open(tiff.series[0].levels[0].aszarr())[i] for i in
#     list(range(len(tiff.series[0].levels)))
#     ]
# pyramid = [DatasetView(i).lazy_transpose([1, 2, 0]) for i in pyramid]
# pyramid = [da.from_zarr(z) for z in pyramid]
#
# viewer = napari.view_image(pyramid, rgb=True, name='H&E')

In [6]:
# add DNA1 channel (channel 0 of the OME-TIFF) to image viewer
# the limits are bound to dna_min/dna_max rather than `min`/`max` so the
# builtins are not shadowed in the kernel
dna, dna_min, dna_max = single_channel_pyramid(glob.glob(tif_path)[0], channel=0)
viewer.add_image(
    dna, rgb=False, blending='additive',
    colormap='gray', visible=True, opacity=0.8,
    name='DNA1', contrast_limits=(dna_min, dna_max)
)

<Image layer 'DNA1' at 0x142952760>

In [7]:
# add marker channels to image viewer and apply previously defined contrast limits

# fixed display colors (RGB, 0-1) for selected markers; any marker not listed
# here is shown with the 'green' colormap
marker_colors = {
    'Keratin_570': (0.12156862745098039, 0.4666666666666667, 0.7058823529411765),
    'aSMA_660': (1.0, 0.4980392156862745, 0.054901960784313725),
    'CD4_488': (0.17254901960784313, 0.6274509803921569, 0.17254901960784313),
    'PCNA_488': (0.8392156862745098, 0.15294117647058825, 0.1568627450980392),
    'CD31_647': (0.5803921568627451, 0.403921568627451, 0.7411764705882353),
    'FOXP3_570': (0.8901960784313725, 0.4666666666666667, 0.7607843137254902),
    'CD20_488': (0.7372549019607844, 0.7411764705882353, 0.13333333333333333),
    'CD8a_660': (0.09019607843137255, 0.7450980392156863, 0.8117647058823529),
}

# resolve the image file once instead of on every iteration
tif_file = glob.glob(tif_path)[0]

# layers added by this cell, keyed by marker name
marker_layers = {}

for column in abx_channels:
    # drop everything after the last underscore to get the name used in markers.csv
    marker = column.rsplit('_', 1)[0]

    # .item() below raises unless exactly one markers.csv row matches this name
    channel_number = markers['channel_number'][markers['marker_name'] == marker]

    rgb_value = marker_colors.get(marker)
    color = 'green' if rgb_value is None else np.array(rgb_value)

    # channel numbers in markers.csv are 1-based, image channels are 0-based;
    # the limits are not unpacked into `min`/`max` to avoid shadowing the builtins
    img, lower, upper = single_channel_pyramid(
        tif_file, channel=(channel_number.item() - 1)
    )
    marker_layers[marker] = viewer.add_image(
        img, rgb=False, blending='additive', colormap=color, visible=False, name=marker,
        contrast_limits=(lower, upper)
    )

# second pass: apply the saved contrast limits. The layer objects returned by
# add_image are used rather than a lookup by name, so the limits reach the
# layers created above even when another layer already uses the same name.
for marker, layer in marker_layers.items():
    layer.contrast_limits = (
        contrast_limits[marker][0], contrast_limits[marker][1])

In [8]:
# add centroids of cells in each clustering
tab20 = list(cm.tab20.colors)

for clustering in clusterings:

    # cluster labels in descending order
    labels_desc = sorted(main[clustering].unique(), reverse=True)
    n_labels = len(labels_desc)

    # repeat the tab20 palette until it covers every cluster, trim it to
    # length, then reverse it
    palette = (tab20 * ceil(n_labels / len(tab20)))[:n_labels]
    palette.reverse()

    if clustering == 'VAE9_ROT_VIG18':

        # 9um (14px) meta-cluster groups
        epithelial = [24, 25, 20, 14, 31, 9, 10, 3, 5, 6, 11, 15, 19, 27, 28]
        immune = [26, 30, 8, 12, 16, 21, 7, 2, 22]
        stromal = [1, 4, 13, 18, 29, 0, 17, 23]

        # colors are assigned by descending label, but layers are added in the
        # reversed meta-cluster order; labels absent from the data are skipped
        color_by_cluster = dict(zip(labels_desc, palette))
        layers_to_add = [
            (cluster, color_by_cluster[cluster])
            for cluster in reversed(epithelial + immune + stromal)
            if cluster in color_by_cluster
        ]

    else:
        layers_to_add = list(zip(labels_desc, palette))

    # one points layer per cluster, named '<clustering>_<cluster>'
    for cluster, c in layers_to_add:
        centroids = main.loc[main[clustering] == cluster, ['Y_centroid', 'X_centroid']]
        viewer.add_points(
            centroids, name=f'{clustering}_{cluster}', face_color=np.array(c), edge_color='white',
            edge_width=0.0, size=60.0, opacity=1.0, blending='translucent', visible=False
        )

In [9]:
# add segmentation outlines (channel 0 of the outlines image) to image viewer
# the limits are bound to seg_min/seg_max rather than `min`/`max` so the
# builtins are not shadowed in the kernel
seg, seg_min, seg_max = single_channel_pyramid(glob.glob(seg_path)[0], channel=0)
viewer.add_image(
    seg, rgb=False, blending='additive',
    colormap='gray', visible=False,
    name='segmentation', opacity=0.3, contrast_limits=(seg_min, seg_max)
)

<Image layer 'segmentation' at 0x15ba49070>

In [10]:
# show a scale bar labelled in 'um', then start the viewer
# NOTE(review): no layer in this notebook is given an explicit `scale`, so the
# 'um' label presumably assumes 1 px == 1 um -- confirm against the image metadata
scale_bar = viewer.scale_bar
scale_bar.unit = 'um'
scale_bar.visible = True

napari.run()