In [1]:
import os
import re
import glob
import yaml
import argparse

import numpy as np
import pandas as pd
from matplotlib import cm

from math import ceil

import tifffile

import napari
import zarr
import dask.array as da

from utils.utility_functions import single_channel_pyramid

In [2]:
# Specify which clustering(s) to visualize. Each entry is used as a column
# name of the `main` table when the cluster centroids are added to the viewer.
# Options: 'Leiden', 'HDBSCAN', 'FlowSOM', 'Consensus', 'VAE20_ROT_VIG40', 'VAE9_ROT_VIG18', 'VAE20',
# 'VAE9', 'VAE9-VAE20', 'NMF_C2', 'NMF_C3', 'NMF_C4', 'NMF_C5', 'NMF_C7', 'NMF_C10', 'NMF_C15'
# NOTE(review): 'VAE20_Leiden' below is not among the options listed above;
# confirm that main.csv has this column, or update the list.

cluster_categories = ['VAE20_Leiden']

In [3]:
# Paths and inputs
# All inputs live in ./input relative to the current working directory, and
# every sample-specific file name is derived from `sample` so that changing
# the sample ID here cannot silently mix files from different samples.
sample = 'CRC-097'
input_dir = os.path.join(os.getcwd(), 'input')

# Paths to the OME-TIFF, segmentation outlines, and H&E image
tif_path = os.path.join(input_dir, f'{sample}_image.ome.tif')
seg_path = os.path.join(input_dir, f'{sample}_seg_outlines.ome.tif')
he_path = os.path.join(input_dir, 'hema_eosin.ome.tif')

# Read single-cell sample for VAE analysis
main = pd.read_csv(os.path.join(input_dir, 'main.csv'))

# Get list of markers that were excluded from analysis
# NOTE(review): this file name says 'CRC-97' while every other input says
# 'CRC-097'. It is kept verbatim because the file on disk may really be named
# this way; confirm before deriving it from `sample`.
with open(os.path.join(input_dir, 'CRC-97_cylinter_config.yml')) as f:
    config = yaml.safe_load(f)
markers_to_exclude = config['markersToExclude']

# Import markers.csv
markers = pd.read_csv(os.path.join(input_dir, f'{sample}_mcmicro_markers.csv'))

# Import image contrast settings
with open(os.path.join(input_dir, f'{sample}_cylinter_contrast_limits.yml')) as f:
    contrast_limits = yaml.safe_load(f)

# The parquet file at the path below is being read because "main.csv"
# uses trimmed marker channel names as column headers that differ from the raw channel names used
# in the markers.csv file, which is itself used to index channels in the OME-TIFF image.
for_channels = pd.read_parquet(
    os.path.join(input_dir, f'{sample}_clean_cylinter_clustering_3d_leiden.parquet')
)

# Isolate antibodies of interest: keep the parquet columns that mention
# 'nucleiRingMask', do not mention 'Hoechst', and are not listed as excluded.
# NOTE(review): the exclusion test compares the full column name (mask suffix
# included) against `markers_to_exclude`; confirm that list uses the same form.
abx_channels = [
    column for column in for_channels.columns
    if 'nucleiRingMask' in column
    and 'Hoechst' not in column
    and column not in markers_to_exclude
]

# Get name of first DNA channel
# Select the marker whose channel number is 1 and take the first match by
# POSITION (.iloc). The previous `[...][0]` was a label lookup that only
# worked when the matching row happened to carry index label 0.
dna1 = markers.loc[markers['channel_number'] == 1, 'marker_name'].iloc[0]

# Leading run of letters/underscores in the channel name (digits and
# non-word characters end the match).
dna_moniker = str(re.search(r'[^\W\d]+', dna1).group())

# Output directory for cluster-location tables; created (with any missing
# parents) if absent. `exist_ok=True` replaces the separate existence check,
# so there is no window between checking and creating.
out = os.path.join(os.getcwd(), 'output/cluster_locations')
os.makedirs(out, exist_ok=True)

In [4]:
# Add H&E image to Napari viewer as separate RGB channels
# The file path is resolved once rather than on every iteration.
he_file = glob.glob(he_path)[0]

for channel, color in enumerate(['red', 'green', 'blue']):

    # Named vmin/vmax (not min/max) so the Python builtins are not shadowed
    # for the rest of the notebook session.
    img, vmin, vmax = single_channel_pyramid(he_file, channel=channel)

    layer_kwargs = dict(
        rgb=False, colormap=color, blending='additive',
        visible=False, name=f'H&E_{color}', contrast_limits=(vmin, vmax)
    )

    if channel == 0:
        # The first channel creates the viewer that all later cells add to
        viewer = napari.view_image(img, **layer_kwargs)
    else:
        viewer.add_image(img, **layer_kwargs)

# Add H&E image to Napari viewer as a single channel image
# from lazy_ops import DatasetView
# tiff = tifffile.TiffFile(he_path, is_ome=False)
# pyramid = [
#     zarr.open(tiff.series[0].levels[0].aszarr())[i] for i in
#     list(range(len(tiff.series[0].levels)))
#     ]
# pyramid = [DatasetView(i).lazy_transpose([1, 2, 0]) for i in pyramid]
# pyramid = [da.from_zarr(z) for z in pyramid]
#
# viewer = napari.view_image(pyramid, rgb=True, name='H&E')

In [5]:
# Add DNA1 channel (channel 0 of the OME-TIFF) to Napari image viewer
# vmin/vmax are used instead of min/max so the Python builtins are not
# shadowed for the rest of the notebook session.
dna, vmin, vmax = single_channel_pyramid(glob.glob(tif_path)[0], channel=0)
viewer.add_image(
    dna, rgb=False, blending='additive',
    colormap='gray', visible=True, opacity=1.0,
    name='DNA1', contrast_limits=(vmin, vmax)
)

<Image layer 'DNA1' at 0x1633e3790>

In [6]:
# Add marker channels and apply previously defined contrast limits
# The image path is resolved once rather than on every iteration.
tif_file = glob.glob(tif_path)[0]

for mask_column in abx_channels:
    # Drop everything after the last underscore to get the marker name used
    # in markers.csv and in the contrast-limits file.
    ch = mask_column.rsplit('_', 1)[0]

    # markers.csv channel numbers are 1-based; the image is indexed from 0.
    # `.item()` raises unless exactly one row matches the marker name.
    channel_number = markers['channel_number'][markers['marker_name'] == ch]

    # vmin/vmax (not min/max) so the Python builtins are not shadowed
    img, vmin, vmax = single_channel_pyramid(
        tif_file, channel=(channel_number.item() - 1)
    )

    # The layer is first added with the limits returned for the image and is
    # then narrowed to the saved limits, keeping the original two-step order
    # (presumably so the adjustable range spans the full image - TODO confirm).
    layer = viewer.add_image(
        img, rgb=False, blending='additive', colormap='green', visible=False, name=ch,
        contrast_limits=(vmin, vmax)
    )
    layer.contrast_limits = (contrast_limits[ch][0], contrast_limits[ch][1])

In [7]:
# Add centroids of cells for each cluster
# One points layer is added per cluster label. `main` is never modified here,
# so the cell can be re-run and the result for one clustering does not depend
# on which clusterings were processed before it.
for clustering in cluster_categories:

    # One tab20 color per cluster label, cycling through the palette as many
    # times as needed. The list is reversed to pair with the descending
    # cluster order used when the layers are added below.
    tab20_colors = list(cm.tab20.colors)
    num_colors = len(tab20_colors)
    num_clusters = len(main[clustering].unique())
    palette_multiplier = ceil(num_clusters / num_colors)
    colors = (tab20_colors * palette_multiplier)[:num_clusters]
    colors.reverse()

    ##################################################################

    if clustering == 'VAE9-VAE20':

        # Drop noisy (i.e. -1) VAE clusters
        cells = main[
            (main['VAE20'] != -1) & (main['VAE9'] != -1) & (main[clustering] != -1)
        ]

        # Filter low-abundance VAE tuple labels (keep labels with > 122 cells).
        # `.copy()` gives an independent table so the column assignments below
        # do not write into a slice of `main`.
        min_cells_exclusive = 122
        label_sizes = cells.groupby(clustering).size()
        selected_labels = label_sizes.index[label_sizes > min_cells_exclusive]
        cells = cells[cells[clustering].isin(selected_labels)].copy()

        # NOTE(review): machine-specific absolute paths; these only resolve on
        # the original author's computer. Consider moving the tables to ./input.
        VAE20_annos = pd.read_csv('/Users/greg/Dropbox (HMS)/Baker_VAE_2023/figures/v12/SupTable3.csv')
        VAE9_annos = pd.read_csv('/Users/greg/Dropbox (HMS)/Baker_VAE_2023/figures/v12/SupTable4.csv')

        # Cluster -> 'Call' lookups. When a cluster is listed more than once
        # the first row wins, matching the previous `.values[0]` behaviour.
        context_by_cluster = VAE20_annos.drop_duplicates('Cluster').set_index('Cluster')['Call']
        state_by_cluster = VAE9_annos.drop_duplicates('Cluster').set_index('Cluster')['Call']

        # Fail loudly (as the per-cell lookup did) if a cluster has no annotation
        for column, lookup in (('VAE20', context_by_cluster), ('VAE9', state_by_cluster)):
            missing = set(cells[column].unique()) - set(lookup.index)
            if missing:
                raise KeyError(
                    f'No annotation found for {column} cluster(s): {list(missing)}'
                )

        # Vectorized lookup instead of one boolean-mask scan per cell
        cells['context'] = cells['VAE20'].map(context_by_cluster)
        cells['state'] = cells['VAE9'].map(state_by_cluster)

        # One row per tuple label, written out in ascending label order
        table = (
            cells[[clustering, 'VAE_Tuples', 'state', 'context']]
            .groupby(clustering, as_index=False)
            .first()
            .sort_values(by=clustering)
            .rename(
                columns={clustering: 'Label', 'VAE_Tuples': 'Cluster Tuple',
                         'state': 'Cell State', 'context': 'Tissue Context'}
            )
        )
        table.to_csv(os.path.join(out, 'tuples.csv'), index=False)

        # Sort tuple table by tissue context cluster
        # ('Cluster Tuple' is split on '_': the second field is the primary
        # sort key and the first field the secondary key, both descending)
        table = table.assign(
            sort1=[int(i.split('_')[1]) for i in table['Cluster Tuple']],
            sort2=[int(i.split('_')[0]) for i in table['Cluster Tuple']],
        ).sort_values(by=['sort1', 'sort2'], ascending=False)

        # Or by order of appearance in vae_clustermap.py
        # custom_sort = [
        #     71, 102, 83, 105, 112, 151, 92, 136, 65, 68, 87, 82, 30, 144, 121, 139, 77,
        #     86, 63, 19, 113, 23, 88, 17, 133, 91, 50, 76, 90, 141, 59, 89, 3, 60, 47,
        #     135, 150, 42, 36, 56, 69, 115, 118, 8, 2, 81, 147, 15, 33, 34, 73, 58, 51,
        #     44, 13, 14, 84, 131, 140, 132, 20, 5, 22, 74, 100, 46, 96, 40, 1, 114, 25,
        #     130, 148, 21, 53, 6, 7, 11, 55, 39, 4, 12, 18, 101, 67, 57, 26, 27, 54, 85,
        #     110, 116, 70, 106, 98, 143, 29, 41, 32, 24, 128, 79, 99, 52, 117, 38, 62,
        #     122, 9, 97, 104, 61, 80, 64, 95, 124, 109, 111, 129, 75, 108, 149, 35, 66,
        #     28, 94, 107, 37, 103, 119, 31, 134, 146, 10, 45, 127, 0, 72, 120, 125, 142,
        #     78, 126, 145, 123, 137, 16, 43, 93, 49, 48, 138
        # ]
        # df_mapping = pd.DataFrame({'Label': custom_sort})
        # sort_mapping = df_mapping.reset_index().set_index('Label')
        # table['sort'] = table['Label'].map(sort_mapping['index'])
        # table.sort_values(by='sort', ascending=False, inplace=True)

        for c, cluster in zip(colors, table['Label']):
            # Select this label's cells once and reuse the subset for both
            # the centroids and the layer name.
            members = cells[cells[clustering] == cluster]
            centroids = members[['Y_centroid', 'X_centroid']]
            viewer.add_points(
                centroids,
                name=(
                    f"{members[clustering].iloc[0]} "
                    f"{members['VAE_Tuples'].iloc[0]} "
                    f"STATE: {members['state'].iloc[0]}; "
                    f"CONTEXT: {members['context'].iloc[0]}"
                ),
                face_color=np.array(c), edge_color=np.array(c), visible=False,
                edge_width=0.0, size=50.0, opacity=1.0, blending='translucent'
            )

    ##################################################################

    else:
        # Every other clustering: one layer per label, in descending label order
        for c, cluster in zip(colors, sorted(main[clustering].unique(), reverse=True)):
            centroids = main[['Y_centroid', 'X_centroid']][main[clustering] == cluster]
            viewer.add_points(
                centroids, name=f'{clustering}_{cluster}', face_color=np.array(c), edge_color='white',
                edge_width=0.0, size=60.0, opacity=1.0, blending='translucent', visible=False
            )

In [8]:
# Meta-cluster groups: lists of cluster IDs per named group, kept separately
# for each of the two scales so that neither set overwrites the other.

# 20um (30px) meta-cluster groups
meta_clusters_20um = {
    'transformed_epithelial': [
        29, 31, 13, 51, 48, 56, 54, 24, 55, 57, 52, 53, 25, 47, 27, 49, 50, 46, 15,
        12, 42, 10, 32, 40, 41, 14, 11],
    'normal_epithelial': [1],
    'rbm': [2],
    'lymph_follicles': [3, 4, 5, 6],
    'granulation': [7, 44, 45, 43],
    'cd45_bright': [21, 22],
    'stroma': [8, 16, 26, 20, 28, 30, 37, 36, 33],
    'entropic': [35, 18, 19, 9, 17, 39, 38, 34, 23],
    'artifact': [0],
}

# 9um (14px) meta-cluster groups
meta_clusters_9um = {
    'rbm': [0, 17, 18],
    'normal_epithelial': [1, 47, 49],
    'artifact': [2, 3, 4, 16],
    'transformed_epithelial': [5, 6, 7, 8, 10, 11, 12, 34, 45, 46],
    'stroma': [
        9, 24, 26, 28, 29, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41,
        42, 43, 44, 50, 51],
    'cd45_bright': [13, 14, 15, 19, 20, 22, 23],
    'granulation': [21, 25],
    'lymph_follicles': [27, 30],
}

# Individual names keep the values they ended up with before: the 9um groups
# (which were assigned last), plus `entropic`, which exists only at 20um.
rbm = meta_clusters_9um['rbm']
normal_epithelial = meta_clusters_9um['normal_epithelial']
artifact = meta_clusters_9um['artifact']
transformed_epithelial = meta_clusters_9um['transformed_epithelial']
stroma = meta_clusters_9um['stroma']
cd45_bright = meta_clusters_9um['cd45_bright']
granulation = meta_clusters_9um['granulation']
lymph_follicles = meta_clusters_9um['lymph_follicles']
entropic = meta_clusters_20um['entropic']

In [9]:
# Read segmentation outlines (channel 0) and add them as a hidden,
# semi-transparent layer.
# vmin/vmax are used instead of min/max so the Python builtins are not
# shadowed for the rest of the notebook session.
seg, vmin, vmax = single_channel_pyramid(glob.glob(seg_path)[0], channel=0)
viewer.add_image(
    seg, rgb=False, blending='additive',
    colormap='gray', visible=False,
    name='segmentation', opacity=0.3, contrast_limits=(vmin, vmax)
)

<Image layer 'segmentation' at 0x16e6f45b0>

In [10]:
# Turn on the scale bar (unit set to 'um'), then run the Napari image viewer
scale_bar = viewer.scale_bar
scale_bar.visible = True
scale_bar.unit = 'um'

napari.run()