In [None]:
# Import packages
import warnings
from pathlib import Path

import anndata as ad
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import seaborn as sns
import skimage
import yaml
from matplotlib.colors import LinearSegmentedColormap
from phenoscapes.feature_extraction import extract_features
from phenoscapes.montage import generate_overview_montage
from phenoscapes.sc import convert_to_h5ad, plot_summary
from phenoscapes.utils import annotate_img, get_metadata, scale_image
from skimage import io
from skimage.color import label2rgb
from tqdm import tqdm

warnings.simplefilter(action="ignore", category=Warning)
np.random.seed(0)

import os

import matplotlib
from matplotlib.colors import ListedColormap
from numba import config as config_numba

config_numba.CPU_NAME = "generic"
matplotlib.rcParams["pdf.fonttype"] = 42
matplotlib.rcParams["ps.fonttype"] = 42

In [None]:
# Load the AnnData object that every later cell in this notebook works on.
adata = ad.read_h5ad("brain_organoid_4i_ecm_cytoplasm_nuclei.h5ad")

# Define colors
# Two-stop gradient from light gray to midnight blue.
color_volume_midnight_blue = LinearSegmentedColormap.from_list(
    "", ["#c9c7c7", "#191970"]
)
# One fixed color per culture condition (used by the Condition UMAP).
color_pallete_perturbation = dict(
    [
        ("Matrigel", "#17ad97"),
        ("No Matrix", "#4d4d4d"),
    ]
)
# One "hls" color per distinct Leiden cluster label.
n_leiden_clusters = len(np.unique(adata.obs["leiden"]))
colors = sns.color_palette("hls", n_leiden_clusters)

In [None]:
# Annotations based on the dotplot
# Three-stop gradient: near-white -> turquoise -> midnight blue.
gray_turqoise_midnight_blue = LinearSegmentedColormap.from_list(
    "", ["#f2f2f2", "#8afbff", "#191970"]
)

# Rank features per Leiden cluster (Wilcoxon), then show the top 4 per cluster
# with every feature scaled to its own range (standard_scale="var").
sc.tl.rank_genes_groups(adata, groupby="leiden", method="wilcoxon")
leiden_dotplot_kwargs = dict(
    n_genes=4,
    cmap=gray_turqoise_midnight_blue,
    standard_scale="var",
)
sc.pl.rank_genes_groups_dotplot(adata, **leiden_dotplot_kwargs)

In [None]:
# Annotations
# Sub-cluster label for every Leiden cluster ID.
#
# The labels are assigned with a single vectorized `.map` instead of repeated
# chained assignments (`adata.obs[col][mask] = value`). Chained assignment
# writes into a temporary Series: under pandas Copy-on-Write it never reaches
# `adata.obs`, and writing strings into a float NaN column is deprecated.
leiden_to_annotation = {
    "0": "N. Epi. 1",
    "10": "N. Epi. 2",
    "11": "NC neurons",  # Already updated
    "16": "Non-Tel. neurons",
    "15": "NCCs",
    "17": "Unknown",
    "1": "Tel. Prog. 1",
    "14": "Tel. Prog. 2",
    "5": "Tel. Prog. 3",
    "9": "Non-Tel. Prog. 1",
    "8": "Non-Tel. Prog. 2",
    "6": "Non-Tel. Prog. 3",
    "4": "Non-Tel. Prog. 4",
    "7": "Non-Tel. Prog. 5",
    "12": "Pros. Prog. 1",
    "2": "Pros. Prog. 2",
    "3": "Die. Prog. 1",
    "13": "Die. Prog. 2",
}
# `.astype(str)` keeps the result a plain (non-categorical) column, exactly as
# before; Leiden IDs missing from the mapping stay NaN.
adata.obs["Cluster_annotations"] = (
    adata.obs["leiden"].astype(str).map(leiden_to_annotation)
)

# Updated color dictionary for subclusters
brain_regions_colors = {
    "N. Epi. 1": "#CC9933",
    "N. Epi. 2": "#e8ae3a",
    "NCCs": "#F4A261",
    "NC neurons": "#f70a69",
    "Non-Tel. neurons": "#930740",
    "Die. Prog. 1": "#b36bff",
    "Die. Prog. 2": "#d0a3ff",
    "Pros. Prog. 1": "#506E8A",
    "Pros. Prog. 2": "#6389ad",
    "Tel. Prog. 1": "#049983",
    "Tel. Prog. 2": "#04b59a",
    "Tel. Prog. 3": "#04d4b5",
    "Non-Tel. Prog. 1": "#adbdff",
    "Non-Tel. Prog. 2": "#7f96fa",
    "Non-Tel. Prog. 3": "#5673f5",
    "Non-Tel. Prog. 4": "#385bf5",
    "Non-Tel. Prog. 5": "#0d2ebf",
    "Unknown": "#9e9e9e",
}

In [None]:
# Coarse annotation: several Leiden clusters collapse onto one label.
#
# Assigned with a single vectorized `.map` instead of chained assignments
# (`adata.obs[col][mask] = value`), which write into a temporary Series and
# do not reach `adata.obs` under pandas Copy-on-Write.
leiden_to_annotation_col = {
    "0": "N. Epi.",
    "10": "N. Epi.",
    "11": "NC neurons",
    "16": "Non-Tel. neurons",
    "15": "NCCs",
    "17": "Unknown",
    "1": "Tel. Prog.",
    "14": "Tel. Prog.",
    "5": "Tel. Prog.",
    "9": "Non-Tel. Prog.",
    "8": "Non-Tel. Prog.",
    "6": "Non-Tel. Prog.",
    "4": "Non-Tel. Prog.",
    "7": "Non-Tel. Prog.",
    "12": "Pros. Prog.",
    "2": "Pros. Prog.",
    "3": "Die. Prog.",
    "13": "Die. Prog.",
}
# `.astype(str)` keeps the result a plain (non-categorical) column; Leiden IDs
# missing from the mapping stay NaN.
adata.obs["Cluster_annotations_col"] = (
    adata.obs["leiden"].astype(str).map(leiden_to_annotation_col)
)

# The brain_regions_colors_col dictionary already looks correct
brain_regions_colors_col = {
    "N. Epi.": "#CC9933",
    "NCCs": "#F4A261",
    "NC neurons": "#f70a69",
    "Non-Tel. neurons": "#930740",
    "Die. Prog.": "#b36bff",
    "Pros. Prog.": "#506E8A",
    "Tel. Prog.": "#049983",
    "Non-Tel. Prog.": "#adbdff",
    "Unknown": "#9e9e9e",
}

In [None]:
# Display order of the coarse annotation labels; later cells pass this list
# as `order=` to the violin plots.
hue_order = [
    "Unknown",
    "N. Epi.",
    "Pros. Prog.",
    "NC neurons",
    "NCCs",
    "Non-Tel. Prog.",
    "Non-Tel. neurons",
    "Die. Prog.",
    "Tel. Prog.",
]

# NOTE(review): this rebinds `brain_regions_colors`. The sub-cluster palette
# defined in an earlier cell is replaced by one keyed by the coarse labels,
# with the same entries as `brain_regions_colors_col`. A later cell rebinds
# the name to the sub-cluster palette again before it is used for plotting.
brain_regions_colors = {
    "N. Epi.": "#CC9933",
    "NCCs": "#F4A261",
    "NC neurons": "#f70a69",
    "Non-Tel. neurons": "#930740",
    "Die. Prog.": "#b36bff",
    "Pros. Prog.": "#506E8A",
    "Tel. Prog.": "#049983",
    "Non-Tel. Prog.": "#adbdff",
    "Unknown": "#9e9e9e",
}

In [None]:
# Fraction of cells per coarse annotation within each condition.
# normalize="index" makes each condition sum to 1; the transpose puts the
# annotations on the rows so they can be reordered below.
cross_tab = pd.crosstab(
    adata.obs["Condition"], adata.obs["Cluster_annotations_col"], normalize="index"
).T

# Stacking order and colors come from `hue_order` / `brain_regions_colors_col`
# (defined in earlier cells) instead of hand-copied lists, so the bar segments
# cannot drift out of sync with the palette used by the other figures.
cross_tab = cross_tab.reindex(hue_order)
cluster_colors = ListedColormap(
    [brain_regions_colors_col[annotation] for annotation in hue_order]
)

# Embed TrueType (Type 42) fonts in PDF/PS exports.
matplotlib.rcParams["pdf.fonttype"] = 42
matplotlib.rcParams["ps.fonttype"] = 42

# One stacked bar per condition, one segment per annotation.
tmp = cross_tab.T.plot(kind="bar", stacked=True, cmap=cluster_colors)
tmp.legend(title="Annotation", bbox_to_anchor=(1.5, 1.02), loc="upper right")
tmp.grid(False)
for spine_side in ("top", "right", "bottom", "left"):
    tmp.spines[spine_side].set_visible(False)

# Export is disabled. It is kept as real comments rather than a bare string
# literal, which the notebook would echo as the cell's output. Uncomment to save:
# tmp.figure.savefig(
#     "Figures_updated_color_scheme/nuclei_cytoplasmic_compartment_analysis/bar_plot_Matrigel_No_Matrix_condition.pdf",
#     bbox_inches="tight",
# )

In [None]:
# Violin plot of the distance to the surface per coarse annotation.
fig, ax = plt.subplots(figsize=(5, 2))
sns.despine(ax=ax, left=True, bottom=True, right=True)

# Rescale the raw `distance_surface` values by 0.1625.
# NOTE(review): presumably the pixel size (um per pixel) -- confirm, and
# consider moving the factor into a named constant.
adata.obs["Distance surface"] = adata.obs["distance_surface"] * 0.1625

# seaborn >= 0.13 deprecates `palette` without `hue`; the documented
# replacement is to assign the x variable to `hue` with `legend=False`, which
# draws the same figure. The former chained `.legend(...)` call is dropped:
# no artist carries a label, so it only produced an empty legend box.
sns.violinplot(
    data=adata.obs,
    x="Cluster_annotations_col",
    y="Distance surface",
    hue="Cluster_annotations_col",
    order=hue_order,
    hue_order=hue_order,
    palette=brain_regions_colors_col,
    cut=0,
    density_norm="width",
    legend=False,
    ax=ax,
)
ax.tick_params(axis="x", rotation=90)

In [None]:
# NOTE(review): `all_regions` is not used anywhere in this cell; it is kept
# in case other cells rely on it.
all_regions = [
    "Unknown",
    "N. Epi.",
    "Pros. Prog.",
    "Neural crest and PNS Neurons",
    "Neural crest cells",
    "Non-Tel. Prog.",
    "PNS Neurons",
    "Die. Prog.",
    "Tel. Prog.",
]


def _first_mode(values):
    """Return the most frequent value of `values` as a scalar.

    `pd.Series.mode` returns every tied value, which leaves arrays in the
    aggregated column when a lumen has a tie. Taking the first mode gives a
    deterministic scalar; an empty group yields NaN.
    """
    modes = values.mode()
    return modes.iat[0] if len(modes) else np.nan


# Per-cell lumen measurements. `.copy()` makes this an independent frame, so
# the column assignments below do not act on a slice of `adata.obs`.
lumen_df = adata.obs[["major_axis_length_lumen", "distance_to_lumen"]].copy()

# A lumen is identified by its sample together with its lumen label.
lumen_df["lumen_id"] = (
    adata.obs["sample"].astype(str) + "_" + adata.obs["label_lumen"].astype(str)
)

# One row per lumen: mean of the numeric per-cell measurements. This runs
# before `distance_to_lumen` is rescaled, so that column stays in raw units.
lumen_short = lumen_df.groupby(["lumen_id"]).mean()

lumen_df["Cluster_annotations_col"] = adata.obs["Cluster_annotations_col"]
# NOTE(review): 0.1625 is presumably the pixel size (um per pixel) -- confirm.
lumen_df["distance_to_lumen"] = lumen_df["distance_to_lumen"] * 0.1625

sns.set_context("paper", rc={"font.size": 5, "axes.titlesize": 5, "axes.labelsize": 5})

# Keep only cells closer than 50 (rescaled units) to their lumen, then label
# each lumen with the most frequent annotation among those cells. Lumens left
# without any such cell get NaN through index alignment.
lumen_df = lumen_df[lumen_df["distance_to_lumen"] < 50]
lumen_short["Cluster_annotations_col"] = lumen_df.groupby(["lumen_id"])[
    "Cluster_annotations_col"
].agg(_first_mode)

fig, ax = plt.subplots(figsize=(2, 1))
sns.despine(ax=ax, left=True, bottom=True, right=True)
lumen_short["Major axis \n length lumen"] = (
    lumen_short["major_axis_length_lumen"] * 0.1625
)

# seaborn >= 0.13 deprecates `palette` without `hue`; assigning the x variable
# to `hue` with `legend=False` is the documented equivalent. The former
# chained `.legend(...)` call only produced an empty legend and is dropped.
sns.violinplot(
    data=lumen_short,
    x="Cluster_annotations_col",
    y="Major axis \n length lumen",
    hue="Cluster_annotations_col",
    order=hue_order,
    hue_order=hue_order,
    palette=brain_regions_colors_col,
    cut=0,
    density_norm="width",
    legend=False,
    ax=ax,
)
ax.tick_params(axis="x", rotation=90)

In [None]:
# Define colors
# Three-stop gradient: near-white -> turquoise -> midnight blue.
gray_turqoise_midnight_blue = LinearSegmentedColormap.from_list(
    "", ["#f2f2f2", "#8afbff", "#191970"]
)

# One UMAP panel per measured feature in `adata.var_names`.
all_feature_names = list(adata.var_names)
sc.pl.umap(
    adata,
    color=all_feature_names,
    cmap=gray_turqoise_midnight_blue,
    frameon=False,
)

In [None]:
# Store the coarse annotation as a pandas categorical (the dendrogram cell
# below groups by this column).
coarse_annotation = adata.obs["Cluster_annotations_col"]
adata.obs["Cluster_annotations_col"] = coarse_annotation.astype("category")

In [None]:
# Annotations based on the dotplot
sc.set_figure_params(vector_friendly=False, fontsize=6)

# Three-stop gradient: near-white -> turquoise -> midnight blue.
gray_turqoise_midnight_blue = LinearSegmentedColormap.from_list(
    "", ["#f2f2f2", "#8afbff", "#191970"]
)

# Hand-picked marker panel, shown as a single group labelled "Overview".
overview_markers = [
    "CDH1_cytoplasm",
    "Human_ITGB5_cytoplasm",
    "IGFBP2_cytoplasm",
    "CDH2_cytoplasm",
    "Pax6_nuclei",
    "RAX_nuclei",
    "GPR177/WLS_cytoplasm",
    "SRFP2_cytoplasm",
    "OTX2_nuclei",
    "SOX10_nuclei",
    "TUBB3_cytoplasm",
    "JAG1_cytoplasm",
    "SOX21_nuclei",
    "GSX2_nuclei",
]
var_names = {"Overview": overview_markers}

# Hierarchical clustering of the coarse annotation groups.
sc.tl.dendrogram(adata, groupby="Cluster_annotations_col")

In [None]:
from matplotlib import pyplot as plt

# Re-rank features, this time per sub-cluster annotation.
# NOTE(review): the ranking groups by "Cluster_annotations" while the dotplot
# below groups by "Cluster_annotations_col" and plots the fixed `var_names`
# panel -- confirm the mismatch is intended.
sc.tl.rank_genes_groups(adata, groupby="Cluster_annotations", method="wilcoxon")

coarse_dotplot_kwargs = dict(
    groupby="Cluster_annotations_col",
    var_names=var_names,
    cmap=gray_turqoise_midnight_blue,
    standard_scale="var",
    show=False,
    return_fig=False,
    figsize=(6.55, 3),
)
# rc_context restores matplotlib's rcParams once the plot has been drawn.
with plt.rc_context():
    sc.pl.rank_genes_groups_dotplot(adata, **coarse_dotplot_kwargs)

In [None]:
# UMAP colored by the sub-cluster annotation.
sc.set_figure_params(dpi=200, vector_friendly=False)

# Updated color dictionary for subclusters
# This rebinds `brain_regions_colors` to the palette keyed by the sub-cluster
# labels stored in `adata.obs["Cluster_annotations"]`.
brain_regions_colors = {
    "N. Epi. 1": "#CC9933",
    "N. Epi. 2": "#e8ae3a",
    "NCCs": "#F4A261",  # Updated name
    "NC neurons": "#f70a69",  # Updated name
    "Non-Tel. neurons": "#930740",  # Updated name
    "Die. Prog. 1": "#b36bff",
    "Die. Prog. 2": "#d0a3ff",
    "Pros. Prog. 1": "#506E8A",
    "Pros. Prog. 2": "#6389ad",
    "Tel. Prog. 1": "#049983",
    "Tel. Prog. 2": "#04b59a",
    "Tel. Prog. 3": "#04d4b5",
    "Non-Tel. Prog. 1": "#adbdff",
    "Non-Tel. Prog. 2": "#7f96fa",
    "Non-Tel. Prog. 3": "#5673f5",
    "Non-Tel. Prog. 4": "#385bf5",
    "Non-Tel. Prog. 5": "#0d2ebf",
    "Unknown": "#9e9e9e",
}
# NOTE(review): a later cell reads `adata.uns["Cluster_annotations_colors"]`;
# presumably scanpy writes that entry when this plot is drawn with `palette`
# -- confirm, since that would make this cell a prerequisite for the overlays.
with plt.rc_context():
    sc.pl.umap(
        adata,
        color="Cluster_annotations",
        size=15,
        title="",
        frameon=False,
        legend_fontsize="x-small",
        # save="",
        palette=brain_regions_colors,
        show=False,
    )

In [None]:
# UMAP colored by the "Day" column, using the "tab20" palette.
sc.set_figure_params(dpi=200, vector_friendly=False)

day_umap_kwargs = dict(
    color="Day",
    size=4,
    title="",
    frameon=False,
    legend_fontsize="x-small",
    palette="tab20",
)
# To export the figure, add: save="_brain_ecm_2_day.pdf"
sc.pl.umap(adata, **day_umap_kwargs)

In [None]:
# UMAP colored by the "Condition" column, using the fixed per-condition colors.
sc.set_figure_params(dpi=200, vector_friendly=False)

condition_umap_kwargs = dict(
    color="Condition",
    size=4,
    title="",
    frameon=False,
    legend_fontsize="x-small",
    palette=color_pallete_perturbation,
)
# To export the figure, add: save="_brain_ecm_2_condition.pdf"
sc.pl.umap(adata, **condition_umap_kwargs)

In [None]:
import seaborn as sns
from matplotlib.colors import ListedColormap
from phenoscapes.utils import annotate_img, get_metadata, scale_image
from skimage.color import label2rgb
from skimage.transform import rescale
from skimage.util import montage


def to_shape(a, shape):
    """Zero-pad an image so its first two axes match ``shape``.

    The padding is split evenly between both sides of each axis; when the
    amount is odd, the extra row/column goes to the trailing (bottom/right)
    side. Axes beyond the first two (e.g. a channel axis) are left untouched.

    Parameters
    ----------
    a : numpy.ndarray
        Array with at least two dimensions, laid out as (rows, cols, ...).
    shape : tuple of int
        Target ``(rows, cols)``; each must be >= the matching size of ``a``.

    Returns
    -------
    numpy.ndarray
        A padded copy of ``a`` whose first two axes have size ``shape``.

    Raises
    ------
    ValueError
        If ``a`` has fewer than two dimensions, or if the target is smaller
        than ``a`` along either axis (this function never crops).
    """
    target_rows, target_cols = shape
    rows, cols = a.shape[:2]
    pad_rows = target_rows - rows
    pad_cols = target_cols - cols
    if pad_rows < 0 or pad_cols < 0:
        raise ValueError(
            f"cannot pad array of shape {a.shape} to smaller target shape {tuple(shape)}"
        )
    pad_width = [
        (pad_rows // 2, pad_rows - pad_rows // 2),
        (pad_cols // 2, pad_cols - pad_cols // 2),
    ]
    # Leave any trailing axes (e.g. RGB channels) unpadded.
    pad_width.extend([(0, 0)] * (a.ndim - 2))
    return np.pad(a, pad_width, mode="constant")

In [None]:
# Explicit list of sample names (presumably the wells chosen for the montage).
# NOTE(review): the next cell rebinds `samples_montage_clusters` to every
# unique value of `adata.obs["sample"]`, so this selection is not what the
# overlay loop iterates over.
samples_montage_clusters = ["R066_0", "R026_0", "R115_0", "R073_0", "R044_0", "R087_0"]

In [None]:
import matplotlib.colors


def _paint_cluster_codes(label_mask, cell_ids, cluster_codes):
    """Translate a label image into a cluster-code image.

    Every pixel whose value equals one of ``cell_ids`` receives
    ``1 + cluster_code`` of that cell; all other pixels stay 0 (background).
    If an ID occurs more than once, the code of its first occurrence is used.

    Parameters
    ----------
    label_mask : numpy.ndarray
        Label image whose pixel values are cell IDs.
    cell_ids : array-like
        One ID per cell.
    cluster_codes : array-like
        Integer cluster code per cell, aligned with ``cell_ids``.

    Returns
    -------
    numpy.ndarray
        float32 array with the shape of ``label_mask``.
    """
    painted = np.zeros(label_mask.shape, dtype=np.float32)
    cell_ids = np.asarray(cell_ids)
    if cell_ids.size == 0:
        return painted
    # Sorted unique IDs plus the index of each ID's first occurrence.
    ids_sorted, first_idx = np.unique(cell_ids, return_index=True)
    values = 1 + np.asarray(cluster_codes, dtype=np.float32)[first_idx]
    # Binary-search every pixel in the sorted IDs, then keep exact matches only.
    pos = np.clip(np.searchsorted(ids_sorted, label_mask), 0, ids_sorted.size - 1)
    hit = ids_sorted[pos] == label_mask
    painted[hit] = values[pos[hit]]
    return painted


# NOTE(review): `dir_output` is not defined in any cell visible in this
# notebook; it must be set before this cell runs (Restart & Run All fails here).
dir_segmented = Path(dir_output, "segmented_cell_nuclei")
dir_segmented_cell = Path(dir_output, "segmented_cytoplasma")

# Create the export folder up front so `io.imsave` cannot fail on a missing path.
dir_overlays = Path(
    "Figures_updated_color_scheme",
    "nuclei_cytoplasmic_compartment_analysis",
    "cluster_annotation_overlay",
)
dir_overlays.mkdir(parents=True, exist_ok=True)

# Integer code per cell for its sub-cluster annotation (-1 where missing).
adata.obs["Cluster_annotations_num"] = pd.Categorical(
    adata.obs["Cluster_annotations"]
).codes

obs_name = "Cluster_annotations_num"
# NOTE(review): `slice_step`, `shape` and `imgs_full` are not used in this cell.
slice_step = 2000
shape = 1024
downscale = 0.25
imgs_full = []
# All samples are rendered; this replaces the explicit list from the previous cell.
samples_montage_clusters = np.unique(adata.obs["sample"])

# RGB tuple per cluster code, taken from the colors stored in `adata.uns`.
# Assumes that list is ordered like the category codes above -- TODO confirm.
colors = [
    matplotlib.colors.to_rgb(hex_color)
    for hex_color in adata.uns["Cluster_annotations_colors"]
]

# Pass 1: find the largest downscaled mask side so every overlay can be padded
# to one common square size.
mask_shapes = []
for sample in tqdm(samples_montage_clusters):
    mask = io.imread(Path(dir_segmented, sample + ".tif"))
    mask = rescale(mask, downscale, order=0, preserve_range=True, anti_aliasing=False)
    mask_shapes.append(max(mask.shape))
max_shape = max(mask_shapes)

# Pass 2: paint each cell with its cluster color and export one PNG per sample.
for sample in tqdm(samples_montage_clusters):
    obs_well = adata.obs.loc[adata.obs["sample"] == sample, ["ID", obs_name]]

    # Sum of the nuclei and cytoplasm label images.
    # NOTE(review): this presumably relies on both images using the same ID
    # per cell and on the two regions not overlapping -- confirm.
    mask = io.imread(Path(dir_segmented, sample + ".tif")) + io.imread(
        Path(dir_segmented_cell, sample + ".tif")
    )
    # order=0 (nearest neighbour) keeps label values intact while downscaling.
    mask = rescale(mask, downscale, order=0, preserve_range=True, anti_aliasing=False)
    mask = to_shape(mask, (max_shape, max_shape))

    mask_colored = _paint_cluster_codes(
        mask, obs_well["ID"].to_numpy(), obs_well[obs_name].to_numpy()
    )

    # Hand label2rgb only the colors of the clusters present, in ascending
    # order. Filtering on > 0 skips the background without assuming that a
    # background pixel exists.
    present_codes = np.unique(mask_colored)
    present_codes = (present_codes[present_codes > 0] - 1).astype(int)
    colors_2 = [colors[cluster_num] for cluster_num in present_codes]
    labels_rgb = label2rgb(mask_colored, bg_label=0, colors=colors_2)

    # Render borderless at one figure pixel per mask pixel.
    dpi = mpl.rcParams["figure.dpi"]
    fig = plt.figure(figsize=(mask_colored.shape[1] / dpi, mask_colored.shape[0] / dpi))
    ax = fig.add_axes([0, 0, 1, 1])
    ax.imshow(labels_rgb)
    ax.axis("off")
    fig.canvas.draw()
    # `tostring_rgb()` is deprecated (Matplotlib 3.8) and later removed; read
    # the RGBA buffer and drop the alpha channel instead.
    image = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()
    plt.close(fig)

    io.imsave(
        str(dir_overlays / f"overlay_cluster_annotations_{sample}.png"),
        image,
        check_contrast=False,
    )