In [None]:
# Import packages
import warnings
from pathlib import Path

import anndata as ad
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import seaborn as sns
import skimage
import yaml
from matplotlib.colors import LinearSegmentedColormap
from phenoscapes.feature_extraction import extract_features
from phenoscapes.montage import generate_overview_montage
from phenoscapes.sc import convert_to_h5ad, plot_summary
from phenoscapes.utils import annotate_img, get_metadata, scale_image
from skimage import io
from skimage.color import label2rgb
from tqdm import tqdm

# Silence every warning category (Warning is the base class) for the session.
# NOTE(review): this also hides deprecation warnings from the plotting and
# AnnData stack; consider narrowing the filter once the notebook is stable.
warnings.simplefilter(action="ignore", category=Warning)
# Seed NumPy's legacy global random state.
np.random.seed(0)

import os

import matplotlib
from numba import config as config_numba

# Set numba's CPU_NAME option to "generic".
# NOTE(review): presumably so compiled code is portable across different
# cluster nodes; it is set after scanpy (a numba user) is imported - confirm
# that this is early enough.
config_numba.CPU_NAME = "generic"

In [None]:
# Root directory of the processed imaging run; every path below hangs off it.
dir_output = "/cluster/project/treutlein/DATA/imaging/4i_Data/Brain_ECM_4i_2_v2/"

# Location for combined AnnData files (created when missing).
ann_output = Path(dir_output) / "anndata_combined"
ann_output.mkdir(parents=True, exist_ok=True)

dir_bg_subtracted = Path(dir_output) / "bg_subtracted"
ann_dir = Path(dir_output) / "anndata"
morpho_dir = Path(dir_output) / "feature_tables_morphometrics_cells"

# Directories holding the per-sample .h5ad files, one directory per
# compartment (cytoplasm, ECM niche, nuclei).
ann_dir_cytoplasma = Path(dir_output) / "anndata_mean_cytoplasma"
ann_dir_ecm_niche = Path(dir_output) / "anndata_mean_ecm_niche"
ann_dir_nuclei = Path(dir_output) / "anndata_mean_cell_nuclei"

# Sample overview table plus two derived label columns.
# NOTE(review): the key "unqiue_id" is misspelled; it is reproduced unchanged
# because other code may read it.
overview = pd.read_csv("updated_brain_ECM_2.csv").assign(
    id=lambda tbl: tbl["Condition"] + tbl["Day"].astype(str),
    unqiue_id=lambda tbl: (
        tbl["Number"].astype(str)
        + " "
        + tbl["Block"].astype(str)
        + " "
        + tbl["Condition"]
        + " "
        + tbl["Day"].astype(str)
    ),
)
# Optional restriction to a single time point (currently disabled):
# overview = overview[overview["Day"] == 21]

# Sample identifiers of the form "R<number>_<suffix>", grouped by time point.
samples = [
    # Day 7
    "R066_0", "R062_0", "R073_0", "R072_0",
    # Day 15 ("R021_0" is currently excluded)
    "R026_0", "R022_0", "R024_0", "R049_0", "R055_0", "R044_0", "R057_0",
    # Day 21
    "R076_1", "R087_0", "R082_0", "R108_1", "R112_1", "R113_1", "R115_0",
]

# Stains kept for the ECM panel ("WNT5a" and "SFRP2" are currently excluded).
ecm_genes = [
    "Col4A1", "Col2A1", "Fibronectin", "Laminin",
    "IGFBP2", "HAPLN1", "VCAN",
]

# Stains of the cytoplasm / membrane panel ("ITGB1" is currently excluded).
cytoplamsa_membrane = [
    "Cytokeratin", "TUBB3", "pVIM", "B-cat", "Human",
    "ITGA5B1", "Vinculin", "Nes", "WNT5a", "IGFBP2",
    "Arl13b", "Piezo1", "Scribble", "FLNA", "RSPO3",
    "n-Cad", "CDH1", "ITGB5", "GPR177WLS", "DLL1",
    "VIM", "JAG1", "GPC3", "NUMB", "Vangl2",
]

# Stains of the nuclear panel.
# NOTE(review): "Vangl2" is also listed in the cytoplasm / membrane panel -
# confirm this is intended.
nuclear = [
    "OTX2", "PAX37", "OCT4", "PH3S0", "SOX2", "SOX10",
    "NR2F1COUP=TFI", "PROM1CD133", "TBR1", "HES1", "GSX2", "Gli3",
    "PAX3", "RAX", "Pax6", "Sox21", "HES4", "Vangl2",
]

In [None]:
# Build one AnnData per compartment (here only the ECM niche), restricted to
# the selected stains, and join the compartments column-wise.
anndatas_compartements = []
# NOTE: the loop variable `ann_dir` rebinds the notebook-level `ann_dir`.
for ann_dir, compartement, good_stains in zip(
    [ann_dir_ecm_niche], ["ecm"], [ecm_genes]
):
    anndatas = []
    for sample in samples:
        # "R066_0" -> 66
        number = int(sample.split("_")[0].split("R")[1])

        # Look the sample up once; samples missing from the overview are skipped.
        overview_rows = overview[overview["Number"] == number]
        if overview_rows.empty:
            continue
        sample_meta = overview_rows.iloc[0]

        adata_org = ad.read_h5ad(Path(ann_dir, f"{sample}.h5ad"))
        # Keep only objects with `area` above 100. `.copy()` turns the view into
        # a real object so the obs assignments below do not trigger an implicit
        # copy.
        adata_org = adata_org[adata_org.obs["area"] > 100].copy()

        # Morphometric features, restricted to the objects that survived.
        df = pd.read_csv(Path(morpho_dir, f"{sample}.csv"))
        df = df[df["label"].isin(adata_org.obs["ID"])]

        # Both tables must list the same objects in the same order, because the
        # features are attached positionally below. `array_equal` also covers a
        # length mismatch.
        assert np.array_equal(
            np.array(df["label"]), np.array(adata_org.obs["ID"])
        ), f"morphometrics labels and AnnData IDs differ for sample {sample}"

        adata_org.obs["Day"] = sample_meta["Day"]
        adata_org.obs["Condition"] = sample_meta["Condition"]
        adata_org.obs["Block"] = sample_meta["Block"]
        adata_org.obs["sample_num"] = sample_meta["Number"]
        adata_org.obs["sample"] = sample

        # Attach every morphometrics column (positionally) to obs.
        for feature in df.columns:
            adata_org.obs[feature] = np.array(df[feature])

        anndatas.append(adata_org)

    if not anndatas:
        raise ValueError(
            f"no sample of compartment {compartement!r} was found in the overview table"
        )

    adata = ad.concat(anndatas)
    # Keep only the stains selected for this compartment.
    stain_vector = [x in good_stains for x in adata.var_names]
    adata = adata[:, stain_vector].copy()

    # Unique id for vars
    adata.var_names = np.array(adata.var_names) + "_" + compartement

    # Unique id for obs: object ID followed directly by the sample name
    adata.obs["unique_id"] = adata.obs["ID"].astype(str) + adata.obs["sample"].astype(
        str
    )
    adata.obs_names = adata.obs["unique_id"]

    anndatas_compartements.append(adata)
adata = ad.concat(anndatas_compartements, axis=1, merge="same")
# assert len(ecm_genes) == len(adata.var_names)
adata.obs["sample_num_str"] = adata.obs["sample_num"].astype(str)

# Load the other h5ad and keep only the observations whose unique_id is in it
ann_path = Path(dir_output, "brain_ecm_4i_2_v2_mean.h5ad")
adata_cyto_nuc = ad.read_h5ad(ann_path)
adata = adata[adata.obs["unique_id"].isin(adata_cyto_nuc.obs["unique_id"])].copy()

In [None]:
# Keep an independent copy of the ECM-niche AnnData under its own name;
# `adata` is modified and rebound by later cells.
adata_ecm = adata.copy()

In [None]:
# Store the time point as a string label.
adata.obs["Day"] = adata.obs["Day"].astype(str)

# Scale -> PCA -> neighbour graph -> Leiden clustering -> UMAP, all applied
# in place on `adata`.
sc.pp.scale(adata)
sc.tl.pca(adata)
sc.pl.pca_variance_ratio(adata)
sc.pp.neighbors(adata, use_rep="X_pca", n_pcs=6)
sc.tl.leiden(adata, resolution=0.3)
sc.tl.umap(adata, min_dist=0.2)

# Persist the clustered object inside the output directory.
adata.write_h5ad(Path(dir_output) / "brain_ecm_4i_2_v2_mean_ecm.h5ad")

In [None]:
# One "hls" palette colour per distinct value of the "leiden" obs column.
colors = sns.color_palette("hls", n_colors=np.unique(adata.obs["leiden"]).size)

In [None]:
import seaborn as sns
from matplotlib.colors import ListedColormap
from phenoscapes.utils import annotate_img, get_metadata, scale_image
from skimage.color import label2rgb
from skimage.transform import rescale
from skimage.util import montage


def to_shape(a, shape):
    """Zero-pad a 2-D array so that it is centred inside ``shape``.

    Parameters
    ----------
    a : numpy.ndarray
        Two-dimensional input array.
    shape : tuple of int
        Target ``(rows, cols)``; each entry must be at least as large as the
        corresponding dimension of ``a``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``shape`` containing ``a`` surrounded by zeros. When
        the padding along an axis is odd, the extra row/column goes to the
        bottom/right.

    Raises
    ------
    ValueError
        If ``shape`` is smaller than ``a`` along either axis.
    """
    target_rows, target_cols = shape
    rows, cols = a.shape
    pad_rows = target_rows - rows
    pad_cols = target_cols - cols
    if pad_rows < 0 or pad_cols < 0:
        # np.pad would also raise ValueError here, but with an opaque message.
        raise ValueError(
            f"cannot pad array of shape {a.shape} to smaller shape {tuple(shape)}"
        )
    top, extra_row = divmod(pad_rows, 2)
    left, extra_col = divmod(pad_cols, 2)
    return np.pad(
        a,
        ((top, top + extra_row), (left, left + extra_col)),
        mode="constant",
    )


# Distinct sample names present in the clustered AnnData.
samples_montage_clusters = np.unique(adata.obs["sample"])

# Directories of segmentation images below the output root.
dir_segmented = Path(dir_output) / "segmented_cell_nuclei"
dir_segmented_cell = Path(dir_output) / "segmented_cytoplasma"
dir_segmented_ecm = Path(dir_output) / "segmented_ecm_niche"

# One "hls" palette colour per distinct value of the "leiden" obs column.
colors = sns.color_palette("hls", n_colors=np.unique(adata.obs["leiden"]).size)

obs_name = "leiden"  # obs column used to colour the segmented objects
downscale = 0.25  # scale factor handed to skimage's rescale
# NOTE(review): the three names below are not referenced in the visible cells.
slice_step = 2000
shape = 500
imgs_full = []

# Side length of the square canvas onto which every downscaled mask is padded.
# The masks measured here are the ECM-niche masks (the same files that are
# later padded and coloured), so the canvas is guaranteed to fit each of them.
# No AnnData subset is needed for this pass.
mask_shapes = []
for sample in tqdm(samples_montage_clusters):
    mask = io.imread(Path(dir_segmented_ecm, sample + ".tif"))
    # order=0 (nearest neighbour) keeps label values intact while downscaling.
    mask = rescale(mask, downscale, order=0, preserve_range=True, anti_aliasing=False)
    mask_shapes.append(max(mask.shape))
max_shape = max(mask_shapes)

# Render one cluster-coloured overlay per sample and save it as a PNG.
# Make sure the target directory exists before the first save.
Path("figures/ecm_cluster").mkdir(parents=True, exist_ok=True)
dpi = mpl.rcParams["figure.dpi"]

for sample in tqdm(samples_montage_clusters):
    adata_well = adata[adata.obs["sample"] == sample].copy()
    # Only the obs table is needed to map object IDs to cluster labels.
    obs_well = adata_well.obs

    mask = io.imread(Path(dir_segmented_ecm, sample + ".tif"))
    # order=0 (nearest neighbour) keeps label values intact while downscaling.
    mask = rescale(mask, downscale, order=0, preserve_range=True, anti_aliasing=False)
    mask = to_shape(mask, (max_shape, max_shape))

    # Paint every object with (cluster + 1); 0 stays reserved for background.
    mask_colored = np.zeros(mask.shape).astype(np.float32)
    for i in np.unique(obs_well["ID"]):
        cluster_values = np.array(obs_well.loc[obs_well["ID"] == i, obs_name])
        mask_colored[mask == i] = 1 + cluster_values.astype(np.float32)[0]

    # label2rgb is given only the colours of the clusters that actually occur
    # in this image, in ascending cluster order. Background (0) is filtered by
    # value rather than by position, so an image without background pixels
    # does not lose its first cluster.
    present_values = np.unique(mask_colored)
    present_clusters = (present_values[present_values > 0] - 1).astype(int)
    colors_2 = [colors[cluster_num] for cluster_num in present_clusters]
    labels_rgb = label2rgb(mask_colored, bg_label=0, colors=colors_2)

    # Borderless figure sized so that its canvas matches the mask dimensions.
    fig = plt.figure(figsize=(mask_colored.shape[1] / dpi, mask_colored.shape[0] / dpi))
    ax = fig.add_axes([0, 0, 1, 1])
    ax.imshow(labels_rgb)
    ax.axis("off")

    # Rasterise and grab the pixels. `buffer_rgba` replaces the deprecated
    # `tostring_rgb`; the alpha channel is dropped to keep an RGB image.
    canvas = fig.canvas
    canvas.draw()
    image = np.asarray(canvas.buffer_rgba())[..., :3].copy()
    io.imsave(
        f"figures/ecm_cluster/overlay_cluster_annotations_{sample}.png",
        image,
        check_contrast=False,
    )
    plt.close(fig)

In [None]:
# NOTE(review): this rebinds `dir_output` (and every path derived from it) to
# a different root than the one configured earlier in the notebook - confirm
# that both locations hold the same processed data.
dir_output = "/cluster/scratch/gutgi/4i/Brain_ECM_4i_2_v2/"

# Location for combined AnnData files (created when missing).
ann_output = Path(dir_output) / "anndata_combined"
ann_output.mkdir(parents=True, exist_ok=True)

dir_bg_subtracted = Path(dir_output) / "bg_subtracted"
ann_dir = Path(dir_output) / "anndata"
morpho_dir = Path(dir_output) / "feature_tables_morphometrics_cells"

# Directories holding the per-sample .h5ad files, one directory per
# compartment (cytoplasm, ECM niche, nuclei).
ann_dir_cytoplasma = Path(dir_output) / "anndata_mean_cytoplasma"
ann_dir_ecm_niche = Path(dir_output) / "anndata_mean_ecm_niche"
ann_dir_nuclei = Path(dir_output) / "anndata_mean_cell_nuclei"

# Sample overview table plus two derived label columns.
# NOTE(review): the key "unqiue_id" is misspelled; it is reproduced unchanged
# because other code may read it.
overview = pd.read_csv("updated_brain_ECM_2.csv").assign(
    id=lambda tbl: tbl["Condition"] + tbl["Day"].astype(str),
    unqiue_id=lambda tbl: (
        tbl["Number"].astype(str)
        + " "
        + tbl["Block"].astype(str)
        + " "
        + tbl["Condition"]
        + " "
        + tbl["Day"].astype(str)
    ),
)

# Sample identifiers of the form "R<number>_<suffix>", grouped by time point.
samples = [
    # Day 7
    "R066_0", "R062_0", "R073_0", "R072_0",
    # Day 15 ("R021_0" is currently excluded)
    "R026_0", "R022_0", "R024_0", "R049_0", "R055_0", "R044_0", "R057_0",
    # Day 21
    "R076_1", "R087_0", "R082_0", "R108_1", "R112_1", "R113_1", "R115_0",
]

# Stains kept for the ECM panel ("WNT5a" is currently excluded).
# NOTE(review): "SFRP2" is included here although the earlier definition of
# `ecm_genes` in this notebook leaves it out - confirm this is intended.
ecm_genes = [
    "Col4A1", "Col2A1", "Fibronectin", "Laminin",
    "IGFBP2", "HAPLN1", "VCAN", "SFRP2",
]

# Stains of the cytoplasm / membrane panel ("ITGB1" is currently excluded).
cytoplamsa_membrane = [
    "Cytokeratin", "TUBB3", "pVIM", "B-cat", "Human",
    "ITGA5B1", "Vinculin", "Nes", "WNT5a", "IGFBP2",
    "Arl13b", "Piezo1", "Scribble", "FLNA", "RSPO3",
    "n-Cad", "CDH1", "ITGB5", "GPR177WLS", "DLL1",
    "VIM", "JAG1", "GPC3", "NUMB", "Vangl2",
]

# Stains of the nuclear panel.
# NOTE(review): "Vangl2" is also listed in the cytoplasm / membrane panel -
# confirm this is intended.
nuclear = [
    "OTX2", "PAX37", "OCT4", "PH3S0", "SOX2", "SOX10",
    "NR2F1COUP=TFI", "PROM1CD133", "TBR1", "HES1", "GSX2", "Gli3",
    "PAX3", "RAX", "Pax6", "Sox21", "HES4", "Vangl2",
]

In [None]:
# Build the AnnData of the ECM stains read from the cytoplasm compartment
# files and join the (single) compartment column-wise.
anndatas_compartements = []
# NOTE: the loop variable `ann_dir` rebinds the notebook-level `ann_dir`.
for ann_dir, compartement, good_stains in zip(
    [ann_dir_cytoplasma], ["cellular_ecm"], [ecm_genes]
):
    anndatas = []
    for sample in samples:
        # "R066_0" -> 66
        number = int(sample.split("_")[0].split("R")[1])

        # Look the sample up once; samples missing from the overview are skipped.
        overview_rows = overview[overview["Number"] == number]
        if overview_rows.empty:
            continue
        sample_meta = overview_rows.iloc[0]

        adata_org = ad.read_h5ad(Path(ann_dir, f"{sample}.h5ad"))

        # Morphometric features, restricted to the objects present in the AnnData.
        df = pd.read_csv(Path(morpho_dir, f"{sample}.csv"))
        df = df[df["label"].isin(adata_org.obs["ID"])]

        # Both tables must list the same objects in the same order, because the
        # features are attached positionally below. `array_equal` also covers a
        # length mismatch.
        assert np.array_equal(
            np.array(df["label"]), np.array(adata_org.obs["ID"])
        ), f"morphometrics labels and AnnData IDs differ for sample {sample}"

        adata_org.obs["Day"] = sample_meta["Day"]
        adata_org.obs["Condition"] = sample_meta["Condition"]
        adata_org.obs["Block"] = sample_meta["Block"]
        adata_org.obs["sample_num"] = sample_meta["Number"]
        adata_org.obs["sample"] = sample

        # Attach every morphometrics column (positionally) to obs.
        for feature in df.columns:
            adata_org.obs[feature] = np.array(df[feature])

        anndatas.append(adata_org)

    if not anndatas:
        raise ValueError(
            f"no sample of compartment {compartement!r} was found in the overview table"
        )

    adata = ad.concat(anndatas)
    # Keep only the stains selected for this compartment. `.copy()` turns the
    # view into a real object before var_names/obs are modified.
    stain_vector = [x in good_stains for x in adata.var_names]
    adata = adata[:, stain_vector].copy()

    # Unique id for vars
    adata.var_names = np.array(adata.var_names) + "_" + compartement

    # Unique id for obs: object ID followed directly by the sample name
    adata.obs["unique_id"] = adata.obs["ID"].astype(str) + adata.obs["sample"].astype(
        str
    )
    adata.obs_names = adata.obs["unique_id"]

    anndatas_compartements.append(adata)
adata = ad.concat(anndatas_compartements, axis=1, merge="same")
adata.obs["sample_num_str"] = adata.obs["sample_num"].astype(str)

# Load the other h5ad and keep only the observations whose unique_id is in it
ann_path = Path(dir_output, "brain_ecm_4i_2_v2_mean.h5ad")
adata_cyto_nuc = ad.read_h5ad(ann_path)
adata = adata[adata.obs["unique_id"].isin(adata_cyto_nuc.obs["unique_id"])].copy()

In [None]:
# Tag every observation with the compartment its measurements come from:
# `adata` was loaded from the cytoplasm directory, `adata_ecm` is the copy of
# the ECM-niche object taken earlier in the notebook.
adata.obs["compartement"] = "cellular"
adata_ecm.obs["compartement"] = "extra_cellular"

In [None]:
# One split violin plot per measured protein: x = day, halves = condition.
# The palette and the output directory do not depend on the protein, so they
# are set up once before the loop.
color_pallete_perturbation = {"Matrigel": "#17ad97", "No Matrix": "#4d4d4d"}
Path("figures/cellular_protein_expression").mkdir(parents=True, exist_ok=True)

for protein in adata.var_names:
    y_label = f"Log expression \n {protein}"

    # Long-format table: one row per observation.
    time_course = pd.DataFrame()
    time_course[y_label] = adata[:, protein].X[:, 0]
    time_course["Condition"] = np.array(adata.obs["Condition"])
    time_course["Day"] = np.array(adata.obs["Day"])

    fig, ax = plt.subplots(figsize=(5, 3))
    # Draw on the axes created above instead of relying on the current axes.
    sns.violinplot(
        data=time_course,
        x="Day",
        y=y_label,
        hue="Condition",
        split=True,
        palette=color_pallete_perturbation,
        gap=0.05,
        cut=0,
        ax=ax,
    )
    ax.legend(loc="center left", bbox_to_anchor=(1.0, 0.5), fontsize=16)
    sns.despine(ax=ax, left=True, bottom=True, right=True)

    fig.savefig(
        f"figures/cellular_protein_expression/violin_plot_{protein}.svg",
        bbox_inches="tight",
    )
    # Display the figure in the notebook, then release it so that figures do
    # not pile up in memory across iterations.
    plt.show()
    plt.close(fig)