In [None]:
import os

import matplotlib as mpl
import matplotlib.pyplot as plt

%matplotlib inline
mpl.rcParams["figure.dpi"] = 300

import matplotlib
import numpy as np
import pandas as pd
import scanpy as sc
import skimage

sc.settings.vector_friendly = False
print(skimage.__version__)
plt.rcParams["figure.figsize"] = [12, 8]
plt.rcParams["figure.dpi"] = 100
from morphometrics.explore.cluster import cluster_features
from morphometrics.explore.dimensionality_reduction import pca
from morphometrics.utils.anndata_utils import table_to_anndata

rng = np.random.default_rng(42)

In [None]:
# Directory holding the morphometrics tables (HDF5) for the half-resolution
# 3D brain organoid segmentation.
seg_directory = "/cluster/project/treutlein/DATA/imaging/EmbedSeg_test/data/3D_Brain_organoids_half_res_morphometrics/"

# Load the main table and the replicate table.
region_properties_table = pd.read_hdf(
    f"{seg_directory}/QC_morphometrics_all_v20_06_2023_half_res.h5"
)
print("read_QC5")

region_properties_table_replicates = pd.read_hdf(
    f"{seg_directory}/QC_morphometrics_v20_06_2023_replicates.h5"
)
print("read_QC_replicates_1")

# Stack both tables row-wise. `DataFrame.append` was removed in pandas 2.0;
# `pd.concat` with default arguments produces the same result (original index
# labels kept, columns unioned without sorting).
region_properties_table = pd.concat(
    [region_properties_table, region_properties_table_replicates]
)

print("Only multimosaic experiment")
# Only keep cells from the multimosaic exp. The explicit copy makes the result
# an independent frame, so the column that a later cell adds to this table is
# not written into a slice of the concatenated frame.
region_properties_table = region_properties_table[
    region_properties_table["experiment"] == "multimosaic"
].copy()

In [None]:
import matplotlib

# Font handling for exported figures: fonttype 42 is written for both PDF and
# PostScript output, and Arial is used as the font family.
matplotlib.rcParams.update(
    {
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
        "font.family": "Arial",
    }
)

In [None]:
all_figures = []

for marker, marker_label in zip(
    [2, 3, 4, 5, 6], ["Histone", "Actin", "Tubulin", "Lamin", "CAAX"]
):
    indices = region_properties_table["structure_labels"] == marker
    all_gfp = region_properties_table[indices].copy()
    region_properties_table["Axis length ratio"] = (
        region_properties_table["axis_major_length"]
        / region_properties_table["axis_minor_length"]
    )

    all_gfp = region_properties_table[indices].copy()

    all_gfp["Day"] = 4 + ((all_gfp["time_point"] - 1) / 24)

    all_gfp = all_gfp.loc[:, ~all_gfp.columns.str.contains("centroid", case=False)]
    all_gfp = all_gfp.loc[:, ~all_gfp.columns.str.contains("moments", case=False)]
    all_gfp = all_gfp.loc[:, ~all_gfp.columns.str.contains("_intensity", case=False)]
    all_gfp = all_gfp.loc[:, ~all_gfp.columns.str.contains("bbox-", case=False)]
    all_gfp = all_gfp[all_gfp["Day"] < 10]

    new_tab = all_gfp.iloc[:, 2:-11]
    new_tab = new_tab.loc[
        :, ~new_tab.columns.str.contains("intensity_image", case=False)
    ]
    new_tab["Axis_length_ratio"] = all_gfp["Axis length ratio"]
    measurement_data = table_to_anndata(measurement_table=new_tab)
    measurement_data.obs["time_point"] = np.array(
        all_gfp["time_point"].astype(np.int32)
    )
    measurement_data.obs["Day"] = np.array(all_gfp["Day"].astype(np.int32))
    measurement_data.obs["structure_labels"] = np.array(
        all_gfp["structure_labels"].astype(np.int32)
    )
    measurement_data.obs["label"] = np.array(all_gfp["label"])

    measurement_data.obs["structure_labels"][
        measurement_data.obs["structure_labels"] == 2
    ] = "Histone"
    measurement_data.obs["structure_labels"][
        measurement_data.obs["structure_labels"] == 3
    ] = "Actin"
    measurement_data.obs["structure_labels"][
        measurement_data.obs["structure_labels"] == 4
    ] = "Tubulin"
    measurement_data.obs["structure_labels"][
        measurement_data.obs["structure_labels"] == 5
    ] = "Lamin"
    measurement_data.obs["structure_labels"][
        measurement_data.obs["structure_labels"] == 6
    ] = "CAAX"

    measurement_data.obs["axis_length_ratio_raw"] = np.array(
        all_gfp["Axis length ratio"]
    )
    measurement_data.obs["volume_raw"] = (
        np.array(all_gfp["area"]) * 2 * 0.347 * 2 * 0.347 * 2 * 0.347
    )
    print(measurement_data.shape)
    # Use PCA to reduce dimensionality
    %matplotlib inline

    pca(measurement_data, normalize_data=True)
    sc.pl.pca(measurement_data)
    sc.pl.pca_variance_ratio(measurement_data)

    cluster_features(
        measurement_data,
        method="leiden",
        compute_neighbors=True,
        neighbors_kwargs={"n_pcs": 4},
        clustering_kwargs={"resolution": 0.4},
    )
    sc.tl.umap(measurement_data, min_dist=0.2, spread=0.4)

    mean_age_frame = measurement_data.obs.groupby("leiden")["Day"].mean()
    leiden_frame = pd.DataFrame(measurement_data.obs["leiden"])

    average_age = []
    for cluster in leiden_frame["leiden"]:
        average_age.append(mean_age_frame[cluster])
    average_age = np.array(average_age)
    measurement_data.obs["average_cluster_age"] = average_age
    plt.style.use("classic")
    plt.style.use("seaborn-white")
    import met_brewer

    colors = met_brewer.met_brew(
        name="Johnson",
        n=len(np.unique(measurement_data.obs["leiden"])),
        brew_type="continuous",
    )
    cmap_brewer_umap = matplotlib.colors.ListedColormap(
        colors, name="from_list", N=None
    )

    sc.settings.figdir = "extended_figures/multimosaic_all_markers/"
    sc.set_figure_params(dpi=200, vector_friendly=False)
    matplotlib.rcParams["pdf.fonttype"] = 42
    matplotlib.rcParams["ps.fonttype"] = 42

    sc.tl.paga(measurement_data, groups="leiden")
    # plot paga, remove edges with low weights
    # Create umap, based on PAGA

    sc.pl.paga(
        measurement_data,
        title=marker_label,
        node_size_scale=7,
        threshold=0.1,
        frameon=False,
        show=False,
        cmap=cmap_brewer_umap,
    )

    sc.tl.umap(measurement_data, init_pos="paga")

    sc.set_figure_params(
        scanpy=True,
        dpi=80,
        dpi_save=150,
        frameon=False,
        vector_friendly=False,
        fontsize=6,
        figsize=(2.5, 2.5),
        color_map=None,
        format="pdf",
        facecolor=None,
        transparent=True,
        ipython_format="png2x",
    )

    sc.pl.umap(
        measurement_data,
        color="leiden",
        legend_fontsize=7,
        palette=colors,
        size=14,
        title="",
        frameon=False,
        save=f"_{marker_label}_paga_initialized_leiden.pdf",
    )

    sc.pl.paga(
        measurement_data,
        title="",
        node_size_scale=1,
        threshold=0.1,
        frameon=False,
        fontsize=5,
        cmap=cmap_brewer_umap,
        save=f"_{marker_label}_leiden.pdf",
    )

    sc.pl.paga(
        measurement_data,
        color=["Day"],
        title="",
        fontsize=6,
        node_size_scale=1,
        threshold=0.1,
        frameon=False,
        save=f"_{marker_label}_average_age.pdf",
    )

    sc.pl.paga(
        measurement_data,
        color=["volume_raw"],
        title="",
        node_size_scale=1,
        threshold=0.1,
        fontsize=6,
        frameon=False,
        save=f"_{marker_label}_volume.pdf",
    )

    sc.pl.paga(
        measurement_data,
        color=["axis_length_ratio_raw"],
        show=False,
        title="",
        node_size_scale=1,
        threshold=0.1,
        fontsize=6,
        save=f"_{marker_label}_axis_ratio.pdf",
    )

In [None]:
import tifffile

# Export up to 10 example cells per selected leiden cluster as max-projected
# 16-bit TIFFs.
#
# NOTE: this cell reads `all_gfp` and `measurement_data` as left behind by the
# marker loop, i.e. the values from that loop's final iteration. The boolean
# mask below relies on both objects having their rows in the same order.
for cluster in ["0", "2", "17", "11"]:
    print(cluster)
    actin_DF_one_cluster = all_gfp[
        np.array(measurement_data.obs["leiden"] == cluster)
    ].copy()

    sample_cluster_dir = f"figures/cluster_examples_multimo/cluster_{cluster}/"
    os.makedirs(sample_cluster_dir, exist_ok=True)

    # `DataFrame.sample` raises when asked for more rows than exist, so cap
    # the request at the cluster size. Clusters with >= 10 cells yield the same
    # 10 rows as before (same n, same random_state).
    n_examples = min(10, len(actin_DF_one_cluster))
    sampled_images = actin_DF_one_cluster.sample(n=n_examples, random_state=1)[
        "intensity_image"
    ]

    for counter, image in enumerate(sampled_images):
        tifffile.imwrite(
            sample_cluster_dir
            + f"sample_cell_cluster_{cluster}_example_number_{counter}.tif",
            # Max projection along axis 0, clipped to the uint16 range.
            image.max(0).clip(0.0, 2**16 - 1).astype(np.uint16),
            imagej=True,
            resolution=(1.0 / (0.347 * 2), 1.0 / (0.347 * 2)),
            metadata={"spacing": 2, "unit": "um", "axes": "YX"},
            compression="zlib",
        )