In [None]:
# Notebook setup: plotting/analysis imports and global matplotlib / scanpy defaults.
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline
# NOTE(review): this 300 dpi value is overridden by the `figure.dpi = 100`
# assignment further down in this same cell.
mpl.rcParams["figure.dpi"] = 300

# NOTE(review): numpy was already imported above; this second import is redundant.
import numpy as np
import pandas as pd
import scanpy as sc
import skimage

sc.settings.vector_friendly = False
# Record the installed scikit-image version in the cell output.
print(skimage.__version__)

# Default figure size and resolution for subsequent plots
# (the dpi of 100 replaces the 300 set earlier in this cell).
plt.rcParams["figure.figsize"] = [12, 8]
plt.rcParams["figure.dpi"] = 100
from morphometrics.explore.cluster import cluster_features
from morphometrics.explore.dimensionality_reduction import pca
from morphometrics.utils.anndata_utils import table_to_anndata

# Seeded random generator; it is not referenced in the cells visible here.
rng = np.random.default_rng(42)

# NOTE(review): same package as the `mpl` alias imported at the top of the cell.
import matplotlib

# Font type 42 for PDF/PS export - presumably so text stays editable in the
# saved vector figures; TODO confirm.
matplotlib.rcParams["pdf.fonttype"] = 42
matplotlib.rcParams["ps.fonttype"] = 42

In [None]:
# Load the morphometrics tables, keep only the multimosaic experiment, and
# build the AnnData object (`measurement_data`) used by the rest of the notebook.
# NOTE(review): hard-coded absolute cluster path; consider a configurable data dir.
seg_directory = "/cluster/project/treutlein/DATA/imaging/EmbedSeg_test/data/3D_Brain_organoids_half_res_morphometrics/"
# load data
region_properties_table = pd.read_hdf(
    f"{seg_directory}/QC_morphometrics_all_v20_06_2023_half_res.h5"
)
print("read_QC5")

region_properties_table_replicates = pd.read_hdf(
    f"{seg_directory}/QC_morphometrics_v20_06_2023_replicates.h5"
)
print("read_QC_replicates_1")

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` stacks the rows the
# same way (original indices are kept, not reset).
region_properties_table = pd.concat(
    [region_properties_table, region_properties_table_replicates]
)
print("Only multimosaic experiment")
# Only keep cells from the multimosaic exp. The explicit copy lets us add a
# column below without pandas treating the frame as a view of the full table.
region_properties_table = region_properties_table[
    region_properties_table["experiment"] == "multimosaic"
].copy()

# Elongation measure: major axis length divided by minor axis length.
region_properties_table["Axis length ratio"] = (
    region_properties_table["axis_major_length"]
    / region_properties_table["axis_minor_length"]
)

# Drop every row whose structure label is 1.
indices = region_properties_table["structure_labels"] != 1
all_gfp = region_properties_table[indices].copy()

# time_point 1 maps to day 4; every 24 time points add one day.
all_gfp["Day"] = 4 + ((all_gfp["time_point"] - 1) / 24)

# Remove columns whose names contain any of these patterns (case-insensitive).
for unwanted_pattern in ("centroid", "moments", "_intensity", "bbox-"):
    all_gfp = all_gfp.loc[
        :, ~all_gfp.columns.str.contains(unwanted_pattern, case=False)
    ]
all_gfp = all_gfp[all_gfp["Day"] < 10]


# NOTE(review): positional slice - assumes the first 2 and the last 11 columns
# are metadata rather than features; verify whenever the table schema changes.
new_tab = all_gfp.iloc[:, 2:-11]
new_tab = new_tab.loc[
    :, ~new_tab.columns.str.contains("intensity_image", case=False)
].copy()
# Re-attach the axis ratio under an underscore-separated feature name.
new_tab["Axis_length_ratio"] = all_gfp["Axis length ratio"]
measurement_data = table_to_anndata(measurement_table=new_tab)


print(measurement_data)

In [None]:
# Check the feature layout of `measurement_data` and attach per-object
# annotations (time, structure name, raw shape measures) to `.obs`.
surface_1 = [
    "surface_area",
    "curvature_mean",
    "curvature_stdev",
    "curvature_0",
    "curvature_10",
    "curvature_20",
    "curvature_30",
    "curvature_40",
    "curvature_50",
    "curvature_60",
    "curvature_70",
    "curvature_80",
    "curvature_90",
    "curvature_100",
]
size = ["area", "bbox_area", "convex_area", "equivalent_diameter"]
extra = ["axis_minor_length", "axis_major_length", "Axis_length_ratio"]
expected_features = surface_1 + size + extra
# Comparing plain lists avoids an opaque length-mismatch error from the
# element-wise Index comparison when the number of features differs.
assert list(measurement_data.var_names) == expected_features, (
    "unexpected feature columns: "
    f"{sorted(set(measurement_data.var_names) ^ set(expected_features))}"
)

# Values are assigned as plain arrays so they are stored positionally,
# independent of the index of `all_gfp`.
measurement_data.obs["time_point"] = np.array(all_gfp["time_point"].astype(np.int32))
# NOTE(review): the int32 cast truncates the fractional part of "Day".
measurement_data.obs["Day"] = np.array(all_gfp["Day"].astype(np.int32))

# Translate numeric structure codes to marker names in a single pass instead of
# chained `obs[col][mask] = ...` assignments, which write strings into an int32
# column through an intermediate object and are unreliable under pandas
# Copy-on-Write. Codes without a name are left unchanged, as before.
structure_names = {2: "Histone", 3: "Actin", 4: "Tubulin", 5: "Lamin", 6: "CAAX"}
structure_codes = all_gfp["structure_labels"].astype(np.int32)
measurement_data.obs["structure_labels"] = np.array(
    [structure_names.get(code, code) for code in structure_codes.tolist()],
    dtype=object,
)
measurement_data.obs["label"] = np.array(all_gfp["label"])

measurement_data.obs["axis_length_ratio_raw"] = np.array(all_gfp["Axis length ratio"])
# "area" scaled by (2 * 0.347) along each of the three axes.
# NOTE(review): presumably a 0.347 voxel edge doubled for the half-resolution
# data, giving a physical volume - confirm the units.
volume = np.array(all_gfp["area"]) * 2 * 0.347 * 2 * 0.347 * 2 * 0.347
measurement_data.obs["volume_raw"] = volume
# NOTE(review): "combined" holds exactly the same values as "volume_raw".
measurement_data.obs["combined"] = volume.copy()

In [None]:
# Use PCA to reduce dimensionality
%matplotlib inline

pca(measurement_data, normalize_data=True)
sc.pl.pca(measurement_data)
sc.pl.pca_variance_ratio(measurement_data)

# cluster
cluster_features(
    measurement_data,
    method="leiden",
    compute_neighbors=True,
    neighbors_kwargs={"n_pcs": 4},
    clustering_kwargs={"resolution": 1.5},
)
sc.tl.umap(measurement_data, min_dist=0.2, spread=0.4)

mean_age_frame = measurement_data.obs.groupby("leiden")["Day"].mean()
leiden_frame = pd.DataFrame(measurement_data.obs["leiden"])

average_age = []
for cluster in leiden_frame["leiden"]:
    average_age.append(mean_age_frame[cluster])
average_age = np.array(average_age)
measurement_data.obs["average_cluster_age"] = average_age

In [None]:
# Start from matplotlib's "classic" style, then layer the seaborn "white" style on top.
plt.style.use("classic")
# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and the
# old names were later removed, so pick whichever name this installation has.
seaborn_white_style = (
    "seaborn-v0_8-white"
    if "seaborn-v0_8-white" in plt.style.available
    else "seaborn-white"
)
plt.style.use(seaborn_white_style)

In [None]:
import met_brewer

# Request one "Archambault" palette entry per distinct structure label.
n_structures = len(np.unique(measurement_data.obs["structure_labels"]))
colors = met_brewer.met_brew(
    name="Archambault", n=n_structures, brew_type="continuous"
)

In [None]:
# Separate copy of the palette, used as the per-structure colour list in the plots below.
colors_structures = colors.copy()

In [None]:
# Exchange palette entries 2 and 4, reading both from the original `colors`
# so that re-running this cell yields the same result.
colors_structures[4], colors_structures[2] = colors[2], colors[4]

In [None]:
# UMAP embedding coloured by structure label, written to a PDF via `save`.
sc.set_figure_params(dpi=200, vector_friendly=False)

structure_umap_options = dict(
    color="structure_labels",
    size=15,
    title="",
    frameon=False,
    legend_fontsize="x-small",
    palette=colors_structures,
    save="_paga_multimosaic_all_markers.pdf",
)
sc.pl.umap(measurement_data, **structure_umap_options)

In [None]:
# Font settings for exported figures.
import matplotlib

matplotlib.rcParams["pdf.fonttype"] = 42
matplotlib.rcParams["ps.fonttype"] = 42
plt.rcParams["font.family"] = "Arial"

# Distinct "Day" values. This was the first statement of the cell, where its
# result was silently discarded; only a cell's last expression is displayed.
np.unique(measurement_data.obs["Day"])

In [None]:
# Light-grey to midnight-blue colormap for the volume plot.
color_volume_midnight_blue = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ["#f7f7f7", "#191970"]
)

# Plot every observation that is NOT in one of these Leiden clusters.
excluded_clusters = ["2", "16", "18", "10", "4", "0"]
outside_excluded = ~measurement_data.obs["leiden"].isin(excluded_clusters)

sc.pl.umap(
    measurement_data[outside_excluded].copy(),
    show=True,
    color="volume_raw",
    size=15,
    title="",
    cmap=color_volume_midnight_blue,
    frameon=False,
    legend_fontsize="x-small",
    palette=colors_structures,
    save="_volume_top_clusters_paga_initialized_volume_mum3_midnightblue.pdf",
)

In [None]:
# Light-grey to maroon colormap for the axis-length-ratio plot.
color_volume_maroon = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ["#f7f7f7", "#730215"]
)

# Plot only the observations that belong to one of these Leiden clusters.
selected_clusters = ["2", "16", "18", "10", "4", "0"]
in_selected = measurement_data.obs["leiden"].isin(selected_clusters)

sc.pl.umap(
    measurement_data[in_selected].copy(),
    show=True,
    color="axis_length_ratio_raw",
    cmap=color_volume_maroon,
    size=15,
    title="",
    frameon=False,
    legend_fontsize="x-small",
    palette=colors_structures,
    save="_bottom_clusters_paga_multimosaic_all_markers_axis_length_ratio_maroon.pdf",
)

In [None]:
# Light-grey to near-black colormap for the PAGA node colours.
color_age = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ["#f7f7f7", "#1f1f1f"]
)

sc.set_figure_params(dpi=200, vector_friendly=False)

# `sc.pl.paga` needs the PAGA graph in `.uns["paga"]`, but no visible cell
# computes it; build it on the Leiden clusters if it is missing so the
# notebook runs from a fresh kernel.
if "paga" not in measurement_data.uns:
    sc.tl.paga(measurement_data, groups="leiden")

# PAGA cluster graph with nodes coloured by "Day".
sc.pl.paga(
    measurement_data,
    color=["Day"],
    title="",
    node_size_scale=7,
    threshold=0.1,
    cmap=color_age,
    frameon=False,
    save="_graph_age_days_multimosaic.pdf",
)

In [None]:
sc.set_figure_params(dpi=200, vector_friendly=False)

# `sc.pl.paga` needs the PAGA graph in `.uns["paga"]`; compute it on the
# Leiden clusters if it has not been computed yet.
if "paga" not in measurement_data.uns:
    sc.tl.paga(measurement_data, groups="leiden")

# PAGA cluster graph with nodes coloured by volume. The annotation stored on
# `.obs` is named "volume_raw"; "volume" is neither an obs column nor one of
# the feature names, so the original key could not be resolved.
sc.pl.paga(
    measurement_data,
    color=["volume_raw"],
    title="",
    node_size_scale=7,
    threshold=0.1,
    cmap=color_age,
    frameon=False,
    save="_graph_volume_multimo.pdf",
)