In [1]:
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline
mpl.rcParams["figure.dpi"] = 300
from skimage.io import imread

# Global matplotlib look for the notebook. Style sheets are applied in order,
# each one overriding the rcParams it defines.
# NOTE(review): "classic" looks like it resets everything "dark_background"
# sets — confirm, then the first call can be dropped.
plt.style.use("dark_background")
plt.style.use("classic")

# The seaborn style sheets were renamed to "seaborn-v0_8-*" in matplotlib 3.6
# and the old names were removed in 3.8. Pick whichever name this matplotlib
# ships; fall back to the legacy name so a missing style still raises loudly.
seaborn_white_style = next(
    (
        style_name
        for style_name in ("seaborn-v0_8-white", "seaborn-white")
        if style_name in plt.style.available
    ),
    "seaborn-white",
)
plt.style.use(seaborn_white_style)

# Figure size / resolution are set once, after the style sheets, because
# applying a style sheet can reset them.
plt.rcParams["figure.figsize"] = [12, 8]
plt.rcParams["figure.dpi"] = 100  # 200 e.g. is really fine, but slower

# Font type 42 (TrueType) for PDF/PS export.
mpl.rcParams["pdf.fonttype"] = 42
mpl.rcParams["ps.fonttype"] = 42
plt.rcParams["font.family"] = "Arial"

import re

import pandas as pd
import scanpy as sc

# Scanpy figure defaults: render figures at 200 dpi.
# NOTE(review): this presumably overrides the figure.dpi = 100 set through
# plt.rcParams earlier in this cell — confirm which value is intended.
sc.set_figure_params(dpi=200)


import seaborn as sns
import skimage
from morphometrics.explore.cluster import cluster_features
from morphometrics.explore.dimensionality_reduction import pca
from morphometrics.utils.anndata_utils import table_to_anndata
from skimage.measure import label
from tqdm import tqdm

# Seeded NumPy random Generator (seed 42) for reproducible random draws.
rng = np.random.default_rng(42)
import scipy.spatial.distance as distance


def colorFader(c1, c2, mix=0):
    """Linearly interpolate between two colors and return the result as hex.

    Parameters
    ----------
    c1, c2
        Start and end colors, in any form accepted by
        ``mpl.colors.to_rgb``.
    mix
        Blend weight: ``0`` returns ``c1``, ``1`` returns ``c2``.

    Returns
    -------
    str
        The blended color as a hex string (via ``mpl.colors.to_hex``).
    """
    start_rgb = np.array(mpl.colors.to_rgb(c1))
    end_rgb = np.array(mpl.colors.to_rgb(c2))
    blended_rgb = (1 - mix) * start_rgb + mix * end_rgb
    return mpl.colors.to_hex(blended_rgb)


# Inches per centimetre: multiply a length in cm by this to get inches.
# Presumably used to specify figure sizes in cm — verify against later cells.
cm = 1 / 2.54

0.19.3


In [None]:
seg_directory = "/3D_Brain_organoids_half_res_morphometrics/"
# load data
region_properties_table = pd.read_hdf(
    f"{seg_directory}/QC_morphometrics_all_v20_06_2023_half_res.h5"
)
print("read_QC5")

region_properties_table_replicates = pd.read_hdf(
    f"{seg_directory}/QC_morphometrics_v20_06_2023_replicates.h5"
)
print("read_QC_replicates_1")

# DataFrame.append was removed in pandas 2.0; pd.concat with default
# arguments stacks the rows the same way (original indices are kept).
region_properties_table = pd.concat(
    [region_properties_table, region_properties_table_replicates]
)
print("Only AGAR experiment")
# Only keep cells from the AGAR exp.
# .copy() so the column added below is written to an independent frame and
# not to a slice of the concatenated table (avoids SettingWithCopyWarning).
region_properties_table = region_properties_table[
    region_properties_table["experiment"] == "AGAR"
].copy()

region_properties_table["Axis length ratio"] = (
    region_properties_table["axis_major_length"]
    / region_properties_table["axis_minor_length"]
)

# Keep only rows with structure label 3 (named "Actin" in the label mapping
# used further down in this cell).
indices = region_properties_table["structure_labels"] == 3
actin_DF = region_properties_table[indices].copy()

# Drop column families that are not used as features (case-insensitive
# substring match on the column name).
for excluded_pattern in ("centroid", "moments", "_intensity", "bbox-"):
    actin_DF = actin_DF.loc[
        :, ~actin_DF.columns.str.contains(excluded_pattern, case=False)
    ]

# Positional slice of the remaining columns.
# NOTE(review): presumably skips 2 leading id columns and 11 trailing metadata
# columns — this depends on the exact column order of the HDF table; the
# var_names assert below guards the result.
new_tab = actin_DF.iloc[:, 2:-11].copy()
new_tab["Axis_length_ratio"] = actin_DF["Axis length ratio"]
measurement_data = table_to_anndata(measurement_table=new_tab)

# Expected feature (var) names, in the exact order of the AnnData columns.
# The curvature_0 ... curvature_100 entries step by 10.
all_non_sur = [
    "surface_area",
    "curvature_mean",
    "curvature_stdev",
    *(f"curvature_{step}" for step in range(0, 101, 10)),
    "area",
    "bbox_area",
    "convex_area",
    "equivalent_diameter",
    "axis_minor_length",
    "axis_major_length",
    "Axis_length_ratio",
]

# Sanity check: the feature table must contain exactly these columns, in order.
assert (measurement_data.var_names == all_non_sur).all()

# Attach per-object metadata to the AnnData observations and replace numeric
# codes with readable names.
#
# The original code relabelled values with chained indexing
# (obs["col"][mask] = "name"), which writes strings into an int32 column
# through a possibly temporary object: it triggers SettingWithCopyWarning and
# is a no-op under pandas copy-on-write. The lookups below build each final
# column in one step instead.

# Imaging position -> embedding condition.
POSITION_TO_MATRIX = {
    1: "Matrigel",
    2: "Matrigel",
    3: "Matrigel",
    4: "Matrigel",
    9: "Agarose",
    10: "Agarose",
    11: "Agarose",
    12: "Agarose",
    13: "No Matrix",
    14: "No Matrix",
    15: "No Matrix",
    16: "No Matrix",
}

# Structure label code -> structure name.
STRUCTURE_LABEL_NAMES = {
    2: "Histone",
    3: "Actin",
    4: "Tubulin",
    5: "Lamin",
    6: "CAAX",
}

# Values are assigned as plain arrays/lists, i.e. by row position, not by
# index alignment — same as the original np.array(...) assignments.
position_ids = actin_DF["position"].astype(np.int32).to_numpy()
structure_ids = actin_DF["structure_labels"].astype(np.int32).to_numpy()

measurement_data.obs["time_point"] = actin_DF["time_point"].astype(np.int32).to_numpy()
measurement_data.obs["Day"] = actin_DF["Day"].astype(np.int32).to_numpy()
# "organoid" keeps the numeric position id; "position" is replaced by the
# condition name below.
# NOTE(review): presumably one organoid per imaging position — confirm.
measurement_data.obs["organoid"] = position_ids
measurement_data.obs["label"] = np.array(actin_DF["label"])

# Codes missing from a mapping are kept unchanged, as in the original.
measurement_data.obs["position"] = [
    POSITION_TO_MATRIX.get(int(position_id), int(position_id))
    for position_id in position_ids
]
measurement_data.obs["structure_labels"] = [
    STRUCTURE_LABEL_NAMES.get(int(structure_id), int(structure_id))
    for structure_id in structure_ids
]

# Un-normalised copies of two features, kept alongside the metadata.
measurement_data.obs["Axis_length_ratio_raw"] = np.array(actin_DF["Axis length ratio"])
measurement_data.obs["volume_raw"] = np.array(actin_DF["area"])

In [None]:
# Use PCA to reduce dimensionality
%matplotlib inline

pca(measurement_data, normalize_data=True)
sc.pl.pca(measurement_data)
sc.pl.pca_variance_ratio(measurement_data)

cluster_features(
    measurement_data,
    method="leiden",
    compute_neighbors=True,
    neighbors_kwargs={"n_pcs": 5, "n_neighbors": 15},
    clustering_kwargs={"resolution": 0.6},
)

sc.tl.umap(measurement_data, min_dist=0.2, spread=2.0)

# clusters 9 and 8 are composed of debris
actin_DF = actin_DF[np.array(measurement_data.obs["leiden"] != "9")]
measurement_data = measurement_data[measurement_data.obs["leiden"] != "9"]
actin_DF = actin_DF[np.array(measurement_data.obs["leiden"] != "8")]
measurement_data = measurement_data[measurement_data.obs["leiden"] != "8"]

In [None]:
import met_brewer

# One color per Leiden cluster, taken from the "Johnson" palette.
clusters = np.array(measurement_data.obs["leiden"])
colors = met_brewer.met_brew(
    name="Johnson",
    n=len(np.unique(clusters)),
    brew_type="continuous",
)

# The notebook imports matplotlib only as `mpl` (and pyplot as `plt`), so the
# bare name `matplotlib` is undefined on a fresh kernel; use `mpl` here.
# Image colormap: black is prepended as the first entry (presumably for
# background / label 0 — confirm against the image-plotting cells).
cmap_brewer_image = mpl.colors.ListedColormap(
    ["#000000"] + list(colors), name="from_list", N=None
)
# UMAP colormap: cluster colors only.
cmap_brewer_umap = mpl.colors.ListedColormap(colors, name="from_list", N=None)

In [None]:
# NOTE(review): the second call does not pass vector_friendly, so that
# argument takes its default there — confirm which setting is intended.
sc.set_figure_params(dpi=200, vector_friendly=False)

sc.set_figure_params(dpi=200)

# Keyword arguments shared by both UMAP plots.
_umap_style = {
    "size": 14,
    "title": "",
    "frameon": False,
    "legend_fontsize": "x-small",
}

# Fixed color per embedding condition.
_condition_palette = {
    "Matrigel": "#17ad97",
    "Agarose": "#98d9d1",
    "No Matrix": "#4d4d4d",
}

# First plot: colored by Leiden cluster; second: colored by condition.
for _color_key, _palette in (("leiden", colors), ("position", _condition_palette)):
    sc.pl.umap(
        measurement_data,
        color=_color_key,
        palette=_palette,
        **_umap_style,
    )

In [None]:
# Redo clustering, with removed cells
%matplotlib inline

pca(measurement_data, normalize_data=True)
sc.pl.pca(measurement_data)
sc.pl.pca_variance_ratio(measurement_data)

cluster_features(
    measurement_data,
    method="leiden",
    compute_neighbors=True,
    neighbors_kwargs={"n_pcs": 5, "n_neighbors": 15},
    clustering_kwargs={"resolution": 0.6},
)

sc.tl.umap(measurement_data, min_dist=0.2, spread=2.0)
sc.tl.paga(measurement_data, groups="leiden")

adata.write("anndatas/actin_morphometrics_all_panel_defg_12_07_2024.h5ad")