In [None]:
import os
import sys
import time
from urllib.parse import urlparse

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline
mpl.rcParams["figure.dpi"] = 300
from IPython.display import HTML, clear_output
from PIL import Image
from skimage.io import imread, imsave
from skimage.transform import downscale_local_mean, rescale, resize

plt.style.use("dark_background")
import glob
import re
from functools import partial

import anndata
import h5py
import joblib
import napari
import numpy as np
import pandas as pd
import psutil
import pymeshfix
import pyvista as pv
import scanpy as sc
import skimage
from joblib import Parallel, delayed
from morphometrics.measure import measure_selected
from skimage import draw
from skimage.measure import label, marching_cubes, regionprops, regionprops_table
from sklearn.ensemble import RandomForestClassifier

# Save classifier
from sklearn.model_selection import GridSearchCV
from tqdm import tqdm

In [None]:
# Root folder that holds one sub-folder per imaging experiment.
# The value ends with "/" because later cells build paths by plain string
# concatenation (`seg_directory + experiment_directory`).
seg_directory = (
    "/cluster/project/treutlein/DATA/imaging/viventis/"
    "Morphodynamics_of_human_early_brain_organoid_development/"
    "3D_Brain_organoids_half_res_morphometrics/"
)

# Experiment sub-folders (relative to `seg_directory`) processed together by
# the next cell; each entry keeps its trailing "/".
experiment_directories = list(
    (
        "images_AGAR_one_per_day_all_images_lamin_all_06_02_2023/",
        "3D_one_image_per_day_AGAR_all_all_06_02_2023/",
        "3D_one_image_per_day_all_06_02_2023/",
    )
)

In [None]:
# Pipeline of this cell:
#   1. run `read_extract` in parallel over every image of every experiment,
#   2. drop empty / implausible masks,
#   3. classify each mask with the saved random-forest model,
#   4. drop label/channel/experiment combinations that are treated as
#      implausible, and
#   5. write the remaining (label != 1) rows to an HDF5 table.
#
# NOTE(review): `read_extract` is not defined in this part of the notebook. It
# is called with a file index only, so it presumably reads the module-level
# `input_dir`, `output_dir` and `all_files` assigned in the loop below; those
# names are therefore kept as globals - confirm.
extracted_frames = []
for experiment_directory in experiment_directories:
    input_dir = seg_directory + experiment_directory
    print(input_dir)
    output_dir = input_dir + "/morphometrics/"
    all_files = [os.path.basename(x) for x in glob.glob(f"{input_dir}/images/*")]
    results = Parallel(n_jobs=12, verbose=10)(
        delayed(read_extract)(i) for i in range(len(all_files))
    )
    extracted_frames.extend(results)
    del results

if not extracted_frames:
    # Fail here with a readable message instead of a later KeyError on "bbox-3".
    raise ValueError(
        f"No images found under {seg_directory} for {experiment_directories}"
    )

# Concatenate once: `DataFrame.append` was removed in pandas 2.0 and growing a
# frame inside a loop copies all previous rows on every iteration.
all_structures = pd.concat(extracted_frames)
del extracted_frames

# `reset_index()` deliberately keeps the old index as a column: the positional
# feature slice further down (`.iloc[:, 2:-8]`) depends on the column layout.
region_properties_table = (
    all_structures.reset_index()
    .dropna(axis=1, how="all")
    .dropna(axis=0, how="any")
)

# Remove unlikely masks
region_properties_table["z_hight"] = (
    region_properties_table["bbox-3"] - region_properties_table["bbox-0"]
)
# NOTE(review): presumably a minimum object size of 100 (physical units)
# divided by the voxel volume at half resolution - confirm the units.
min_mask_area = 100 / (0.347 * 2 * 2 * 0.347 * 2 * 0.347)
passes_basic_qc = (region_properties_table["max_intensity"] >= 20) & (
    region_properties_table["area"] >= min_mask_area
)

# Snapshot of the QC-filtered table before classification. The original cell
# read `region_properties_table_duplicate` without ever assigning it, which is
# a NameError on a fresh kernel and silently reuses stale data otherwise.
region_properties_table_duplicate = region_properties_table.loc[passes_basic_qc]

# Load classifier
# joblib files are pickle-based: only load model files from a trusted location.
rf_save_dir = "/cluster/home/gutgi/git_repositories/morphodynamics-of-human-brain-organoid-patterning/light_sheet_analysis/cell_morphology_analysis/models/random_forrest_all_v16_08_23_test_3.joblib"
grid_clf = joblib.load(rf_save_dir)
region_properties_table = region_properties_table_duplicate.copy()

# Predict the label
# Features are selected by position (all columns except "time_point", then
# columns 2 .. -8), so the column order must match the one used for training.
X_all = (
    region_properties_table.loc[:, region_properties_table.columns != "time_point"]
    .iloc[:, 2:-8]
    .copy()
)
X_all["channel"] = (region_properties_table["channel"] == "GFP").astype(int)
X_all = np.array(X_all)
region_properties_table["structure_labels"] = grid_clf.predict(X_all)

# Remove unlikely masks: one combined mask, equivalent to applying the
# individual row filters one after another.
#   labels 2, 6    are dropped for the "AGAR" experiment
#   labels 2, 3    are only kept in the "GFP" channel
#   labels 4, 5, 6 are only kept in the "mCherry" channel
predicted_label = region_properties_table["structure_labels"]
imaging_channel = region_properties_table["channel"]
from_agar = region_properties_table["experiment"] == "AGAR"
implausible_mask = (
    (predicted_label.isin([2, 6]) & from_agar)
    | (predicted_label.isin([2, 3]) & (imaging_channel != "GFP"))
    | (predicted_label.isin([4, 5, 6]) & (imaging_channel != "mCherry"))
)
# `.copy()` so that adding "Day" below does not write into a slice.
region_properties_table = region_properties_table.loc[~implausible_mask].copy()

# Some QC, number of each label per day
region_properties_table["Day"] = np.round(
    4 + region_properties_table["time_point"] / 24, 0
).astype(int)

# Only save good masks (everything not predicted as label 1); the "bbox_image"
# column is dropped because it is no longer needed in the saved table.
np.set_printoptions(threshold=sys.maxsize)
save_table = region_properties_table[
    region_properties_table["structure_labels"] != 1
].drop(columns="bbox_image")

# save pd.DataFrame to H5
# NOTE(review): the file name has no ".h5" suffix and says "replicates" - confirm
# that this is the intended output name for these experiments.
save_table.to_hdf(
    f"{seg_directory}/QC_morphometrics_v20_06_2023_replicates", key="test_h5"
)

In [None]:
# Same pipeline as the previous cell, run on the replicate experiment only:
# extract -> basic QC -> classify -> drop implausible labels -> save to HDF5.
#
# NOTE(review): `read_extract` is not defined in this part of the notebook. It
# is called with a file index only, so it presumably reads the module-level
# `input_dir`, `output_dir` and `all_files` assigned in the loop below; those
# names are therefore kept as globals - confirm.
experiment_directories = ["AGAR_replicates_v2_all_06_02_2023/"]
extracted_frames = []
for experiment_directory in experiment_directories:
    input_dir = seg_directory + experiment_directory
    print(input_dir)
    output_dir = input_dir + "/morphometrics/"
    all_files = [os.path.basename(x) for x in glob.glob(f"{input_dir}/images/*")]
    results = Parallel(n_jobs=12, verbose=10)(
        delayed(read_extract)(i) for i in range(len(all_files))
    )
    extracted_frames.extend(results)
    del results

if not extracted_frames:
    # Fail here with a readable message instead of a later KeyError on "bbox-3".
    raise ValueError(
        f"No images found under {seg_directory} for {experiment_directories}"
    )

# Concatenate once: `DataFrame.append` was removed in pandas 2.0 and growing a
# frame inside a loop copies all previous rows on every iteration.
all_structures = pd.concat(extracted_frames)
del extracted_frames

# `reset_index()` deliberately keeps the old index as a column: the positional
# feature slice further down (`.iloc[:, 2:-8]`) depends on the column layout.
region_properties_table = (
    all_structures.reset_index()
    .dropna(axis=1, how="all")
    .dropna(axis=0, how="any")
)

# Remove unlikely masks
region_properties_table["z_hight"] = (
    region_properties_table["bbox-3"] - region_properties_table["bbox-0"]
)
# NOTE(review): presumably a minimum object size of 100 (physical units)
# divided by the voxel volume at half resolution - confirm the units.
min_mask_area = 100 / (0.347 * 2 * 2 * 0.347 * 2 * 0.347)
passes_basic_qc = (region_properties_table["max_intensity"] >= 20) & (
    region_properties_table["area"] >= min_mask_area
)

# Snapshot of the QC-filtered table before classification. The original cell
# read `region_properties_table_duplicate` without assigning it here, so it
# either raised NameError or silently classified and saved whatever table an
# earlier cell had left in the kernel instead of the replicate data.
region_properties_table_duplicate = region_properties_table.loc[passes_basic_qc]

# Load classifier
# joblib files are pickle-based: only load model files from a trusted location.
rf_save_dir = "/cluster/home/gutgi/git_repositories/morphodynamics-of-human-brain-organoid-patterning/light_sheet_analysis/cell_morphology_analysis/models/random_forrest_all_v16_08_23_test_3.joblib"
grid_clf = joblib.load(rf_save_dir)
region_properties_table = region_properties_table_duplicate.copy()

# Predict the label
# Features are selected by position (all columns except "time_point", then
# columns 2 .. -8), so the column order must match the one used for training.
X_all = (
    region_properties_table.loc[:, region_properties_table.columns != "time_point"]
    .iloc[:, 2:-8]
    .copy()
)
X_all["channel"] = (region_properties_table["channel"] == "GFP").astype(int)
X_all = np.array(X_all)
region_properties_table["structure_labels"] = grid_clf.predict(X_all)

# Remove unlikely masks: one combined mask, equivalent to applying the
# individual row filters one after another.
#   labels 2, 6    are dropped for the "AGAR" experiment
#   labels 2, 3    are only kept in the "GFP" channel
#   labels 4, 5, 6 are only kept in the "mCherry" channel
predicted_label = region_properties_table["structure_labels"]
imaging_channel = region_properties_table["channel"]
from_agar = region_properties_table["experiment"] == "AGAR"
implausible_mask = (
    (predicted_label.isin([2, 6]) & from_agar)
    | (predicted_label.isin([2, 3]) & (imaging_channel != "GFP"))
    | (predicted_label.isin([4, 5, 6]) & (imaging_channel != "mCherry"))
)
# `.copy()` so that adding "Day" below does not write into a slice.
region_properties_table = region_properties_table.loc[~implausible_mask].copy()

# Some QC, number of each label per day
region_properties_table["Day"] = np.round(
    4 + region_properties_table["time_point"] / 24, 0
).astype(int)

# Only save good masks (everything not predicted as label 1); the "bbox_image"
# column is dropped because it is no longer needed in the saved table.
np.set_printoptions(threshold=sys.maxsize)
save_table = region_properties_table[
    region_properties_table["structure_labels"] != 1
].drop(columns="bbox_image")

# save pd.DataFrame to H5
# NOTE(review): this cell processes only the replicate experiment but writes a
# file named "..._all_v2.h5" - confirm that this is the intended output name.
save_table.to_hdf(
    f"{seg_directory}/QC_morphometrics_all_v20_06_2023_all_v2.h5", key="test_h5"
)