In [1]:
import os
import sys
import time
from urllib.parse import urlparse

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline
# Render every matplotlib figure at 300 dpi.
mpl.rcParams["figure.dpi"] = 300
from IPython.display import HTML, clear_output
from PIL import Image
from skimage.io import imread, imsave
from skimage.transform import downscale_local_mean, rescale, resize

# Apply matplotlib's built-in "dark_background" style sheet to all figures.
plt.style.use("dark_background")
import glob
import re

import anndata
import h5py
import joblib
import numpy as np
import pandas as pd
import psutil
import pymeshfix
import pyvista as pv
import scanpy as sc
import skimage
from morphometrics.measure import measure_selected
from skimage import draw
from tqdm import tqdm

# Show which scikit-image version this kernel is running.
print(skimage.__version__)

  from tqdm.autonotebook import tqdm


0.19.3


In [5]:
from joblib import Parallel, delayed

# Root folder of the segmentation results and the experiment sub-folders to pool.
seg_directory = "/cluster/project/treutlein/DATA/imaging/viventis/Morphodynamics_of_human_early_brain_organoid_development/3D_Brain_organoids_half_res_morphometrics/"
experiment_directories = [
    "AGAR_replicates_v2_all_06_02_2023/",
    "images_AGAR_one_per_day_all_images_lamin_all_06_02_2023/",
    "3D_one_image_per_day_AGAR_all_all_06_02_2023/",
    "3D_one_image_per_day_all_06_02_2023/",
]

# Thresholds used to discard unlikely masks.
MIN_MAX_INTENSITY = 20
# NOTE(review): 0.347 looks like a pixel size and the factors of 2 like the
# half-resolution downsampling, i.e. a minimum volume of 100 expressed in
# voxels -- confirm the units with the acquisition metadata.
MIN_AREA = 100 / (0.347 * 2 * 2 * 0.347 * 2 * 0.347)

# NOTE(review): `read_extract` is not defined in any cell visible here; it must
# already exist in the kernel. It is called with an index into `all_files` and
# presumably reads `input_dir` / `output_dir` / `all_files` as globals, which is
# why those names are (re)assigned at module level on every iteration.
structure_tables = []
for experiment_directory in experiment_directories:
    input_dir = seg_directory + experiment_directory
    print(input_dir)
    output_dir = input_dir + "/morphometrics/"
    # Sorted so that index i refers to the same file on every run
    # (glob returns entries in arbitrary order).
    all_files = sorted(
        os.path.basename(x) for x in glob.glob(f"{input_dir}/images/*")
    )
    results = Parallel(n_jobs=12, verbose=10)(
        delayed(read_extract)(i) for i in range(len(all_files))
    )
    # Collect the per-file tables; they are concatenated once after the loop
    # instead of growing a DataFrame row block by row block.
    structure_tables.extend(results)
    del results

# Single concatenation (DataFrame.append was removed in pandas 2.0).
all_structures = pd.concat(structure_tables) if structure_tables else pd.DataFrame()
del structure_tables

# Remove nan measurements: first columns that are entirely NaN, then every row
# that still contains a NaN.
region_properties_table = (
    all_structures.reset_index()
    .dropna(axis=1, how="all")
    .dropna(axis=0, how="any")
)

# Remove unlikely masks
# Extent of the bounding box along the first axis ("z_hight" spelling is kept
# because the column name is part of the saved data).
region_properties_table["z_hight"] = (
    region_properties_table["bbox-3"] - region_properties_table["bbox-0"]
)
bright_enough = region_properties_table["max_intensity"] >= MIN_MAX_INTENSITY
large_enough = region_properties_table["area"] >= MIN_AREA
region_properties_table = region_properties_table.loc[bright_enough & large_enough]

In [None]:
# Folder holding the replicate annotation QC exports.
# NOTE(review): this cell has no execution count and the annotation-loading
# cell that follows defines the paths it reads itself, so this assignment
# may be redundant.
annotation_directory = (
    "/cluster/project/treutlein/DATA/imaging/EmbedSeg_test/data/"
    "3D_Brain_organoids_with_meta/Annotation_QC_updated_replicates/"
)

In [11]:
# Annotation folders to read, in load order. The order matters: a later cell
# de-duplicates annotations keeping the last occurrence, so tables loaded later
# take precedence over earlier ones.
ANNOTATION_DIRECTORIES = [
    "/cluster/project/treutlein/DATA/imaging/EmbedSeg_test/data/3D_Brain_organoids_with_meta/Annotation_QC_updated_replicates/",
    "/cluster/project/treutlein/DATA/imaging/EmbedSeg_test/data/3D_Brain_organoids_half_res_morphometrics/Annotation_QC_updated_replicates/",
    "/cluster/project/treutlein/DATA/imaging/EmbedSeg_test/data/3D_Brain_organoids_with_meta/Annotation_QC_updated/",
    "/cluster/project/treutlein/DATA/imaging/EmbedSeg_test/data/3D_Brain_organoids_half_res_morphometrics/Annotation_QC_updated/",
]


def load_annotation_tables(directories):
    """Read every ``.csv`` file found below the given directories into one table.

    Parameters
    ----------
    directories : iterable of str
        Folders that are walked recursively, in the given order. Folders that
        do not exist contribute nothing (``os.walk`` yields no entries).

    Returns
    -------
    pandas.DataFrame
        All CSV contents stacked row-wise in the order they were found, each
        file keeping its own row index. An empty DataFrame when no CSV file
        was found.
    """
    tables = []
    for directory in directories:
        # Traversal order within a folder is whatever os.walk reports.
        for root, _dirs, files in os.walk(directory):
            for file_name in files:
                if file_name.endswith(".csv"):
                    tables.append(pd.read_csv(os.path.join(root, file_name)))
    if not tables:
        return pd.DataFrame()
    # One concatenation instead of repeated DataFrame.append calls
    # (append was removed in pandas 2.0 and is quadratic in a loop).
    return pd.concat(tables)


labeled_DF = load_annotation_tables(ANNOTATION_DIRECTORIES)

# Kept for any later cell that still reads this name; it holds the last folder
# that was loaded.
annotation_directory = ANNOTATION_DIRECTORIES[-1]

In [12]:
# Channel each annotation class is allowed to come from, and the classes that
# cannot occur in the "AGAR" experiment. Rows violating these rules are dropped.
# NOTE(review): the meaning of the numeric classes is not visible in this
# notebook -- confirm against the annotation protocol.
REQUIRED_CHANNEL_BY_LABEL = {
    2: "GFP",
    3: "GFP",
    4: "mCherry",
    5: "mCherry",
    6: "mCherry",
}
LABELS_NOT_IN_AGAR = (2, 6)


def cell_id_from_image_name(image_name):
    """Build the cell id matching the measurement table from an image file name.

    Everything up to and including the first underscore is dropped, and the
    name is cut at the first literal ``.j``. The remainder is read as five
    underscore-separated fields; the fourth one is converted to ``float`` so
    that e.g. ``"5"`` and ``"5.0"`` give the same id. Fields after the fifth
    are ignored.

    Raises ``IndexError`` when the name has no underscore or fewer than five
    fields, and ``ValueError`` when the fourth field is not numeric.
    """
    # Plain str.split is used on purpose: pandas' .str.split treats a
    # multi-character pattern such as ".j" as a regular expression, where "."
    # matches any character, and takes `n` as keyword-only from pandas 2.0 on.
    stem = image_name.split("_", 1)[1].split(".j", 1)[0]
    fields = stem.split("_")
    return f"{fields[0]}_{fields[1]}_{fields[2]}_{float(fields[3])}_{fields[4]}"


# cell id labelled datasets
labeled_DF["cell_id"] = [
    cell_id_from_image_name(image_name) for image_name in labeled_DF["Image"]
]
# When the same cell was annotated more than once, the annotation loaded last wins.
labeled_DF = labeled_DF.drop_duplicates(subset="cell_id", keep="last")

# Cell ID
# Same id layout for the measurements:
# experiment_channel_position_timepoint_label.
region_properties_table = region_properties_table.dropna(axis=1, how="all")
label = region_properties_table["label"].astype(str)
experiment = region_properties_table["experiment"].astype(str)
channel = region_properties_table["channel"].astype(str)
position = region_properties_table["position"].astype(str)
time_point = region_properties_table["time_point"].astype(str)

region_properties_table["cell_id"] = (
    experiment + "_" + channel + "_" + position + "_" + time_point + "_" + label
)
# Inner join: only annotated cells that also have measurements are kept.
labeled_DF = labeled_DF.merge(region_properties_table, on="cell_id")

# remove not possible combinations in annotations
impossible = pd.Series(False, index=labeled_DF.index)
# Wrong channel
for label_value, required_channel in REQUIRED_CHANNEL_BY_LABEL.items():
    impossible |= (labeled_DF["Label"] == label_value) & (
        labeled_DF["channel"] != required_channel
    )
# Wrong experiment
for label_value in LABELS_NOT_IN_AGAR:
    impossible |= (labeled_DF["Label"] == label_value) & (
        labeled_DF["experiment"] == "AGAR"
    )
labeled_DF = labeled_DF.loc[~impossible]

labeled_DF.to_hdf(f"{seg_directory}/training_data_19_06_2023.h5", key="main")