In [None]:
# default_exp clone_counters

In [None]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# export
from functools import partial
from glob import glob
from typing import Callable

import dask.array as da
import dask.dataframe as dd
import numpy as np
import pandas as pd
import xarray as xr
from skimage import measure

from py_clone_detective.utils import (
    add_scale_regionprops_table_area_measurements,
    calculate_corresponding_labels,
    calculate_overlap,
    check_channels_input_suitable_and_return_channels,
    determine_labels_across_other_images_using_centroids,
    extend_region_properties_list,
    get_all_labeled_clones_unmerged_and_merged,
    img_path_to_xarr,
    last2dims,
    lazy_props,
    reorder_df_to_put_ch_info_first,
    update_1st_coord_and_dim_of_xarr,
)

# CloneCounter Classes

## Parent Class

In [None]:
# export
class CloneCounter:
    """Load image/segmentation stacks and derive per-label measurements.

    ``add_images``, ``add_segmentations`` and
    ``add_clones_and_neighbouring_labels`` only *return* the arrays they build.
    The measuring methods read ``self.image_data``, which this class never
    assigns itself, so it has to be set (as the subclasses in this file do)
    before they are called.

    Attributes assigned by methods of this class:
        seg_img_channel_pairs: set by ``determine_seg_img_channel_pairs``.
        results_measurements: set by ``make_measurements``.
        tot_seg_ch_max_labels: set by ``_determine_max_seg_label_levels``.
        results_overlaps: set by ``measure_overlap``.
        results_clones_and_neighbour_counts: set by
            ``measure_clones_and_neighbouring_labels``.
    """

    def __init__(
        self,
        exp_name: str,
        img_name_regex: str,
        pixel_size: float,
        tot_seg_ch: str = "C0",
    ):
        """Store the experiment settings; nothing is loaded here.

        Args:
            exp_name: stored as ``self.exp_name``; not read by any method of
                this class.
            img_name_regex: forwarded to ``img_path_to_xarr`` when loading.
            pixel_size: forwarded to ``img_path_to_xarr`` and used to scale the
                area measurements.
            tot_seg_ch: segmentation channel used for label counting, clone
                construction and centroid collection.
        """
        self.exp_name = exp_name
        self.img_name_regex = img_name_regex
        self.pixel_size = pixel_size
        self.tot_seg_ch = tot_seg_ch

    def add_images(self, **channel_path_globs):
        """Load images through ``img_path_to_xarr`` with first dim ``img_channels``.

        Args:
            **channel_path_globs: forwarded unchanged to ``img_path_to_xarr``.

        Returns:
            Whatever ``img_path_to_xarr`` returns.
        """
        return img_path_to_xarr(
            self.img_name_regex,
            self.pixel_size,
            ch_name_for_first_dim="img_channels",
            **channel_path_globs,
        )

    def add_segmentations(
        self,
        additional_func_to_map: Callable = None,
        ad_func_kwargs: dict = None,
        **channel_path_globs,
    ):
        """Load segmentations, optionally pre-process them, then relabel them.

        Args:
            additional_func_to_map: optional function applied to every block
                (via ``map_blocks``) before labelling.
            ad_func_kwargs: keyword arguments for ``additional_func_to_map``;
                ``None`` is treated as "no extra arguments".
            **channel_path_globs: forwarded unchanged to ``img_path_to_xarr``.

        Returns:
            The loaded array with ``measure.label`` (wrapped by ``last2dims``)
            mapped over its blocks as ``uint16``.
        """
        segmentations = img_path_to_xarr(
            self.img_name_regex,
            self.pixel_size,
            ch_name_for_first_dim="seg_channels",
            **channel_path_globs,
        )

        if additional_func_to_map is not None:
            # The default for ad_func_kwargs is None and ``**None`` raises
            # TypeError, so fall back to an empty mapping.
            extra_kwargs = ad_func_kwargs if ad_func_kwargs is not None else {}
            segmentations.data = segmentations.data.map_blocks(
                additional_func_to_map, **extra_kwargs, dtype=np.uint16
            )

        segmentations.data = segmentations.data.map_blocks(
            last2dims(partial(measure.label)), dtype=np.uint16
        )
        return segmentations

    def combine_C0_overlaps_and_measurements(self):
        """Join the overlap table with the ``seg_ch == 'C0'`` measurements.

        Requires ``measure_overlap`` and ``make_measurements`` to have run.

        Returns:
            A DataFrame indexed by ``(img_name, C0_labels)`` containing one
            column per ``colocalisation_ch`` plus the measurement columns.
            Rows with ``C0_labels == 0`` are excluded.
        """
        ov_df = (
            self.results_overlaps.pivot(
                index=["img_name", "C0_labels"],
                columns=["colocalisation_ch"],
                values="is_in_label",
            )
            .query("C0_labels != 0")
            .copy()
        )
        # Give the measurement index the same level names as the overlap
        # index so the two frames can be merged index-on-index.
        sk_df = (
            self.results_measurements.query("seg_ch== 'C0'")
            .set_index(["seg_img", "label"])
            .rename_axis(index=["img_name", "C0_labels"])
        )
        return pd.merge(ov_df, sk_df, left_index=True, right_index=True)

    def determine_seg_img_channel_pairs(
        self, seg_channels: list = None, img_channels: list = None
    ):
        """Build ``self.seg_img_channel_pairs`` (segmentation/image channel table).

        Both lists are first passed through
        ``check_channels_input_suitable_and_return_channels`` together with the
        channels available on ``self.image_data``. When fewer segmentation
        channels than image channels are given, the last segmentation channel
        is forward-filled onto the remaining rows.
        """
        seg_channels = check_channels_input_suitable_and_return_channels(
            channels=seg_channels,
            available_channels=self.image_data.seg_channels.values.tolist(),
        )

        img_channels = check_channels_input_suitable_and_return_channels(
            channels=img_channels,
            available_channels=self.image_data.img_channels.values.tolist(),
        )

        seg_img_channel_pairs = pd.DataFrame()
        seg_img_channel_pairs["image_channel"] = pd.Series(img_channels)
        seg_img_channel_pairs["segmentation_channel"] = pd.Series(seg_channels)
        # ``.ffill()`` replaces the deprecated ``fillna(method="ffill")``.
        self.seg_img_channel_pairs = seg_img_channel_pairs.ffill()[
            ["segmentation_channel", "image_channel"]
        ]

    def make_measurements(
        self,
        seg_channels: list = None,
        img_channels: list = None,
        extra_properties: list = None,
        **kwargs,
    ):
        """Measure region properties for every segmentation/image channel pair.

        Args:
            seg_channels: forwarded to ``determine_seg_img_channel_pairs``.
            img_channels: forwarded to ``determine_seg_img_channel_pairs``.
            extra_properties: passed to ``extend_region_properties_list`` to
                build the property list.
            **kwargs: forwarded to every ``lazy_props`` call.

        Side effects:
            Sets ``self.seg_img_channel_pairs``, ``self.results_measurements``
            and ``self.tot_seg_ch_max_labels``.
        """

        self.determine_seg_img_channel_pairs(seg_channels, img_channels)

        properties = extend_region_properties_list(extra_properties)

        results = list()
        for _, seg_ch, img_ch in self.seg_img_channel_pairs.itertuples():
            for seg, img in zip(
                self.image_data["segmentations"].loc[seg_ch],
                self.image_data["images"].loc[img_ch],
            ):
                results.append(
                    lazy_props(
                        seg.data,
                        img.data,
                        seg.seg_channels.item(),
                        img.img_channels.item(),
                        seg.img_name.item(),
                        img.img_name.item(),
                        properties,
                        **kwargs,
                    )
                )

        df = dd.from_delayed(results).compute()
        df = add_scale_regionprops_table_area_measurements(df, self.pixel_size)
        self.results_measurements = reorder_df_to_put_ch_info_first(df)
        self._determine_max_seg_label_levels()

    def _determine_max_seg_label_levels(self):
        """Set ``self.tot_seg_ch_max_labels``.

        The value is the largest number of distinct pixel values found in any
        single block of the ``tot_seg_ch`` segmentation.
        """
        self.tot_seg_ch_max_labels = (
            self.image_data["segmentations"]
            .loc[self.tot_seg_ch]
            .data.map_blocks(
                lambda x: np.unique(x).shape[0], drop_axis=(1, 2), dtype=np.uint16,
            )
            .compute()
            .max()
        )

    def _create_df_from_arr(self, arr):
        """Turn the overlap array into a long DataFrame with ``is_in_label``.

        The first two axes of ``arr`` are swapped, then labelled with every
        segmentation channel except the first, the image names, and
        ``0 .. tot_seg_ch_max_labels - 1``. Rows containing NaN are dropped.
        """
        return (
            xr.DataArray(
                np.moveaxis(arr, 1, 0),
                coords=(
                    self.image_data["segmentations"].coords["seg_channels"][1:],
                    self.image_data["segmentations"].coords["img_name"],
                    np.arange(self.tot_seg_ch_max_labels),
                ),
                dims=("colocalisation_ch", "img_name", "C0_labels",),
            )
            .to_dataframe("is_in_label")
            .reset_index()
            .dropna()
        )

    def measure_overlap(self):
        """Compute ``calculate_overlap`` over the segmentations.

        Stores the result as ``self.results_overlaps`` with columns
        ``img_name``, ``C0_labels``, ``colocalisation_ch`` and ``is_in_label``
        (cast to ``uint16``).
        """
        self._determine_max_seg_label_levels()
        arr = (
            self.image_data["segmentations"]
            .data.map_blocks(
                calculate_overlap,
                drop_axis=[0],
                dtype=np.float64,
                num_of_segs=self.image_data["segmentations"].shape[0],
                preallocate_value=self.tot_seg_ch_max_labels,
            )
            .compute()
        )

        df = self._create_df_from_arr(arr)
        df["is_in_label"] = df["is_in_label"].astype(np.uint16)
        self.results_overlaps = df[
            ["img_name", "C0_labels", "colocalisation_ch", "is_in_label"]
        ]

    def clones_to_keep_as_dict(self, query_for_pd: str):
        """Return ``{int_img: [label, ...]}`` for measurement rows matching a query.

        Args:
            query_for_pd: expression passed to ``DataFrame.query`` on
                ``self.results_measurements``.
        """
        return (
            self.results_measurements.query(query_for_pd)
            .groupby("int_img")["label"]
            .agg(list)
            .to_dict()
        )

    def get_centroids_list(self):
        """Return one integer ``(n_labels, 2)`` centroid array per image.

        Only rows whose ``int_img_ch`` equals ``self.tot_seg_ch`` are used.
        Images appear in order of first occurrence and coordinates are
        truncated by ``astype(int)``.
        """
        measurements = self.results_measurements
        df = measurements[measurements["int_img_ch"] == self.tot_seg_ch]
        centroids_list = list()
        for img_name in df["int_img"].unique():
            per_image = df.loc[df["int_img"] == img_name, ["centroid-0", "centroid-1"]]
            centroids_list.append(per_image.values.astype(int))
        return centroids_list

    def add_clones_and_neighbouring_labels(
        self,
        query_for_pd: str = 'int_img_ch == "C1" & mean_intensity > 1000',
        name_for_query: str = "filt_C1_intensity",
        calc_clones: bool = True,
    ):
        """Build label images for the labels selected by ``query_for_pd``.

        Args:
            query_for_pd: query selecting the labels to keep (see
                ``clones_to_keep_as_dict``).
            name_for_query: prefix of the new first dimension, which is
                requested as ``f"{name_for_query}_neighbours"``.
            calc_clones: when true a ``"clone"`` entry is appended to the new
                coordinate; the flag is also forwarded to
                ``get_all_labeled_clones_unmerged_and_merged``.

        Returns:
            An ``xr.DataArray`` whose attrs record the query under
            ``f"{self.tot_seg_ch}_labels_kept_query"``.
        """
        new_coord = [
            "extended_tot_seg_labels",
            "total_neighbour_counts",
            "inside_clone_neighbour_counts",
            "outside_clone_neighbour_counts",
        ]

        if calc_clones:
            new_coord.append("clone")

        clone_coords, clone_dims = update_1st_coord_and_dim_of_xarr(
            self.image_data["images"],
            new_coord=new_coord,
            new_dim=f"{name_for_query}_neighbours",
        )

        clones_to_keep = self.clones_to_keep_as_dict(query_for_pd)

        new_label_imgs = get_all_labeled_clones_unmerged_and_merged(
            self.image_data["segmentations"].loc[self.tot_seg_ch],
            clones_to_keep,
            calc_clones,
        )

        return xr.DataArray(
            data=new_label_imgs,
            coords=clone_coords,
            dims=clone_dims,
            attrs={f"{self.tot_seg_ch}_labels_kept_query": query_for_pd},
        )

    def colabels_to_df(self, colabels, name_for_query):
        """Convert the co-label array into a wide ``uint16`` DataFrame.

        Args:
            colabels: array labelled along its three axes with the first
                coordinate of ``self.image_data[name_for_query]``, the image
                names, and ``range(colabels.shape[2])``.
            name_for_query: key of the variable in ``self.image_data``.

        Returns:
            A DataFrame indexed by ``(img_name, labels)`` with one column per
            entry of the first coordinate.
        """
        labelled = self.image_data[name_for_query]
        first_coord_name = "extended_labels_neighbour_counts"
        if first_coord_name not in labelled.coords:
            # NOTE(review): add_clones_and_neighbouring_labels requests the
            # dimension name f"{name_for_query}_neighbours", so the legacy
            # coordinate name is presumably absent; use the array's own first
            # dimension in that case. Confirm against
            # update_1st_coord_and_dim_of_xarr.
            first_coord_name = labelled.dims[0]

        return (
            xr.DataArray(
                colabels,
                coords=(
                    labelled.coords[first_coord_name],
                    # Was ``foo.image_data`` (a notebook global); must be self.
                    labelled.coords["img_name"],
                    range(colabels.shape[2]),
                ),
                dims=("extended_labels_neighbour_counts", "img_name", "labels"),
            )
            .to_dataframe("colabel")
            .reset_index()
            .dropna()
            .pivot(
                index=["img_name", "labels"],
                columns=["extended_labels_neighbour_counts"],
                values="colabel",
            )
            .astype(np.uint16)
        )

    def measure_clones_and_neighbouring_labels(self, name_for_query):
        """Compute co-labels for ``name_for_query`` and store them as a table.

        Requires ``make_measurements`` (for the centroids and
        ``tot_seg_ch_max_labels``) and a variable called ``name_for_query`` in
        ``self.image_data``. The result is stored as
        ``self.results_clones_and_neighbour_counts``.
        """
        labelled = self.image_data[name_for_query]
        colabels = calculate_corresponding_labels(
            labelled.data,
            self.get_centroids_list(),
            labelled.shape[0],
            # Was ``foo.tot_seg_ch_max_labels`` (a notebook global).
            self.tot_seg_ch_max_labels,
        )

        self.results_clones_and_neighbour_counts = self.colabels_to_df(
            colabels, name_for_query
        )

## CloneCounter subclasses

In [None]:
# export
class LazyCloneCounter(CloneCounter):
    """CloneCounter that stores loaded arrays on ``self.image_data`` as returned.

    Each ``add_*`` method calls the parent implementation and assigns the
    result into the ``image_data`` dataset without calling ``persist``.
    """

    def __init__(
        self,
        exp_name: str,
        img_name_regex: str,
        pixel_size: float,
        tot_seg_ch: str = "C0",
    ):
        """Forward all settings to ``CloneCounter.__init__``.

        ``tot_seg_ch`` is optional and defaults to the parent's default, so
        existing three-argument calls behave as before.
        """
        super().__init__(exp_name, img_name_regex, pixel_size, tot_seg_ch)

    def add_images(self, **channel_path_globs):
        """Create ``self.image_data`` as a new Dataset holding ``"images"``.

        Calling this again replaces the whole dataset, including any
        segmentations added earlier.
        """
        self.image_data = xr.Dataset(
            {"images": super().add_images(**channel_path_globs)}
        )

    def add_segmentations(
        self,
        additional_func_to_map: Callable = None,
        ad_func_kwargs: dict = None,
        **channel_path_globs
    ):
        """Store the parent's result as ``self.image_data["segmentations"]``.

        Requires ``add_images`` to have created ``self.image_data`` first.
        """
        self.image_data["segmentations"] = super().add_segmentations(
            additional_func_to_map, ad_func_kwargs, **channel_path_globs
        )

    def add_clones_and_neighbouring_labels(
        self,
        query_for_pd: str = 'int_img_ch == "C1" & mean_intensity > 1000',
        name_for_query: str = "filt_C1_intensity",
        calc_clones: bool = True,
    ):
        """Store the parent's result as ``self.image_data[name_for_query]``."""
        self.image_data[name_for_query] = super().add_clones_and_neighbouring_labels(
            query_for_pd, name_for_query, calc_clones
        )

In [None]:
# export
class PersistentCloneCounter(CloneCounter):
    """CloneCounter that calls ``.persist()`` on everything it stores.

    Each ``add_*`` method calls the parent implementation, persists the result
    and assigns it into the ``image_data`` dataset.
    """

    def __init__(
        self,
        exp_name: str,
        img_name_regex: str,
        pixel_size: float,
        tot_seg_ch: str = "C0",
    ):
        """Forward all settings to ``CloneCounter.__init__``.

        ``tot_seg_ch`` is optional and defaults to the parent's default, so
        existing three-argument calls behave as before.
        """
        super().__init__(exp_name, img_name_regex, pixel_size, tot_seg_ch)

    def add_images(self, **channel_path_globs):
        """Create ``self.image_data`` as a persisted Dataset holding ``"images"``.

        Calling this again replaces the whole dataset, including any
        segmentations added earlier.
        """
        self.image_data = xr.Dataset(
            {"images": super().add_images(**channel_path_globs)}
        ).persist()

    def add_segmentations(
        self,
        additional_func_to_map: Callable = None,
        ad_func_kwargs: dict = None,
        **channel_path_globs,
    ):
        """Persist the parent's result and store it as ``"segmentations"``.

        Requires ``add_images`` to have created ``self.image_data`` first.
        """
        self.image_data["segmentations"] = (
            super()
            .add_segmentations(
                additional_func_to_map, ad_func_kwargs, **channel_path_globs
            )
            .persist()
        )

    def add_clones_and_neighbouring_labels(
        self,
        query_for_pd: str = 'int_img_ch == "C1" & mean_intensity > 1000',
        name_for_query: str = "filt_C1_intensity",
        calc_clones: bool = True,
    ):
        """Persist the parent's result and store it under ``name_for_query``."""
        self.image_data[name_for_query] = (
            super()
            .add_clones_and_neighbouring_labels(
                query_for_pd, name_for_query, calc_clones
            )
            .persist()
        )

In [None]:
# hide
# NOTE(review): this import lives in a hidden cell rather than in the import
# cell at the top of the notebook.
from dask.distributed import Client

# Client() is created with no arguments; the recorded output below shows it
# started a LocalCluster with 4 workers.
c = Client()
# Bare expression so the notebook renders the client summary.
c

0,1
Connection method: Cluster object,Cluster type: LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Status: running,Using processes: True
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads:  8,Total memory:  8.00 GiB

0,1
Comm: tcp://127.0.0.1:50671,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads:  8
Started:  Just now,Total memory:  8.00 GiB

0,1
Comm: tcp://127.0.0.1:50681,Total threads: 2
Dashboard: http://127.0.0.1:50683/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:50674,
Local directory: /Users/ottomorris/Documents/py_clone_detective/dask-worker-space/worker-j3_39uwy,Local directory: /Users/ottomorris/Documents/py_clone_detective/dask-worker-space/worker-j3_39uwy

0,1
Comm: tcp://127.0.0.1:50687,Total threads: 2
Dashboard: http://127.0.0.1:50688/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:50676,
Local directory: /Users/ottomorris/Documents/py_clone_detective/dask-worker-space/worker-iopbg1qj,Local directory: /Users/ottomorris/Documents/py_clone_detective/dask-worker-space/worker-iopbg1qj

0,1
Comm: tcp://127.0.0.1:50689,Total threads: 2
Dashboard: http://127.0.0.1:50691/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:50675,
Local directory: /Users/ottomorris/Documents/py_clone_detective/dask-worker-space/worker-8dvd7vt8,Local directory: /Users/ottomorris/Documents/py_clone_detective/dask-worker-space/worker-8dvd7vt8

0,1
Comm: tcp://127.0.0.1:50682,Total threads: 2
Dashboard: http://127.0.0.1:50684/status,Memory: 2.00 GiB
Nanny: tcp://127.0.0.1:50673,
Local directory: /Users/ottomorris/Documents/py_clone_detective/dask-worker-space/worker-ofb5yip3,Local directory: /Users/ottomorris/Documents/py_clone_detective/dask-worker-space/worker-ofb5yip3


## Example using LazyCloneCounter with measure_overlap

In [None]:
# NOTE(review): `foo` is first assigned two cells further down, so this cell
# raises NameError on a fresh kernel (Restart & Run All). make_measurements and
# measure_overlap already call this method themselves.
foo._determine_max_seg_label_levels()

In [None]:
# NOTE(review): depends on `foo`, which is only created in the next cell; on a
# fresh kernel this cell fails. The attribute is set by
# _determine_max_seg_label_levels.
foo.tot_seg_ch_max_labels

In [None]:
# Create the counter: experiment name, image-name regex, pixel size.
# NOTE(review): pixel size units are not stated here; the *_um2 result columns
# suggest micrometres — confirm.
foo = LazyCloneCounter("Marcm2a_E7F1", r"a\dg\d\dp\d", 0.275)

# One glob per image channel; the keyword names become the channel names used
# later in queries such as 'int_img_ch == "C1"'.
foo.add_images(
    C0="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C0/C0_imgs/*.tif*",
    C1="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C1/C1_imgs/*.tif*",
    C2="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C2/C2_imgs/*.tif*",
    C3="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C3/C3_imgs/*.tif*",
)

# One glob per segmentation channel; no extra pre-processing function is
# mapped in this example.
foo.add_segmentations(
    C0="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C0/C0_label_imgs_combined_C3/*.tif*",
    C1="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C1/C1_binaries/*.tif*",
    C2="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C2/C2_label_imgs_v2/*.tif*",
    C3="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C3/C3_label_imgs/*.tif*",
)
# Fills foo.results_measurements (with "convex_area" added to the properties).
foo.make_measurements(extra_properties=["convex_area"],)
# Fills foo.results_overlaps.
foo.measure_overlap()
# Last expression: the merged overlap + C0 measurement table is displayed.
foo.combine_C0_overlaps_and_measurements()

## Example using LazyCloneCounter with add_clones_and_neighbouring_labels

In [None]:
from skimage import morphology

In [None]:
# Start a fresh counter for the second example; this rebinds `foo`, so results
# from the previous example are no longer reachable through it.
foo = LazyCloneCounter("Marcm2a_E7F1", r"a\dg\d\dp\d", 0.275)

# Same four image channels as in the overlap example.
foo.add_images(
    C0="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C0/C0_imgs/*.tif*",
    C1="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C1/C1_imgs/*.tif*",
    C2="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C2/C2_imgs/*.tif*",
    C3="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C3/C3_imgs/*.tif*",
)

In [None]:
# Only the C0 segmentation is loaded here. remove_small_objects is mapped over
# every block with min_size=49 before the segmentation is relabelled.
# NOTE(review): 49 is an unexplained threshold — document where it comes from.
foo.add_segmentations(
    morphology.remove_small_objects,
    ad_func_kwargs={"min_size": 49},
    C0="../current_imaging_analysis/MARCM2A_E7F1_refactoring/C0/C0_label_imgs_combined_C3/*.tif*",
)

In [None]:
# Fills foo.results_measurements and foo.tot_seg_ch_max_labels, both of which
# the clone/neighbour cells below rely on.
foo.make_measurements(extra_properties=["convex_area"],)

In [None]:
# Keep labels whose C1 mean intensity exceeds 1000 and store the resulting
# label images as foo.image_data["filt_C1_int"], including the "clone" entry.
# NOTE(review): the 1000 threshold is not explained in this notebook.
foo.add_clones_and_neighbouring_labels(
    query_for_pd='int_img_ch == "C1" & mean_intensity > 1000',
    name_for_query="filt_C1_int",
    calc_clones=True,
)

In [None]:
# Same as the previous cell but for C2 with a threshold of 1800, stored as
# foo.image_data["filt_C2_int"]; calc_clones=False leaves out the "clone" entry.
# NOTE(review): the 1800 threshold is not explained in this notebook.
foo.add_clones_and_neighbouring_labels(
    query_for_pd='int_img_ch == "C2" & mean_intensity > 1800',
    name_for_query="filt_C2_int",
    calc_clones=False,
)

In [None]:
# Look up the image name stored at position 46 of the img_name coordinate.
# NOTE(review): 46 is a hard-coded position and depends on the files found.
foo.image_data['images'].coords['img_name'].values[46]

'a2g04p1'

In [None]:
# Show every measurement row for image "a2g04p1" (the name displayed by the
# previous cell).
foo.results_measurements.query('int_img == "a2g04p1"')

Unnamed: 0,seg_ch,int_img_ch,seg_img,int_img,label,area,mean_intensity,centroid-0,centroid-1,convex_area,area_um2,convex_area_um2
0,C0,C0,a2g04p1,a2g04p1,1,412,4573.956311,9.538835,378.412621,439,31.157500,33.199375
1,C0,C0,a2g04p1,a2g04p1,2,60,253.666667,4.966667,358.800000,64,4.537500,4.840000
2,C0,C0,a2g04p1,a2g04p1,3,234,3061.098291,10.816239,258.948718,249,17.696250,18.830625
3,C0,C0,a2g04p1,a2g04p1,4,136,2997.367647,13.808824,243.080882,143,10.285000,10.814375
4,C0,C0,a2g04p1,a2g04p1,5,78,174.051282,18.782051,419.192308,93,5.898750,7.033125
...,...,...,...,...,...,...,...,...,...,...,...,...
148,C0,C3,a2g04p1,a2g04p1,149,1084,98.000000,789.814576,745.044280,1116,81.977500,84.397500
149,C0,C3,a2g04p1,a2g04p1,150,798,37.137845,792.151629,784.005013,819,60.348750,61.936875
150,C0,C3,a2g04p1,a2g04p1,151,468,114.299145,801.320513,638.416667,479,35.392500,36.224375
151,C0,C3,a2g04p1,a2g04p1,152,162,129.777778,805.246914,609.660494,166,12.251250,12.553750


In [None]:
#hide
# NOTE(review): import kept in this hidden cell rather than the top import cell.
import napari
# Open a viewer and add the image stack, using axis 0 as the channel axis.
view = napari.Viewer()
view.add_image(foo.image_data['images'].data, channel_axis = 0)
# Alternative overlay (disabled): the label images from the "filt_C1_int" query.
# view.add_labels(foo.image_data['filt_C1_int'].data)
# Overlay the label images from the "filt_C2_int" query.
view.add_labels(foo.image_data['filt_C2_int'].data)

  zoom = np.min(canvas_size / scale)


<Labels layer 'Labels' at 0x7fa37d711fd0>

In [None]:
# Needs foo.image_data["filt_C1_int"] from the earlier
# add_clones_and_neighbouring_labels cell. The result is stored on
# foo.results_clones_and_neighbour_counts.
foo.measure_clones_and_neighbouring_labels(name_for_query="filt_C1_int")

In [None]:
# Move the (img_name, labels) index into ordinary columns and work on a copy so
# the table stored on foo is left untouched.
df = foo.results_clones_and_neighbour_counts.reset_index().copy()

In [None]:
# Drop rows whose extended_tot_seg_labels is 0 (presumably background — TODO
# confirm) and keep only the columns used by the merge at the end of the notebook.
# NOTE(review): this rebinds `df`, so the unfiltered table from the previous
# cell is no longer available afterwards.
df = df.query("extended_tot_seg_labels != 0")[
    [
        "extended_tot_seg_labels",
        "img_name",
        "clone",
        "total_neighbour_counts",
        "inside_clone_neighbour_counts",
        "outside_clone_neighbour_counts",
    ]
]

In [None]:
# Independent copy of the measurement table for the merge in the next cell.
# NOTE(review): `bar` says nothing about its contents; consider a descriptive name.
bar = foo.results_measurements.copy()

In [None]:
# Attach the clone/neighbour counts to each measurement row by matching
# (int_img, label) with (img_name, extended_tot_seg_labels). The inner join
# keeps only rows present in both tables, and the duplicated key columns from
# the right-hand table are dropped afterwards.
pd.merge(
    bar,
    df,
    how="inner",
    left_on=["int_img", "label"],
    right_on=["img_name", "extended_tot_seg_labels"],
).drop(columns=["img_name", "extended_tot_seg_labels"])