In [1]:
import h5py
import glob
import pandas as pd

In [2]:
def compute_ploidy(df):
    """
    Count the rows of each blob color in a table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a "color" column.

    Returns
    -------
    tuple
        ``(reds, greens)``: the number of rows whose "color" equals "red"
        and the number whose "color" equals "green", in that order.
        Rows with any other color are ignored.
    """
    colors = df["color"]
    return tuple((colors == name).sum() for name in ("red", "green"))


def filter_by_ploidy(df, min_ploidy):
    """
    Keep only the rows of cells that have enough red AND enough green blobs.

    Rows are grouped by the "cell" column; a cell is kept when it has at
    least ``min_ploidy`` rows with color "red" and at least ``min_ploidy``
    rows with color "green".

    Parameters
    ----------
    df : pandas.DataFrame
        Table with (at least) the columns "cell" and "color".
    min_ploidy : int
        Minimum number of blobs of each color a cell must have.
        Values below 1 disable the filter.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when ``min_ploidy < 1``. Otherwise the rows of the
        retained cells, concatenated in group (sorted "cell") order with
        their original index labels. When no cell qualifies, an empty frame
        with the same columns as ``df`` is returned.
    """
    if min_ploidy < 1:
        return df

    kept = []
    for _, cell_blobs in df.groupby("cell"):
        num_red = (cell_blobs["color"] == "red").sum()
        num_green = (cell_blobs["color"] == "green").sum()

        # The group is appended untouched: every row already carries its
        # "cell" value, so there is no need to write into the sub-frame
        # (which could trigger pandas' SettingWithCopyWarning).
        if num_red >= min_ploidy and num_green >= min_ploidy:
            kept.append(cell_blobs)

    if not kept:
        # pd.concat([]) raises ValueError; return an empty table that still
        # has the expected columns so callers can keep using it.
        return df.iloc[0:0]

    return pd.concat(kept)

In [3]:
# Score cutoff: a nucleus is counted as valid in the per-file summary
# when its score is >= this value.
oc_thresh = 0.95

In [4]:
# For every segmented-nuclei HDF5 file, report how many nuclei reach the
# score threshold (valid_nuclei) out of all scored nuclei (nuclei).
print("path\tvalid_nuclei\tnuclei")
for path in sorted(glob.glob("../data/output/fish/MCF10A_*.h5")):
    # Open read-only explicitly: the files are only inspected here, and the
    # default mode of h5py.File is not read-only in h5py releases before 3.0.
    with h5py.File(path, "r") as h5:
        num_nuclei = 0
        tot_nuclei = 0
        # Each top-level entry is expected to hold a "scores" dataset.
        for img in h5:
            scores = h5[f"{img}/scores"][:]
            num_nuclei += (scores >= oc_thresh).sum()
            tot_nuclei += len(scores)

        print(f"{path}\t{num_nuclei}\t{tot_nuclei}")

path	valid_nuclei	nuclei
../data/output/fish/MCF10A_C1_AD_segmented_nuclei_with_blobs.h5	95	129
../data/output/fish/MCF10A_C1_AE_segmented_nuclei_with_blobs.h5	43	70
../data/output/fish/MCF10A_WT_AD_segmented_nuclei_with_blobs.h5	34	45
../data/output/fish/MCF10A_WT_AE_segmented_nuclei_with_blobs.h5	28	37
../data/output/fish/MCF10A_WT_BE_segmented_nuclei_with_blobs.h5	1	11


In [5]:
# For every blob table, report the number of red and green blobs that remain
# after dropping cells with fewer than two blobs of either color.
print("path\tnum_red_blobs\tnum_green_blobs")
for path in sorted(glob.glob("../data/output/fish/blobs/MCF10A_*.tsv.gz")):
    df = filter_by_ploidy(pd.read_table(path), min_ploidy=2)
    # Reuse the shared counting helper instead of repeating the color masks.
    num_red_blobs, num_green_blobs = compute_ploidy(df)
    print(f"{path}\t{num_red_blobs}\t{num_green_blobs}")

path	num_red_blobs	num_green_blobs
../data/output/fish/blobs/MCF10A_C1_AD.blobs.tsv.gz	269	272
../data/output/fish/blobs/MCF10A_C1_AE.blobs.tsv.gz	140	86
../data/output/fish/blobs/MCF10A_WT_AD.blobs.tsv.gz	62	61
../data/output/fish/blobs/MCF10A_WT_AE.blobs.tsv.gz	62	60
../data/output/fish/blobs/MCF10A_WT_BE.blobs.tsv.gz	2	2
