In [None]:
import h5py

In [None]:
import numpy as np

In [None]:
headpath = "/n/scratch3/users/d/de64/190922_20x_phase_gfp_segmentation/190922_20x_phase_gfp_segmentation"

In [None]:
with h5py.File(headpath + "/kymograph/kymograph_0.hdf5", "r") as infile:
    data = infile["GFP"][:]

In [None]:
data.shape

In [None]:
import matplotlib
from matplotlib import pyplot as plt

matplotlib.rcParams["figure.figsize"] = [20, 10]
import skimage as sk

import paulssonlab.deaton.trenchripper.trenchripper as tr

In [None]:
# Compare two candidate focus metrics on one image from every other kymograph
# file: a Tenengrad-style gradient score and an autocorrelation-difference score.
for i in range(0, 40, 2):
    with h5py.File(
        headpath + "/kymograph/kymograph_" + str(i) + ".hdf5", "r"
    ) as infile:
        data = infile["GFP"][0, 0]
    plt.imshow(data / np.prod(data.shape))
    plt.show()

    H = data.shape[0]
    W = data.shape[1]
    # Sum of squared horizontal and vertical Sobel responses.
    ten = np.sum(sk.filters.sobel_h(data) ** 2 + sk.filters.sobel_v(data) ** 2)
    print(ten)

    # Lag-1 minus lag-5 autocorrelation along the last axis.
    # The products are formed element-wise in the image's own dtype; for an
    # integer image (the kymograph datasets are written as uint16 later in this
    # notebook) they can wrap around before being summed, and the difference of
    # two unsigned sums can wrap as well.  Promote to float64 for this term only
    # so the Sobel score above still sees the original array.
    data_f = data.astype(np.float64)
    AC = np.sum(data_f[:, :-1] * data_f[:, 1:]) - np.sum(
        data_f[:, :-5] * data_f[:, 5:]
    )
    print(AC)

In [None]:
def get_focus_score(img_arr):
    """Compute a Tenengrad-style focus score for a single image.

    The image is min-max normalised to the range [0, 1], filtered with the
    horizontal and vertical Sobel operators, and the squared responses are
    summed over all pixels.

    Parameters
    ----------
    img_arr : array-like
        A single image.

    Returns
    -------
    float
        Sum of squared Sobel responses of the normalised image.  A constant
        image (max == min) has no gradients and scores ``0.0``.
    """
    img_arr = np.asarray(img_arr)
    img_min = np.min(img_arr)
    img_max = np.max(img_arr)
    if img_max == img_min:
        # Flat image: normalising would divide 0 by 0 and turn the score into
        # NaN, which then poisons histograms and threshold comparisons.
        return 0.0
    I = (img_arr - img_min) / (img_max - img_min)

    Sx = sk.filters.sobel_h(I)
    Sy = sk.filters.sobel_v(I)
    Ften = np.sum(Sx**2 + Sy**2)
    return Ften

In [None]:
# Score images from the first 200 kymograph files, keeping only those whose
# mean intensity exceeds 2000, and collect both the scores and the images.
F_list, img_list = [], []
for i in range(200):
    kymo_path = headpath + "/kymograph/kymograph_" + str(i) + ".hdf5"
    with h5py.File(kymo_path, "r") as infile:
        for j in range(5):
            # Index [j, 0]: presumably trench j at the first timepoint -- TODO confirm.
            data = infile["GFP"][j, 0]

            mean_intensity = np.sum(data) / np.prod(data.shape)
            if not mean_intensity > 2000:
                continue  # too dim to score

            ten = get_focus_score(data)
            F_list.append(ten)
            img_list.append(data)

F_arr = np.array(F_list)
img_arr = np.array(img_list)

In [None]:
plt.hist(F_list, bins=20)

In [None]:
high_F = F_arr > 80.0
# Show the images above the score cut-off, then the remaining ones, each as an
# unwrapped strip cropped to a fixed number of columns.
for mask, n_cols in ((high_F, 1000), (~high_F, 1500)):
    handle = tr.kymo_handle()
    handle.import_wrap(img_arr[mask][:])
    plt.imshow(handle.return_unwrap()[:, :n_cols])
    plt.show()

In [None]:
# Show the first 500 columns of the above-threshold images.
# The original first line computed `low_F` from `AC_arr`, which is never defined
# in this notebook (NameError on a fresh kernel), and `low_F` was never used.
handle = tr.kymo_handle()
handle.import_wrap(img_arr[high_F])
plt.imshow(handle.return_unwrap()[:, :500])
plt.show()

In [None]:
plt.imshow(handle.return_unwrap()[:, :500])
plt.show()

In [None]:
plt.imshow(handle.return_unwrap()[:, :500])
plt.show()

#### Implementing in Kymograph

In [None]:
import warnings

import paulssonlab.deaton.trenchripper.trenchripper as tr

warnings.filterwarnings(action="once")

import matplotlib

matplotlib.rcParams["figure.figsize"] = [20, 10]

import dask.dataframe as dd

In [None]:
headpath = "/n/scratch3/users/d/de64/190917_20x_phase_gfp_segmentation002/190917_20x_phase_gfp_segmentation002/"

In [None]:
viewer = tr.hdf5_viewer(headpath)

In [None]:
viewer.view()

In [None]:
dask_controller = tr.trcluster.dask_controller(
    walltime="04:00:00",
    local=False,
    n_workers=10,
    memory="4GB",
    working_directory=headpath + "/dask",
)
dask_controller.startdask()

In [None]:
dask_controller.daskclient

In [None]:
kymoclust = tr.kymograph.kymograph_cluster(
    headpath=headpath, trenches_per_file=25, paramfile=True
)

In [None]:
kymoclust.generate_kymographs(dask_controller)

Next: plot the focus-score histogram and show example trenches.


Workflow: show histogram -> apply filter -> plot a random sample of both classes (above and below threshold).

In [None]:
import pickle

import h5py
import numpy as np
from ipywidgets import (
    Dropdown,
    FloatRangeSlider,
    FloatSlider,
    IntRangeSlider,
    IntSlider,
    IntText,
    Select,
    SelectMultiple,
    fixed,
    interact,
    interact_manual,
    interactive,
)
from matplotlib import pyplot as plt

In [None]:
class focus_filter:
    """Interactive helper for choosing focus / intensity thresholds.

    On construction the kymograph metadata table is opened lazily from
    ``<headpath>/kymograph/metadata``.  The user then picks a channel, inspects
    the score distributions, previews example trenches on either side of
    candidate thresholds, and finally pickles the chosen parameters to
    ``<headpath>/focus_filter.par``.

    Attributes set along the way:
        final_params: dict of chosen parameters, written by ``write_param_file``.
        channel: channel name set by ``choose_filter_channel``.
        focus_max, intensity_max: maxima of the sampled values, set by
            ``plot_histograms`` and used as slider upper bounds.
    """

    # Suffix of the per-channel focus-score columns; the same string is appended
    # to the channel name when the columns are read back below.
    _FOCUS_SUFFIX = " Focus Score"

    def __init__(self, headpath):
        self.headpath = headpath
        self.kymographpath = headpath + "/kymograph"
        self.df = dd.read_parquet(self.kymographpath + "/metadata")

        self.final_params = {}

    def choose_filter_channel(self, channel):
        """Record ``channel`` as the channel whose scores drive the filter."""
        self.final_params["Filter Channel"] = channel
        self.channel = channel

    def choose_filter_channel_inter(self):
        """Show a dropdown of every channel that has a focus-score column.

        Raises:
            ValueError: if the metadata table has no ``"<channel> Focus Score"``
                column to choose from.
        """
        suffix = self._FOCUS_SUFFIX
        channel_options = [
            column[: -len(suffix)]
            for column in self.df.columns.tolist()
            if column.endswith(suffix)
        ]
        if not channel_options:
            raise ValueError(
                "No '<channel>" + suffix + "' columns found in the kymograph metadata."
            )

        choose_channel = interactive(
            self.choose_filter_channel,
            {"manual": True},
            channel=Dropdown(options=channel_options, value=channel_options[0]),
        )
        display(choose_channel)

    def subsample_df(self, df, n_samples):
        """Return up to ``n_samples`` randomly chosen rows of a lazy frame, computed.

        A slightly larger fraction than needed is sampled and the result is
        truncated to ``n_samples`` rows.  An empty input is returned as-is
        (computed) instead of raising ``ZeroDivisionError``.
        """
        ttl_rows = len(df)
        if ttl_rows == 0:
            # Nothing to sample, e.g. a threshold that no row passes.
            return df.compute()
        n_samples = min(n_samples, ttl_rows)
        frac = min((n_samples / ttl_rows) * 1.1, 1.0)
        subsampled_df = df.sample(frac=frac, replace=False).compute()[:n_samples]
        return subsampled_df

    def _plot_hist(self, values, title, xlabel):
        """Draw one 50-bin histogram of ``values`` with the given labels."""
        fig, ax = plt.subplots(1, 1)
        ax.hist(values, bins=50)
        ax.set_title(title, fontsize=20)
        ax.set_xlabel(xlabel, fontsize=15)
        fig.set_size_inches(9, 6)
        fig.show()

    def plot_histograms(self, n_samples=10000):
        """Plot focus-score and mean-intensity histograms from a random sample.

        Also stores the sampled maxima in ``focus_max`` / ``intensity_max``.
        """
        subsampled_df = self.subsample_df(self.df, n_samples)

        focus_vals = subsampled_df[self.channel + " Focus Score"]
        self.focus_max = np.max(focus_vals)
        self._plot_hist(focus_vals, "Focus Score Distribution", "Focus Score")

        intensity_vals = subsampled_df[self.channel + " Mean Intensity"]
        self.intensity_max = np.max(intensity_vals)
        self._plot_hist(intensity_vals, "Mean Intensity Distribution", "Mean Intensity")

    def plot_trench_sample(self, df, cmap="Greys_r", title=""):
        """Load the image for every row of ``df`` and show them as one strip.

        ``df`` is a computed (pandas) frame with ``File Index``, ``row``,
        ``trench`` and ``timepoints`` columns.  Nothing is drawn when ``df`` has
        no rows.
        """
        array_list = []
        for _, row in df.iterrows():
            # iterrows() yields one Series per row, which can upcast integer
            # columns to float when the row mixes dtypes; cast back so the file
            # name, group key and array indices stay integral.
            file_idx = int(row["File Index"])
            row_idx = str(int(row["row"]))
            trench_idx = int(row["trench"])
            timepoint_idx = int(row["timepoints"])

            with h5py.File(
                self.kymographpath + "/kymograph_processed_" + str(file_idx) + ".hdf5",
                "r",
            ) as hdf5_handle:
                array = hdf5_handle[row_idx + "/" + self.channel][
                    trench_idx, timepoint_idx
                ]
            array_list.append(array)

        if not array_list:
            # An empty class (e.g. nothing above the threshold) has nothing to show.
            print("No trenches to display for: " + title)
            return

        output_array = np.stack(array_list, axis=0)
        kymo_handle = tr.kymo_handle()
        kymo_handle.import_wrap(output_array)
        unwrapped = kymo_handle.return_unwrap()

        fig, ax = plt.subplots(1, 1)
        ax.set_title(title, fontsize=20)
        ax.imshow(unwrapped, cmap=cmap)
        fig.set_size_inches(18, 12)
        fig.show()

    def plot_focus_threshold(
        self, focus_thr=60, intensity_thr=0, perc_above_thr=1.0, n_images=50
    ):
        """Record the thresholds and preview random trenches above and below them.

        A row counts as "above" when both its focus score and its mean intensity
        exceed their thresholds.  ``perc_above_thr`` is only stored in
        ``final_params``; it does not affect the preview.
        """
        self.final_params["Focus Threshold"] = focus_thr
        self.final_params["Intensity Threshold"] = intensity_thr
        self.final_params["Percent Of Kymograph"] = perc_above_thr

        thr_bool = (self.df[self.channel + " Focus Score"] > focus_thr) & (
            self.df[self.channel + " Mean Intensity"] > intensity_thr
        )

        above_thr_df = self.df[thr_bool]
        below_thr_df = self.df[~thr_bool]

        above_thr_df = self.subsample_df(above_thr_df, n_images).sort_index()
        below_thr_df = self.subsample_df(below_thr_df, n_images).sort_index()

        self.plot_trench_sample(above_thr_df, title="Above Threshold")
        self.plot_trench_sample(below_thr_df, title="Below Threshold")

    def plot_focus_threshold_inter(self):
        """Show sliders for the thresholds and preview the result on demand.

        Raises:
            AttributeError: if ``plot_histograms`` has not been run yet, since the
                slider ranges come from the maxima it records.
        """
        if not (hasattr(self, "focus_max") and hasattr(self, "intensity_max")):
            raise AttributeError(
                "Run plot_histograms() first: it sets the slider upper bounds."
            )

        # Round the bounds up so the largest sampled value stays reachable on an
        # integer slider.
        focus_upper = int(np.ceil(self.focus_max))
        intensity_upper = int(np.ceil(self.intensity_max))

        focus_threshold = interactive(
            self.plot_focus_threshold,
            {"manual": True},
            focus_thr=IntSlider(value=0, min=0, max=focus_upper, step=1),
            intensity_thr=IntSlider(value=0, min=0, max=intensity_upper, step=1),
            perc_above_thr=FloatSlider(value=1.0, min=0.0, max=1.0, step=0.05),
            n_images=IntText(value=50, description="Number of images:", disabled=False),
        )

        display(focus_threshold)

    def write_param_file(self):
        """Pickle ``final_params`` to ``<headpath>/focus_filter.par``."""
        with open(self.headpath + "/focus_filter.par", "wb") as outfile:
            pickle.dump(self.final_params, outfile)

In [None]:
ff = focus_filter(kymoclust.headpath)

In [None]:
ff.choose_filter_channel_inter()

In [None]:
ff.plot_histograms()

In [None]:
ff.plot_focus_threshold_inter()

In [None]:
ff.write_param_file()

In [None]:
kymoclust.post_process(dask_controller)

In [None]:
kymoclust.kymo_report()

In [None]:
df = dd.read_parquet(kymoclust.kymographpath + "/metadata")

In [None]:
[column[:-12] for column in df.columns.tolist() if column[-11:] == "Focus Score"]

In [None]:
focus_vals = subsample_df(df["GFP Focus Score"], 10000)

In [None]:
from matplotlib import pyplot as plt

In [None]:
fig, ax = plt.subplots(1, 1)
ax.hist(focus_vals, bins=50)
ax.set_title("Focus Score Distribution", fontsize=20)
ax.set_xlabel("Focus Score", fontsize=15)
fig.set_size_inches(9, 6)
fig.show()

In [None]:
plot_focus_threshold(df)

In [None]:
def plot_focus_threshold(df, focus_thr=60, n_images=100, channel="GFP"):
    """Preview random trenches above and below a focus-score threshold.

    Rows of ``df`` are split on ``"<channel> Focus Score" > focus_thr``; up to
    ``n_images`` rows of each class are sampled, sorted by index and plotted.

    NOTE(review): relies on module-level ``subsample_df`` and
    ``plot_trench_sample`` helpers that are not defined in the visible part of
    this notebook -- confirm they exist before running.
    """
    thr_bool = df[channel + " Focus Score"] > focus_thr
    above_thr_df = df[thr_bool]
    below_thr_df = df[~thr_bool]

    above_thr_df = subsample_df(above_thr_df, n_images).sort_index()
    below_thr_df = subsample_df(below_thr_df, n_images).sort_index()

    # Pass the requested channel through instead of a hard-coded "GFP".
    plot_trench_sample(above_thr_df, channel)
    plot_trench_sample(below_thr_df, channel)

In [None]:
output_array.shape

In [None]:
thr_bool = df["GFP Focus Score"] > focus_thr
above_thr_df = df[thr_bool]
below_thr_df = df[~thr_bool]

In [None]:
above_thr_df

focus_vals = (
    df["GFP Focus Score"].sample(frac=frac, replace=False).compute()[:n_samples]
)

In [None]:
self.fig_size = (fig_size_y, fig_size_x)
self.img_per_row = img_per_row

rand_trench_arr = np.random.choice(self.trenchid_arr, size=(n_trenches,), replace=False)
self.selecteddf = self.kymodf.loc[
    list(zip(rand_trench_arr, np.zeros(len(rand_trench_arr)).astype(int)))
]
selectedlist = list(
    zip(
        self.selecteddf["File Index"].tolist(),
        self.selecteddf["File Trench Index"].tolist(),
    )
)

array_list = []
for item in selectedlist:
    with h5py.File(
        self.kymographpath + "/kymograph_" + str(item[0]) + ".hdf5", "r"
    ) as hdf5_handle:
        if t_range[1] == None:
            array = hdf5_handle[self.seg_channel][
                item[1], t_range[0] :: t_subsample_step
            ]
        else:
            array = hdf5_handle[self.seg_channel][
                item[1], t_range[0] : t_range[1] + 1 : t_subsample_step
            ]
    array_list.append(array)
output_array = np.concatenate(np.expand_dims(array_list, axis=0), axis=0)
self.t_tot = output_array.shape[1]
self.plot_kymographs(output_array)
self.output_array = output_array

In [None]:
kymoclust.post_process(dask_controller)

In [None]:
kymoclust.kymo_report()

In [None]:
df = dd.read_parquet(kymoclust.kymographpath + "/metadata/").compute()

In [None]:
import h5py
from matplotlib import pyplot as plt

import paulssonlab.deaton.trenchripper.trenchripper as tr

In [None]:
print("moo")

In [None]:
file_idx = 4
with h5py.File(
    "/n/scratch3/users/d/de64/190917_20x_phase_gfp_segmentation002/190917_20x_phase_gfp_segmentation002/kymograph/kymograph_"
    + str(file_idx)
    + ".hdf5",
    "r",
) as infile:
    data = infile["GFP"][:]
    data = data.reshape(-1, data.shape[2], data.shape[3])
    handle = tr.kymo_handle()
    handle.import_wrap(data)
plt.imshow(handle.return_unwrap()[:, 0:1000])
plt.show()

In [None]:
plt.hist(df["GFP Focus Score"])

In [None]:
handle.import_wrap(data)

In [None]:
from matplotlib import pyplot as plt

plt.imshow(handle.return_unwrap())
plt.show()

In [None]:
df = dd.read_parquet(kymoclust.kymographpath + "/metadata").compute()

In [None]:
from matplotlib import pyplot as plt

plt.hist(df["GFP Focus Score"].tolist(), bins=100, range=(0, 10))
plt.show()

In [None]:
df

In [None]:
import numpy as np
import pandas as pd

In [None]:
len(df[df["GFP Focus Score"] > 1.0])

In [None]:
trench_group = df.groupby(["fov", "row", "trench"])

max_discont = first_gen_df.groupby("trenchid").apply(compute_del_area)
max_discont_filter = max_discont < 1.75

In [None]:
df = dd.read_parquet(kymoclust.kymographpath + "/metadata")

In [None]:
dfgroup = df.groupby("trenchid")

In [None]:
d

In [None]:
def add_list_to_column(df, list_to_add, column_name):
    """Append ``list_to_add`` to the lazy frame ``df`` as a new last column.

    The frame's index is replaced by a fresh ``index`` column, the list is
    wrapped in a pandas frame keyed by the same values, and the two are joined
    on that key.  The key column is then dropped and the last column is renamed
    to ``column_name``.

    Returns the frame with the extra column; the original index is discarded.

    NOTE(review): on a dask frame ``reset_index`` reportedly restarts at 0 in
    every partition, so the join key may not be unique for multi-partition
    input -- confirm against the dask documentation before relying on this.
    """
    # Drop the existing index, then materialise the new one as an "index" column.
    df = df.reset_index(drop=True)
    df = df.reset_index(drop=False)
    idx = df["index"].compute()

    # Pair each list element with a key value so it can be joined back.
    list_to_add = pd.DataFrame(list_to_add)
    list_to_add["index"] = idx
    df = df.join(list_to_add.set_index("index"), how="left", on="index")

    df = df.drop(["index"], axis=1)

    # The joined column arrives last under a default name; give it the real one.
    df.columns = df.columns.tolist()[:-1] + [column_name]

    return df


def filter_focus(channel, df, focus_threshold=0.1, perc_above=0.5):
    """Keep only trenches that are in focus for enough timepoints.

    Parameters
    ----------
    channel : str
        Channel whose ``"<channel> Focus Score"`` column is tested.
    df : dask or pandas DataFrame
        Metadata with ``trenchid``, ``timepoints`` and the focus-score column.
    focus_threshold : float
        A timepoint counts as in focus when its score exceeds this value.
    perc_above : float
        Fraction (0-1) of the distinct timepoints that must be in focus.

    Returns
    -------
    DataFrame of the same kind as ``df`` containing only the rows of trenches
    that pass, with no helper column left behind.
    """
    score_column = channel + " Focus Score"
    num_above = np.round(len(df["timepoints"].unique()) * perc_above).astype(int)

    trench_group = df.groupby("trenchid")
    # ">=" rather than ">": with a strict comparison perc_above=1.0 could never
    # be satisfied, because a trench cannot have more in-focus timepoints than
    # there are timepoints.
    passes = trench_group.apply(
        lambda x: np.sum(x[score_column] > focus_threshold) >= num_above
    )
    if hasattr(passes, "compute"):
        # Lazy (dask) result; an eager pandas result is used as-is.
        passes = passes.compute()
    passes = pd.DataFrame({"focus filter": passes})

    out_df = df.join(passes, on="trenchid")
    out_df = out_df[out_df["focus filter"]]
    out_df = out_df.drop(labels="focus filter", axis=1)
    return out_df


def reindex_trenches(df):
    """Renumber trenches within each (fov, row) group and rebuild the index.

    The old ``trenchid`` and ``trench`` columns are dropped, a new ``trench``
    column numbered from 0 within every (fov, row) group is added, and the
    frame is re-indexed by a ``FOV Parquet Index`` built from the zero-padded
    fov, row, trench and timepoint values.

    NOTE(review): the new numbers are generated per group and attached by
    position, so this presumably requires rows ordered by fov, row, trench and
    timepoint, with every trench present at every timepoint -- confirm.
    """
    num_timepoints = len(df["timepoints"].unique())
    # For each (fov, row) group: 0..n_trenches-1, each repeated once per timepoint.
    new_trenches = df.groupby(["fov", "row"]).apply(
        lambda x: np.repeat(
            list(range(0, len(x["trench"].unique()))), repeats=num_timepoints
        )
    )
    # Flatten the per-group arrays into one list.
    new_trenches = [element for list_ in new_trenches for element in list_]
    df = df.drop(["trenchid", "trench"], axis=1)

    df = add_list_to_column(df, new_trenches, "trench")
    cols = df.columns.tolist()
    # Move the new last column ("trench") to third position.
    reordered_columns = cols[:2] + cols[-1:] + cols[2:-1]
    df = df[reordered_columns]

    # Concatenate the four 4-digit zero-padded fields into one integer key per row.
    fov_idx = (
        df.apply(
            lambda x: int(
                f'{x["fov"]:04}{x["row"]:04}{x["trench"]:04}{x["timepoints"]:04}'
            ),
            axis=1,
        )
        .compute()
        .tolist()
    )

    df = add_list_to_column(df, fov_idx, "FOV Parquet Index")
    df = df.set_index("FOV Parquet Index")
    return df

In [None]:
out_df = filter_focus("GFP", df, focus_threshold=1.0, perc_above=0.5)

In [None]:
out_df.compute()

In [None]:
out_df = reindex_trenches(out_df)

In [None]:
out_df.compute()

In [None]:
out_df = kymoclust.add_trenchids(out_df)

In [None]:
out_df.compute()

In [None]:
# Scratch version of the post-processing metadata step: filter trenches by
# focus, then assign every remaining row to an output file and a position
# within that file.
df = dd.read_parquet(kymoclust.kymographpath + "/metadata").persist()
df = filter_focus("GFP", df, focus_threshold=1.0, perc_above=0.5)

trenchid_list = df["trenchid"].unique().compute().tolist()
file_list = df["File Index"].unique().compute().tolist()
outputdf = df.drop(columns=["File Index", "Image Index"]).persist()
trenchiddf = df.set_index("trenchid").persist()

num_tpts = len(trenchiddf["timepoints"].unique().compute().tolist())
# Rows per output file: trenches_per_file trenches times num_tpts timepoints.
chunk_size = kymoclust.trenches_per_file * num_tpts
# Ceiling division: a partly filled last file still counts as a file.
if len(trenchid_list) % kymoclust.trenches_per_file == 0:
    num_files = len(trenchid_list) // kymoclust.trenches_per_file
else:
    num_files = (len(trenchid_list) // kymoclust.trenches_per_file) + 1


# File number for every row: each file index repeated chunk_size times,
# truncated to the number of rows.
file_indices = np.repeat(np.array(range(num_files)), chunk_size)[: len(outputdf)]
# Position within the file: 0..trenches_per_file-1, each repeated num_tpts
# times, tiled once per file and truncated to the number of rows.
file_trenchid = np.repeat(np.array(range(kymoclust.trenches_per_file)), num_tpts)
file_trenchid = np.repeat(file_trenchid[:, np.newaxis], num_files, axis=1).T.flatten()[
    : len(outputdf)
]
file_indices = pd.DataFrame(file_indices)
file_trenchid = pd.DataFrame(file_trenchid)
file_indices.index = outputdf.index
file_trenchid.index = outputdf.index

outputdf = add_list_to_column(outputdf, file_indices[0].tolist(), "File Index")
outputdf = add_list_to_column(outputdf, file_trenchid[0].tolist(), "File Trench Index")

# Integer key built from the zero-padded file index, in-file trench index and timepoint.
parq_file_idx = outputdf.apply(
    lambda x: int(
        f'{int(x["File Index"]):04}{int(x["File Trench Index"]):04}{int(x["timepoints"]):04}'
    ),
    axis=1,
    meta=int,
)
outputdf["File Parquet Index"] = parq_file_idx
outputdf = outputdf.astype(
    {"File Index": int, "File Trench Index": int, "File Parquet Index": int}
)


outputdf = reindex_trenches(outputdf)
outputdf = kymoclust.add_trenchids(outputdf)

In [None]:
outputdf.compute()

In [None]:
for k in range(0, num_files):
    print(k)

In [None]:
# Scratch version of the per-file reorganisation: build output file k by
# gathering its trenches from the "kymograph_processed_*" files and writing one
# dataset per channel.
k = 0
output_file_path = kymoclust.kymographpath + "/kymograph_" + str(k) + ".hdf5"
with h5py.File(output_file_path, "w") as outfile:
    for channel in kymoclust.all_channels:
        # Trench ids that belong to output file k.
        trenchids = trenchid_list[
            k * kymoclust.trenches_per_file : (k + 1) * kymoclust.trenches_per_file
        ]
        working_trenchdf = trenchiddf.loc[trenchids].compute()
        fov_list = working_trenchdf["fov"].unique().tolist()
        trench_arr_fovs = []
        for fov in fov_list:
            working_fovdf = working_trenchdf[working_trenchdf["fov"] == fov]
            # NOTE(review): this rebinds the module-level `file_list` name.
            file_list = working_fovdf["File Index"].unique().tolist()

            trench_arr_files = []
            for file_idx in file_list:
                proc_file_path = (
                    kymoclust.kymographpath
                    + "/kymograph_processed_"
                    + str(file_idx)
                    + ".hdf5"
                )
                with h5py.File(proc_file_path, "r") as infile:
                    working_filedf = working_fovdf[
                        working_fovdf["File Index"] == file_idx
                    ]
                    row_list = working_filedf["row"].unique().tolist()

                    trench_arr_rows = []
                    for row in row_list:
                        working_rowdf = working_filedf[working_filedf["row"] == row]
                        # An earlier version read the contiguous slice
                        # first_trench:last_trench+1; this one indexes with the
                        # explicit trench list, presumably because the kept
                        # trenches need not be contiguous after filtering.
                        trenches = working_rowdf["trench"].unique().tolist()
                        print(trenches)
                        kymo_arr = infile[str(row) + "/" + channel][trenches]
                        trench_arr_rows.append(kymo_arr)
                # Stack the rows of this file along the trench axis.
                trench_arr_rows = np.concatenate(
                    trench_arr_rows, axis=0
                )  # k x t x y x x
                trench_arr_files.append(trench_arr_rows)
            # Join the files of this fov along axis 1.
            trench_arr_files = np.concatenate(trench_arr_files, axis=1)  # k x t x y x x
            trench_arr_fovs.append(trench_arr_files)
        # Stack all fovs along the trench axis and write the channel dataset.
        trench_arr_fovs = np.concatenate(trench_arr_fovs, axis=0)  # k x t x y x x
        hdf5_dataset = outfile.create_dataset(
            str(channel), data=trench_arr_fovs, dtype="uint16"
        )

In [None]:
trench_arr_fovs.shape

In [None]:
def post_process(self, dask_controller):
    """Regroup kymographs into fixed-size files and rewrite the metadata table.

    Reads the metadata parquet table, assigns every row an output file index
    and an in-file trench index (``trenches_per_file`` trenches per file),
    submits one ``reorg_kymograph`` task per output file plus a final
    ``cleanup_kymographs`` task to the dask client, waits for the cleanup, and
    writes the updated metadata back to ``<kymographpath>/metadata``.

    Args:
        dask_controller: object exposing ``daskclient`` and a ``futures`` dict;
            ``futures`` is reset at the start of the call.

    NOTE(review): ``priority`` passed to the cleanup task is whatever the last
    loop iteration left behind, and is undefined when ``num_files`` is 0.
    """
    dask_controller.futures = {}

    df = dd.read_parquet(self.kymographpath + "/metadata").persist()
    #         df = self.add_trenchids(df).persist() #NEW

    trenchid_list = df["trenchid"].unique().compute().tolist()
    file_list = df["File Index"].unique().compute().tolist()
    outputdf = df.drop(columns=["File Index", "Image Index"]).persist()
    trenchiddf = df.set_index("trenchid").persist()

    #         with open(self.kymographpath + "/metadata.pkl", 'rb') as handle:
    #             metadata = pickle.load(handle)

    num_tpts = len(trenchiddf["timepoints"].unique().compute().tolist())
    # Rows per output file: trenches_per_file trenches times num_tpts timepoints.
    chunk_size = self.trenches_per_file * num_tpts
    # Ceiling division: a partly filled last file still counts as a file.
    if len(trenchid_list) % self.trenches_per_file == 0:
        num_files = len(trenchid_list) // self.trenches_per_file
    else:
        num_files = (len(trenchid_list) // self.trenches_per_file) + 1

    # Per-row output file number and in-file trench position, truncated to the
    # number of rows in the table.
    file_indices = np.repeat(np.array(range(num_files)), chunk_size)[: len(outputdf)]
    file_trenchid = np.repeat(np.array(range(self.trenches_per_file)), num_tpts)
    file_trenchid = np.repeat(
        file_trenchid[:, np.newaxis], num_files, axis=1
    ).T.flatten()[: len(outputdf)]
    file_indices = pd.DataFrame(file_indices)
    file_trenchid = pd.DataFrame(file_trenchid)
    file_indices.index = outputdf.index
    file_trenchid.index = outputdf.index

    outputdf["File Index"] = file_indices[0]
    outputdf["File Trench Index"] = file_trenchid[0]
    # Integer key built from the zero-padded file index, in-file trench index and timepoint.
    parq_file_idx = outputdf.apply(
        lambda x: int(
            f'{int(x["File Index"]):04}{int(x["File Trench Index"]):04}{int(x["timepoints"]):04}'
        ),
        axis=1,
        meta=int,
    )
    outputdf["File Parquet Index"] = parq_file_idx
    outputdf = outputdf.astype(
        {"File Index": int, "File Trench Index": int, "File Parquet Index": int}
    )

    # One reorganisation task per output file, each with a random priority.
    random_priorities = np.random.uniform(size=(num_files,))
    for k in range(0, num_files):
        priority = random_priorities[k]
        future = dask_controller.daskclient.submit(
            self.reorg_kymograph,
            k,
            df,
            trenchid_list,
            trenchiddf,
            retries=1,
            priority=priority,
        )
        dask_controller.futures["Kymograph Reorganized: " + str(k)] = future

    # The cleanup task receives every reorganisation future as an argument.
    reorg_futures = [
        dask_controller.futures["Kymograph Reorganized: " + str(k)]
        for k in range(num_files)
    ]
    future = dask_controller.daskclient.submit(
        self.cleanup_kymographs, reorg_futures, file_list, retries=1, priority=priority
    )
    dask_controller.futures["Kymographs Cleaned Up"] = future
    # Block until cleanup has finished before the metadata is rewritten.
    dask_controller.daskclient.gather([future])

    dd.to_parquet(
        outputdf,
        self.kymographpath + "/metadata",
        engine="fastparquet",
        compression="gzip",
        write_metadata_file=True,
    )

In [None]:
import h5py

proc_file_path = "/n/scratch3/users/d/de64/190917_20x_phase_gfp_segmentation002/190917_20x_phase_gfp_segmentation002//kymograph/kymograph_processed_1.hdf5"
with h5py.File(proc_file_path, "r") as infile:
    kymo_arr = infile[str(0) + "/" + "GFP"][[0, 4, 7]]

In [None]:
kymo_arr

In [None]:
out_df = kymoclust.add_trenchids(out_df)

In [None]:
out_df.compute()

In [None]:
test.compute()

In [None]:
def reindex_trenches(df):
    """Replace the ``trench`` column with numbers restarting at 0 per (fov, row).

    Earlier draft of the re-indexing step: it drops ``trenchid`` and the old
    ``trench`` column, joins the new trench numbers on a fresh positional
    ``index`` column, and reorders the columns.

    NOTE(review): ``cols`` is not defined inside this function; it is read from
    the notebook's global namespace (a later cell sets it from
    ``test.columns``), so the function fails on a fresh kernel.  This
    definition also shadows the earlier ``reindex_trenches`` in this notebook.
    """
    # Replace the index with a fresh "index" column used as the join key.
    df = df.reset_index(drop=True)
    df = df.reset_index(drop=False)
    idx = df["index"].compute()

    num_timepoints = len(df["timepoints"].unique())
    # For each (fov, row) group: 0..n_trenches-1, each repeated once per timepoint.
    new_trenches = df.groupby(["fov", "row"]).apply(
        lambda x: np.repeat(
            list(range(0, len(x["trench"].unique()))), repeats=num_timepoints
        )
    )
    new_trenches = [element for list_ in new_trenches for element in list_]
    new_trenches = pd.DataFrame(new_trenches)
    new_trenches["index"] = idx
    df = df.join(new_trenches.set_index("index"), how="left", on="index")
    df = df.drop(["index", "trenchid", "trench"], axis=1)
    # The joined column arrives last under a default name; call it "trench".
    df.columns = df.columns.tolist()[:-1] + ["trench"]
    # Uses the global `cols` (see note above), not this frame's own columns.
    reordered_columns = cols[:2] + cols[-1:] + cols[3:-1]
    df = df[reordered_columns]

    return df

In [None]:
cols = test.columns.tolist()

In [None]:
reordered_columns

In [None]:
test.compute()

In [None]:
test.compute()

In [None]:
test.

In [None]:
import skimage as sk


def get_focus_score(img_arr):
    """Return the summed squared Sobel response of one image.

    The horizontal and vertical Sobel filters are applied to ``img_arr`` as
    given (no intensity normalisation) and their squares are summed over all
    pixels.
    """
    sobel_responses = (sk.filters.sobel_h(img_arr), sk.filters.sobel_v(img_arr))
    return np.sum(sobel_responses[0] ** 2 + sobel_responses[1] ** 2)

In [None]:
working_rowdf

First, make focus measurements

In [None]:
import numpy as np
import pandas as pd

In [None]:
def get_focus_scores(file_idx, channel="GFP"):
    """Compute a focus score for every image in one processed kymograph file.

    Parameters
    ----------
    file_idx : int
        Index of the ``kymograph_processed_<file_idx>.hdf5`` file to score.
    channel : str
        Channel dataset to read within each row group.  This used to be read
        from a global ``channel`` variable that only exists if another cell
        happened to leave one behind; it is now an explicit argument.

    Returns
    -------
    pandas.DataFrame
        The metadata rows for this file with an added ``Focus Score`` column.

    NOTE(review): scores are attached by position, so the metadata rows of each
    row group are presumably in the same order as the flattened
    (trench, timepoint) axes of the image array -- confirm.
    """
    df = dd.read_parquet(kymoclust.kymographpath + "/metadata/")

    working_rowdfs = []

    proc_file_path = (
        kymoclust.kymographpath + "/kymograph_processed_" + str(file_idx) + ".hdf5"
    )
    with h5py.File(proc_file_path, "r") as infile:
        working_filedf = df[df["File Index"] == file_idx]
        row_list = working_filedf["row"].unique().compute().tolist()
        for row in row_list:
            working_rowdf = working_filedf[working_filedf["row"] == row].compute()
            kymo_arr = infile[str(row) + "/" + channel][:]
            original_shape = kymo_arr.shape
            # Collapse the two leading axes so each entry is one 2-D image.
            kymo_arr = kymo_arr.reshape(-1, original_shape[2], original_shape[3])
            focus_scores = [
                get_focus_score(kymo_arr[i]) for i in range(kymo_arr.shape[0])
            ]
            working_rowdf["Focus Score"] = focus_scores
            working_rowdfs.append(working_rowdf)

    out_df = pd.concat(working_rowdfs)
    return out_df

In [None]:
import shutil
from time import sleep

import h5py
from dask import delayed
from distributed.client import futures_of


def get_focus_scores(self, file_idx, channel="GFP"):
    """Compute a focus score for every image in one processed kymograph file.

    Parameters
    ----------
    self : object with a ``kymographpath`` attribute
        The kymograph cluster whose files are scored.
    file_idx : int
        Index of the ``kymograph_processed_<file_idx>.hdf5`` file to score.
    channel : str
        Channel dataset to read within each row group (previously read from an
        undefined global).

    Returns
    -------
    pandas.DataFrame
        The metadata rows for this file with an added ``Focus Score`` column.
    """
    df = dd.read_parquet(self.kymographpath + "/metadata")

    working_rowdfs = []

    proc_file_path = (
        self.kymographpath + "/kymograph_processed_" + str(file_idx) + ".hdf5"
    )
    with h5py.File(proc_file_path, "r") as infile:
        working_filedf = df[df["File Index"] == file_idx]
        row_list = working_filedf["row"].unique().compute().tolist()
        for row in row_list:
            working_rowdf = working_filedf[working_filedf["row"] == row].compute()
            kymo_arr = infile[str(row) + "/" + channel][:]
            original_shape = kymo_arr.shape
            # Collapse the two leading axes so each entry is one 2-D image.
            kymo_arr = kymo_arr.reshape(-1, original_shape[2], original_shape[3])
            focus_scores = [
                get_focus_score(kymo_arr[i]) for i in range(kymo_arr.shape[0])
            ]
            working_rowdf["Focus Score"] = focus_scores
            working_rowdfs.append(working_rowdf)

    out_df = pd.concat(working_rowdfs)
    return out_df


def get_all_focus_scores(self, channel):
    """Score every processed kymograph file and rewrite the metadata table.

    One delayed ``get_focus_scores`` task is started per file.  Once none of
    them is pending, the results of the tasks that finished are combined,
    repartitioned and written to ``<kymographpath>/metadata`` in place of the
    existing table; failed tasks are left out.

    Parameters
    ----------
    self : object with a ``kymographpath`` attribute
    channel : str
        Channel to score; forwarded to every ``get_focus_scores`` task.
    """
    df = dd.read_parquet(self.kymographpath + "/metadata")

    file_list = df["File Index"].unique().compute().tolist()

    delayed_list = []

    for file_idx in file_list:
        # Pass `self` and `channel` explicitly: the per-file function takes
        # (self, file_idx, channel), and calling it with file_idx alone would
        # bind the file index to `self`.
        df_delayed = delayed(get_focus_scores)(self, file_idx, channel)
        delayed_list.append(df_delayed.persist())

    ## filtering out non-failed dataframes ##
    all_delayed_futures = []
    for item in delayed_list:
        all_delayed_futures += futures_of(item)
    # Poll until every task has left the "pending" state.
    while any(future.status == "pending" for future in all_delayed_futures):
        sleep(0.1)

    good_delayed = [
        item
        for item in delayed_list
        if all(future.status == "finished" for future in futures_of(item))
    ]

    ## compiling output dataframe ##
    df_out = dd.from_delayed(good_delayed).persist()
    df_out = df_out.repartition(partition_size="25MB").persist()
    # The old metadata directory is overwritten before the new table is written.
    tr.writedir(self.kymographpath + "/metadata", overwrite=True)
    dd.to_parquet(
        df_out,
        self.kymographpath + "/metadata/",
        engine="fastparquet",
        compression="gzip",
        write_metadata_file=True,
    )

In [None]:
# get_all_focus_scores takes (self, channel); pass the kymograph cluster as
# `self` so that "GFP" is bound to `channel` rather than to `self`.
get_all_focus_scores(kymoclust, "GFP")

In [None]:
df = dd.read_parquet(kymoclust.kymographpath + "/metadata").persist()

In [None]:
df

In [None]:
            df_delayed = delayed(self.save_coords)(file_idx)
            self.delayed_list.append(df_delayed.persist())

        ## filtering out non-failed dataframes ##
        all_delayed_futures = []
        for item in self.delayed_list:
            all_delayed_futures+=futures_of(item)
        while any(future.status == 'pending' for future in all_delayed_futures):
            sleep(0.1)

        good_delayed = []
        for item in self.delayed_list:
            if all([future.status == 'finished' for future in futures_of(item)]):
                good_delayed.append(item)

        ## compiling output dataframe ##
        df_out = dd.from_delayed(good_delayed).persist()
        df_out = df_out.repartition(partition_size="25MB").persist()
        dd.to_parquet(df_out, self.kymographpath + "/metadata/",engine='fastparquet',compression='gzip',write_metadata_file=True)

In [None]:
np.apply_over_axes(get_focus_score, kymo_arr, [0])

In [None]:

df[]

trenchids = trenchid_list[k*self.trenches_per_file:(k+1)*self.trenches_per_file]
working_trenchdf = trenchiddf.loc[trenchids].compute()
fov_list = working_trenchdf["fov"].unique().tolist()
trench_arr_fovs = []
for fov in fov_list:
    working_fovdf = working_trenchdf[working_trenchdf["fov"]==fov]
    file_list = working_fovdf["File Index"].unique().tolist()

    trench_arr_files = []
    for file_idx in file_list:
        proc_file_path = self.kymographpath+"/kymograph_processed_"+str(file_idx)+".hdf5"
        with h5py.File(proc_file_path,"r") as infile:
            working_filedf = working_fovdf[working_fovdf["File Index"]==file_idx]
            row_list = working_filedf["row"].unique().tolist()

            trench_arr_rows = []
            for row in row_list:
                working_rowdf = working_filedf[working_filedf["row"]==row]
                trenches = working_rowdf["trench"].unique().tolist()
                first_trench_idx,last_trench_idx = (trenches[0],trenches[-1])
                kymo_arr = infile[str(row) + "/" + channel][first_trench_idx:(last_trench_idx+1)]
                trench_arr_rows.append(kymo_arr)
        trench_arr_rows = np.concatenate(trench_arr_rows,axis=0) # k x t x y x x
        trench_arr_files.append(trench_arr_rows)
    trench_arr_files = np.concatenate(trench_arr_files,axis=1) # k x t x y x x
    trench_arr_fovs.append(trench_arr_files)
trench_arr_fovs = np.concatenate(trench_arr_fovs,axis=0) # k x t x y x x
hdf5_dataset = outfile.create_dataset(str(channel), data=trench_arr_fovs, dtype="uint16")