In [None]:
import numpy as np
import pandas as pd
import nd2reader
import matplotlib.pyplot as plt
import holoviews as hv
from holoviews.operation.datashader import regrid
import skimage.filters
import skimage.feature
import scipy.ndimage
import peakutils
from tqdm import tnrange, tqdm_notebook
import dask
import dask.array as da
import distributed
from distributed import Client, LocalCluster, progress
from dask_jobqueue import SLURMCluster
from cytoolz import partial, compose, juxt
from itertools import repeat
from glob import glob
import cachetools
import numpy_indexed
import pickle
import pyarrow as pa
import warnings
import os
from numbers import Integral
from dask.delayed import Delayed

In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs so edits to imported sources (e.g. `segmentation`)
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# from segmentation import *
# from util import *
# from matriarch_stub import *
import segmentation
import matriarch_stub

In [None]:
# Default matplotlib figure size (width, height), set through the rc helper.
plt.rc("figure", figsize=(8, 8))
# Use bokeh as the HoloViews plotting backend.
hv.extension("bokeh")

# Run

In [None]:
# dask.config.config['distributed']['scheduler']['allowed-failures'] = 20
# dask.config.config['distributed']['worker']['memory'] = {'target': 0.4,
#                                                         'spill': 0.5,
#                                                         'pause': 0.9,
#                                                         'terminate': 0.95}

In [None]:
# Describe the SLURM jobs that will back the dask workers and attach a client.
# NOTE(review): `log_directory` is an absolute, user-specific path — adjust it
# before running this notebook under a different account.
cluster = SLURMCluster(
    queue="short",
    walltime="03:00:00",
    memory="8GB",
    local_directory="/tmp",
    log_directory="/home/jqs1/projects/molecule-counting/log",
    cores=1,
    processes=1,
)
# Commented-out constructor options (currently not passed):
# diagnostics_port=('127.0.0.1', 8787),
# env_extra=['export PYTHONPATH=\"/home/jqs1/projects/matriarch\"'])
# Connect a distributed client to the cluster created above.
client = Client(cluster)

In [None]:
# Show the cluster object as this cell's output.
cluster

In [None]:
# Request zero workers from the cluster.
# NOTE(review): when the notebook is executed top to bottom this runs before
# any of the computations below, leaving them without workers — confirm this
# cell is meant to be run manually rather than in sequence.
cluster.scale(0)

# Heterogeneous cluster

In [None]:
# TODO
# filter/debug bad FOVs
# reduce churn/transfers
# set up benchmarking tool, compare different chunking
# arbitrary sequence of traces using initial segmentation (replace sandwich)
# dry run without regionprops
# fix regionprops memory usage
# optimize submission performance

# BENCHMARK: try readahead buffering/chunk size
# named_funcs_as_juxt: decorator to turn {'func1': func1, ('q0.5', 'q0.7'): partial(np.percentile, q=(0.5,0.7))} into a multiple-valued func
# zarrification of labels (skip??)
# pin segmentation tasks to high-RAM nodes (in heterogeneous dask cluster)

# convert dask arrays to delayed before calling short_circuit_none (otherwise we wait until all frames are in RAM)
# don't process FOV if too many labels

# Test

In [None]:
# Open a single .nd2 file through segmentation.nd2_to_dask for the test cells below.
# NOTE(review): the positional arguments 0 and "GFP-PENTA" are presumably a
# position (FOV) index and a channel name — confirm against nd2_to_dask.
ary = segmentation.nd2_to_dask(
    "/n/scratch2/jqs1/190922/190922_photobleaching_greens/GFP_photobleaching_100pct_100ms_0001.nd2",
    0,
    "GFP-PENTA",
)

In [None]:
# Eager variant: materialize element 0 of `ary` with .compute() and segment it immediately.
labels = segmentation.segment(ary[0].compute())

In [None]:
# Delayed variant with an eager input: the segment call is wrapped in
# dask.delayed, but its argument is still materialized up front by .compute().
labels_delayed0 = dask.delayed(segmentation.segment)(ary[0].compute())

In [None]:
# Fully lazy variant: the uncomputed `ary[0]` is handed to the delayed segment call.
labels_delayed = dask.delayed(segmentation.segment)(ary[0])

In [None]:
# Materialize the first 10 entries of `ary` along its leading axis.
stack = ary[:10].compute()

In [None]:
# aggregate_dask with np.mean(axis=1) on eager inputs: computed labels and the
# materialized 10-entry stack. The shape of the first element of `z` is shown.
u, z = segmentation.aggregate_dask(partial(np.mean, axis=1), labels, stack)
z[0].shape

In [None]:
# Same call with eager labels but an uncomputed `ary[:10]` slice as the image input.
# Note that `u` and `z` from the previous cell are overwritten.
u, z = segmentation.aggregate_dask(partial(np.mean, axis=1), labels, ary[:10])
z[0].shape

In [None]:
# Same call with both inputs lazy: delayed labels and the full, uncomputed `ary`.
# `u` and `z` are overwritten again; `z` itself is displayed as the cell output.
u, z = segmentation.aggregate_dask(partial(np.mean, axis=1), labels_delayed, ary)
z  # [0]#.shape

In [None]:
# Evaluate the lazy `z` produced by the most recent aggregate_dask call.
zz = z.compute()

In [None]:
# Show the shape of the computed result.
zz.shape

## Run

In [None]:
# Root directory searched for .nd2 acquisitions.
base_filename = "/n/scratch2/jqs1"

# glob() yields matches in arbitrary, filesystem-dependent order. Sort the
# result so that any positional slicing of this list selects the same files
# on every run.
fluorescence_filenames = sorted(
    glob(os.path.join(base_filename, "190922/*/*photobleaching*.nd2"))
)

# Phase and sandwich inputs are disabled (empty lists). The glob expressions
# they were built from are kept below for reference.
# NOTE(review): they mention `base13_filename`, which is not defined in the
# visible part of this notebook.
# phase_filenames = glob(os.path.join(base13_filename, 'phase/*_0001.nd2')) + glob('/n/scratch2/jqs1/fidelity/190325/phase/*/*_0001.nd2')
phase_filenames = []
# sandwich_filenames = glob(os.path.join(base13_filename, 'sandwich/*_0001.nd2'))
sandwich_filenames = []

In [None]:
# Keep only the first 50 entries of the file list for this run
# (this rebinds the name, so the full list is no longer available afterwards).
fluorescence_filenames = fluorescence_filenames[:50]

In [None]:
# Placeholder for the first positional argument of segmentation.process_file.
funcs = None  # not used

In [None]:
# Build one entry per fluorescence file, keyed by its path; each value is
# whatever segmentation.process_file returns for that file.
data_graph = {}
for photobleaching_filename in fluorescence_filenames:
    result = segmentation.process_file(funcs, photobleaching_filename)
    data_graph[photobleaching_filename] = result
    del result

# for photobleaching_filename in phase_filenames[:]:
#     segmentation_filename = photobleaching_filename.replace('_0001.nd2', '.nd2')
#     data_graph[segmentation_filename] = segmentation.process_file(funcs, photobleaching_filename,
#                                                                     segmentation_filename=segmentation_filename)

# for initial_filename in sandwich_filenames[:]:
#     segmentation_filename = initial_filename.replace('_0001.nd2', '.nd2')
#     photobleaching_filename = initial_filename.replace('_0001.nd2', '_0002.nd2')
#     final_filename = initial_filename.replace('_0001.nd2', '_0003.nd2')
#     data_graph[segmentation_filename] = segmentation.process_file(funcs, photobleaching_filename,
#                                                                   segmentation_filename=segmentation_filename,
#                                                                   initial_filename=initial_filename,
#                                                                   final_filename=final_filename)

In [None]:
# Submit every inner entry as its own client.compute() call instead of one
# big compute, so that results are gathered from multiple workers
# (otherwise the single worker assembling the dict will run out of memory).
# TODO: use recursive_map(..., levels=?)
data_futures = {
    filename: {pos: client.compute(graph) for pos, graph in per_file.items()}
    for filename, per_file in data_graph.items()
}

## Save data

In [None]:
# Collect the results of all submitted futures from the cluster into this process.
data = client.gather(data_futures)

In [None]:
# Persist the gathered results to disk as a single pickle file.
# NOTE(review): the destination is an absolute, user-specific path, and opening
# it with "wb" overwrites any existing file at that location.
filename = "/n/groups/paulsson/jqs1/molecule-counting/191229photobleaching.pickle"
with open(filename, "wb") as f:
    pickle.dump(data, f)

In [None]:
# Largest value in each entry's "labels" array, grouped by top-level key;
# top-level keys whose first character is "_" are skipped.
dict(
    (name, {pos: np.asarray(fov["labels"]).max() for pos, fov in per_file.items()})
    for name, per_file in data.items()
    if name[0] != "_"
)

In [None]:
# Select one file's results and take entry 0 of its inner dict for inspection.
# NOTE(review): the lookup raises KeyError unless this exact path was among the
# processed files — confirm it survives the earlier [:50] truncation.
d = data[
    "/n/scratch2/jqs1/190922/CFP_photobleaching/CFP_photobleaching_50pct_100ms.nd2_0054.nd2"
][0]

In [None]:
# Display the stored "segmentation_frame" image of the selected entry at large
# size. The explicit Axes interface plus a title keeps the figure identifiable
# on its own; the trailing semicolon suppresses the text repr in the output.
fig, ax = plt.subplots(figsize=(20, 20))
ax.imshow(d["segmentation_frame"])
ax.set_title("segmentation_frame");

In [None]:
# Display the stored "labels" image of the selected entry at large size.
# The explicit Axes interface plus a title keeps the figure identifiable on
# its own; the trailing semicolon suppresses the text repr in the output.
fig, ax = plt.subplots(figsize=(20, 20))
ax.imshow(d["labels"])
ax.set_title("labels");