In [None]:
import numpy as np
import pandas as pd
import nd2reader
import matplotlib.pyplot as plt
import holoviews as hv
from holoviews.operation.datashader import regrid
import skimage.filters
import skimage.feature
import scipy.ndimage
import peakutils
from tqdm import tnrange, tqdm_notebook
import dask
import dask.array as da
import distributed
from distributed import Client, LocalCluster, progress
from dask_jobqueue import SLURMCluster
from cytoolz import partial, compose
from itertools import repeat
from glob import glob
import cachetools
import numpy_indexed
import pickle
import pyarrow as pa
import warnings
import os
from numbers import Integral
from dask.delayed import Delayed

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up before each cell runs, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# from segmentation import *
# from util import *
# from matriarch_stub import *
import segmentation
import matriarch_stub

In [None]:
# Plotting defaults: 8 x 8 matplotlib figures, and Bokeh as the HoloViews backend.
plt.rcParams.update({"figure.figsize": (8, 8)})
hv.extension("bokeh")

# Run

In [None]:
# Scheduler/worker configuration. Values are applied through dask.config.set,
# the supported API, rather than by mutating the dask.config.config dict:
# the setter creates missing nested keys instead of raising KeyError.
#
# Optional worker memory thresholds (currently disabled):
# dask.config.set({"distributed.worker.memory": {"target": 0.4,
#                                                "spill": 0.5,
#                                                "pause": 0.9,
#                                                "terminate": 0.95}})

# Raise the scheduler's "allowed-failures" setting to 20 (see the distributed
# configuration reference for its exact semantics).
# Called outside a `with` block, so the value stays set for the whole session.
# The trailing semicolon suppresses the returned object's repr in the notebook.
dask.config.set({"distributed.scheduler.allowed-failures": 20});

In [None]:
# Options forwarded verbatim to dask_jobqueue.SLURMCluster.
# Previously used extra options, currently disabled:
#   diagnostics_port=('127.0.0.1', 8787),
#   env_extra=['export PYTHONPATH=\"/home/jqs1/projects/matriarch\"']
slurm_options = dict(
    queue="short",
    walltime="03:00:00",
    memory="8GB",
    local_directory="/tmp",
    log_directory="/home/jqs1/projects/molecule-counting/log",
    cores=1,
    processes=1,
)
cluster = SLURMCluster(**slurm_options)

# Client through which all computations below are submitted to the cluster.
client = Client(cluster)

In [None]:
# Show the cluster object's notebook representation.
cluster

In [None]:
# Set the requested number of workers to zero.
# NOTE(review): with zero workers, work submitted through `client` presumably
# cannot start until the cluster is scaled up again (manually or via another
# cluster.scale call) -- confirm this is intended before using "Run All".
cluster.scale(0)

## Build and submit the processing graph

In [None]:
# Named reduction functions, passed as the first argument to
# segmentation.process_file. Only the mean is currently enabled.
# Alternative entries (disabled):
#   'median': np.median
#   'p0.05': partial(np.percentile, q=5)
#   'p0.20': partial(np.percentile, q=20)
#   'p0.70': partial(np.percentile, q=70)
#   'p0.95': partial(np.percentile, q=95)
funcs = dict(mean=np.mean)

In [None]:
# Root directory of the 190313 dataset; also used below for calibration files.
base13_filename = "/n/scratch2/jqs1/fidelity/190313"

# Glob patterns for the fluorescence .nd2 files, across three datasets.
fluorescence_patterns = [
    os.path.join(base13_filename, "fluorescence/*.nd2"),
    "/n/scratch2/jqs1/fidelity/190325/fluorescence/*/*.nd2",
    "/n/scratch2/jqs1/fidelity/190326/*.nd2",
]
fluorescence_filenames = [
    path for pattern in fluorescence_patterns for path in glob(pattern)
]

# Phase acquisitions: only the "*_0001.nd2" file of each set is collected here.
phase_patterns = [
    os.path.join(base13_filename, "phase/*_0001.nd2"),
    "/n/scratch2/jqs1/fidelity/190325/phase/*/*_0001.nd2",
]
phase_filenames = [path for pattern in phase_patterns for path in glob(pattern)]

# Sandwich acquisitions: likewise only the "*_0001.nd2" file of each set.
sandwich_filenames = glob(os.path.join(base13_filename, "sandwich/*_0001.nd2"))

In [None]:
# Load the dark calibration stack and average it over axis 0.
dark_frame_path = os.path.join(base13_filename, "calibration/dark_100ms.nd2")
dark_frames = segmentation.nd2_to_dask(dark_frame_path, 0, 0)

# TODO: hack -- the averaged frame is handed on as a single Delayed object.
# Only block (0, 0) of the averaged array is kept; this assumes the mean frame
# consists of exactly one chunk (TODO confirm).
# Alternatives that were considered instead of to_delayed():
#   dark_frame = dark_frame.compute()
#   dark_frame = client.persist(dark_frame)
#   dark_frame = client.scatter(dark_frame, broadcast=True)
dark_frame = dark_frames.mean(axis=0).to_delayed()[0, 0]

In [None]:
# Build one averaged flat-field frame per channel from the calibration files.
# If several files report the same first channel, the last one globbed wins.
flat_fields = {}
flat_field_pattern = os.path.join(base13_filename, "calibration/*flat*100ms*.nd2")
for filename in glob(flat_field_pattern):
    # Each file is keyed by the first channel listed in its metadata.
    channel = segmentation.get_nd2_reader(filename).metadata["channels"][0]
    # TODO: hack -- keep only block (0, 0) of the axis-0 mean as a Delayed
    # (assumes the averaged frame is a single chunk; TODO confirm).
    # Alternatives that were considered instead of to_delayed():
    #   flat_field = flat_field.compute()
    #   flat_field = client.scatter(flat_field, broadcast=True)
    #   flat_field = client.persist(flat_field)
    flat_field = segmentation.nd2_to_dask(filename, 0, 0).mean(axis=0).to_delayed()[0, 0]
    flat_fields[channel] = flat_field

In [None]:
# dark_frame = None
# flat_fields = {}

In [None]:
# Calibration keyword arguments shared by every process_file call below.
calibration = {"dark_frame": dark_frame, "flat_fields": flat_fields}

# Maps a filename to whatever segmentation.process_file returns for it.
# The `[:]` slices iterate over a copy of each list.
data_graph = {}

# Fluorescence acquisitions: the file is passed without any companion files
# and the entry is keyed by the file itself.
for photobleaching_filename in fluorescence_filenames[:]:
    data_graph[photobleaching_filename] = segmentation.process_file(
        funcs, photobleaching_filename, **calibration
    )

# Phase acquisitions: "<name>_0001.nd2" is processed together with its sibling
# "<name>.nd2", which is passed as segmentation_filename and used as the key.
for photobleaching_filename in phase_filenames[:]:
    segmentation_filename = photobleaching_filename.replace("_0001.nd2", ".nd2")
    data_graph[segmentation_filename] = segmentation.process_file(
        funcs,
        photobleaching_filename,
        segmentation_filename=segmentation_filename,
        **calibration,
    )

# Sandwich acquisitions: four sibling files per set, all derived from the
# "_0001" (initial) file by swapping its suffix.
for initial_filename in sandwich_filenames[:]:
    segmentation_filename, photobleaching_filename, final_filename = (
        initial_filename.replace("_0001.nd2", suffix)
        for suffix in (".nd2", "_0002.nd2", "_0003.nd2")
    )
    data_graph[segmentation_filename] = segmentation.process_file(
        funcs,
        photobleaching_filename,
        segmentation_filename=segmentation_filename,
        initial_filename=initial_filename,
        final_filename=final_filename,
        **calibration,
    )

In [None]:
# Submit every inner entry of data_graph as its own computation, so results can
# be gathered from multiple workers. A single compute of the whole dict would
# have one worker assemble it, and that worker would run out of memory.
# TODO: use recursive_map(..., levels=?)
data_futures = {
    source: {
        entry_key: client.compute(entry_graph)
        for entry_key, entry_graph in entries.items()
    }
    for source, entries in data_graph.items()
}

# Also compute the calibration frames, stored under a "_"-prefixed key so they
# can be told apart from per-file results.
data_futures["_calibration"] = client.compute(
    dict(dark_frame=dark_frame, flat_fields=flat_fields)
)

## Save data

In [None]:
# Collect the results of all submitted futures into this process, keeping the
# nested-dict layout of data_futures.
data = client.gather(data_futures)

In [None]:
# Destination of the pickled results; opening with "wb" overwrites any
# existing file at this path.
filename = (
    "/n/groups/paulsson/jqs1/molecule-counting/190405photobleaching_flatcorr.pickle"
)
# Serialize the gathered results with the default pickle protocol.
with open(filename, "wb") as f:
    pickle.dump(data, f)

In [None]:
# Summary for display: the maximum of the "labels" entry for each inner key of
# each file, skipping "_"-prefixed bookkeeping entries such as "_calibration".
{
    source: {
        position: result["labels"].max() for position, result in per_position.items()
    }
    for source, per_position in data.items()
    if source[0] != "_"
}