In [None]:
import numpy as np
import pandas as pd
import nd2reader
import matplotlib.pyplot as plt
import holoviews as hv

# from holoviews.operation.datashader import regrid
import skimage

# import skimage.filters
# import skimage.feature
# import scipy.ndimage
from tqdm.auto import tqdm
import dask
import dask.array as da
import distributed
from distributed import Client, LocalCluster, progress
from dask_jobqueue import SLURMCluster
from cytoolz import partial, compose, juxt
from itertools import repeat
from pathlib import Path
from glob import glob

# import cachetools

# import numpy_indexed
import pickle

# import pyarrow as pa
import warnings
import os
from numbers import Integral
from dask.delayed import Delayed

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.projects.sigma_circuits.segmentation as segmentation
import paulssonlab.projects.sigma_circuits.matriarch_stub as matriarch_stub
import paulssonlab.projects.sigma_circuits.experiment as experiment
import paulssonlab.io.metadata as metadata

In [None]:
# Default matplotlib figure size (width, height) used by every plot in this
# notebook; holoviews renders through its bokeh backend.
FIGSIZE = (20, 10)
plt.rcParams["figure.figsize"] = FIGSIZE
hv.extension("bokeh")

# Config

In [None]:
# Apply the overrides through dask.config.set with dotted keys so they are
# merged into the existing configuration tree. Assigning a fresh dict to
# config["distributed"]["worker"]["memory"] (as before) replaced the whole
# subtree and dropped every other default key stored under it.
# NOTE(review): the worker.memory.* values are set in this (client/scheduler)
# process; confirm they actually reach the SLURM-launched worker processes.
dask.config.set(
    {
        # tolerate more worker deaths before a task is marked as failed
        "distributed.scheduler.allowed-failures": 6,
        # fractions of the per-worker memory limit
        "distributed.worker.memory.target": 0.7,
        "distributed.worker.memory.spill": 0.8,
        "distributed.worker.memory.pause": 0.9,
        "distributed.worker.memory.terminate": 0.95,
    }
)

In [None]:
# cluster = LocalCluster(scheduler_options=dict(dashboard_address="0.0.0.0:8787"))
# client = Client(cluster)

In [None]:
# Options for each SLURM job: one single-core, single-process worker with 5GB
# of memory on the "short" queue, limited to five hours of walltime.
slurm_options = {
    "queue": "short",
    "walltime": "05:00:00",
    "memory": "5GB",
    "cores": 1,
    "processes": 1,
    "local_directory": "/tmp",
    "log_directory": "/home/jqs1/projects/paulssonlab/paulssonlab/src/paulssonlab/projects/sigma_circuits/log",
    # expose the dashboard on all interfaces, port 8787
    "scheduler_options": {"dashboard_address": "0.0.0.0:8787"},
}
cluster = SLURMCluster(**slurm_options)
# Client used by all later cells to submit work to and gather results from the cluster.
client = Client(cluster)

In [None]:
# Display the cluster object as the cell output.
cluster

In [None]:
# Restart the workers attached to this client so the run starts from a clean
# state (any previously submitted work is discarded).
client.restart()

In [None]:
# Fixed-size alternative, kept for reference:
# cluster.scale(30)
# Scale adaptively with the workload, capped at 100 workers.
cluster.adapt(maximum=100)

# Segmentation

In [None]:
# Location of the ND2 file processed by this notebook; the sidecar files
# (mux.log, experiment.txt) are looked up in the same directory further down.
nd2_filename = "/n/scratch3/groups/hms/sysbio/paulsson/jqs1/210602/RBS_new_terminators.nd2"
# Reader handle, reused below for metadata and grid extraction.
nd2 = matriarch_stub.get_nd2_reader(nd2_filename)

In [None]:
%%time
# Build (but do not run) the segmentation graph: delayed=True defers execution,
# and slice(None) selects every time point and every position.
everything = slice(None)
data_graph = segmentation.process(
    nd2_filename,
    time_slice=everything,
    position_slice=everything,
    delayed=True,
)

In [None]:
%%time
# Submit one compute per top-level key (position) instead of a single compute
# for the whole graph, so we can gather results from multiple workers
# (otherwise the single worker assembling the dict will run out of memory).
# TODO: use recursive_map(..., levels=?)
# TODO: why doesn't the second-level compute work but the first-level does?
# (segment only gets run after all frames are loaded into memory)
#
# Alternatives that were tried:
# data_futures = {
#     k: {k2: client.compute(v2) for k2, v2 in v.items()} for k, v in data_graph.items()
# }
# data_futures = client.compute(data_graph)
data_futures = {
    pos: client.compute(pos_graph) for pos, pos_graph in data_graph.items()
}

In [None]:
# Manual clean-up step: uncomment and run this only when you want to drop the
# futures (e.g. to abandon a run). It is disabled by default because under
# "Restart & Run All" it would delete `data_futures` before the
# `client.gather(data_futures)` cell further down, which then fails with
# NameError.
# del data_futures

In [None]:
# TODO: only gather non-failed futures

In [None]:
# Scratch check for the manual, incremental gather in the commented-out cells
# below. `stored` is only created by that commented-out code, so leaving this
# line active raises NameError on a fresh kernel.
# stored.keys()

In [None]:
# stored = {}

In [None]:
# for k, v in data_futures.items():
#     print(k)
#     if k in (104, 115):
#         continue
#     if k not in stored:
#         stored[k] = client.gather(v)

In [None]:
# dfs = stored

In [None]:
%%time
# Wait for the submitted futures and pull their results into this process.
# The result has the same keys as `data_futures`; each value is a mapping
# whose entries may be None (see the `failed_pos` cell below).
# NOTE(review): with the default error handling a failed future makes gather
# raise — see the TODO above about gathering only non-failed futures.
dfs = client.gather(data_futures)

In [None]:
# Positions that have at least one entry that came back as None.
failed_pos = {
    pos
    for pos, frames in dfs.items()
    if not all(frame is not None for frame in frames.values())
}
failed_pos

In [None]:
# Stack the per-position, per-time results into one table whose two outermost
# index levels are named "pos" and "t".
# Entries that are None are dropped. Positions left with no entries at all are
# skipped entirely: pd.concat raises ValueError ("No objects to concatenate")
# when handed an empty mapping, which previously aborted the whole cell as soon
# as a single position had failed on every frame.
frames_by_pos = {
    pos: {t: frame for t, frame in frames.items() if frame is not None}
    for pos, frames in dfs.items()
}
df = pd.concat(
    {
        pos: pd.concat(frames, names=["t"])
        for pos, frames in frames_by_pos.items()
        if frames
    },
    names=["pos"],
)

In [None]:
# Display the concatenated table as the cell output.
df

In [None]:
# Report column dtypes and the real memory footprint of the table.
# "deep" is the string value DataFrame.info documents for memory_usage; the
# previous "full" was merely truthy, so it behaved like memory_usage=True and
# reported only a shallow estimate for object columns.
df.info(memory_usage="deep")

In [None]:
# Parse the "mux.log" file that sits in the same directory as the ND2 file.
mux_log = experiment.parse_mux_log(Path(nd2_filename).parent / "mux.log")

In [None]:
# Extract the metadata from the ND2 reader opened above; saved under the
# "metadata" key of the output bundle.
md = metadata.parse_nd2_metadata(nd2)

In [None]:
# Optional "experiment.txt" stored next to the ND2 file; None when absent.
experiment_txt_file = Path(nd2_filename).parent / "experiment.txt"
# read_text() opens, reads and closes the file in one call. The previous
# open().read() never closed the handle explicitly.
experiment_txt = (
    experiment_txt_file.read_text() if experiment_txt_file.exists() else None
)

In [None]:
# Grid information derived from the ND2 reader; saved under the "grid" key of
# the output bundle.
# NOTE(review): presumably a DataFrame describing the position grid — confirm
# against experiment.get_grid.
grid_df = experiment.get_grid(nd2)

In [None]:
# Bundle the measurement table with everything needed to interpret it; this
# mapping is what gets pickled in the "Save data" section.
data = dict(
    [
        ("table", df),
        ("metadata", md),
        ("mux_log", mux_log),
        ("experiment.txt", experiment_txt),
        ("grid", grid_df),
    ]
)

## Save data

In [None]:
# Destination of the pickled bundle (overwritten if it already exists).
filename = "/n/groups/paulsson/jqs1/sigma-circuits/210605_expt210602_1.pickle"
with open(filename, mode="wb") as pickle_file:
    # stream straight to disk with pickle's default protocol
    pickle.dump(data, pickle_file)