# Imports

In [None]:
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.feather as feather
import zarr
import dask
from dask import delayed
import distributed
from distributed import Client, LocalCluster, progress
from dask_jobqueue import SLURMCluster
import streamz
import streamz.dataframe as sdf
import holoviews as hv
from holoviews.streams import Stream, param
from holoviews.operation.datashader import regrid
from bokeh.models.tools import HoverTool
import matplotlib.pyplot as plt
import qgrid
import ipywidgets as widgets
from tqdm import tnrange, tqdm, tqdm_notebook
import warnings
from functools import partial
from cytoolz import *
from operator import getitem
import nd2reader
from importlib import reload
import traceback
import hvplot.pandas
import param
import parambokeh
from traitlets import All
import cachetools
from collections import namedtuple, defaultdict
from collections.abc import Mapping, Sequence
from numbers import Number
import skimage.morphology
import scipy
from glob import glob

IDX = pd.IndexSlice

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# from processing import *
# from trench_detection import *
# from trench_segmentation import *
# from trench_segmentation.watershed import *
# from util import *
# from ui import *
import common, trench_detection, util
import ui, diagnostics, metadata
import workflow, image, geometry
import trench_detection.hough, trench_detection.core
import trench_segmentation.watershed

In [None]:
%load_ext line_profiler
hv.extension("bokeh")
%matplotlib inline
tqdm.monitor_interval = 0

# Restore data

In [None]:
%store -r trench_points
%store -r trench_diag
%store -r trench_bboxes

# Config

In [None]:
# Create a Dask cluster whose workers run as SLURM jobs, then connect a client
# to it. Each job is requested on the "short" queue with a one-hour walltime,
# 4GB of memory, one core and one worker process.
cluster = SLURMCluster(
    queue="short",
    walltime="01:00:00",
    # job_extra=['-p transfer'],
    # job_extra=['--cores-per-socket=8'],
    # interface='ib0',
    memory="4GB",
    local_directory="/tmp",
    cores=1,
    processes=1,
    # diagnostics_port=('127.0.0.1', 8787),
    # Exported in the job environment; presumably so that the project modules
    # can be imported on the workers — confirm the path is still current.
    env_extra=['export PYTHONPATH="/home/jqs1/projects/matriarch"'],
)
client = Client(cluster)

In [None]:
cluster._widget().children[1].children[1].children[0].children[0].layout.width = "200px"
cluster

In [None]:
# List the keys of the jobs currently tracked in `cluster.running_jobs`
# (the same mapping whose keys are passed to `cluster.stop_jobs` below).
list(cluster.running_jobs)

In [None]:
cluster.stop_jobs(cluster.running_jobs.keys())

In [None]:
# client = Client()

# Loading data

In [None]:
# nd2_filenames = ['/n/scratch2/jqs1/fidelity/all/180405_txnerr.nd2', '/n/scratch2/jqs1/fidelity/all/180405_txnerr001.nd2']
# nd2_filenames = ['/n/scratch2/jqs1/fidelity/all/180405_txnerr002.nd2']#, '/n/scratch2/jqs1/fidelity/all/TrErr002_Exp.nd2']
# nd2_filenames = ['/n/scratch2/jqs1/fidelity/all/TrErr002_Exp.nd2']
# nd2_filenames = ['/n/scratch2/jqs1/fidelity/all/180405_txnerr.nd2', '/n/scratch2/jqs1/fidelity/all/180405_txnerr001.nd2',
#                 '/n/scratch2/jqs1/fidelity/all/180405_txnerr002.nd2', '/n/scratch2/jqs1/fidelity/all/TrErr002_Exp.nd2']
# nd2_filenames = ['/home/jqs1/scratch/fidelity/180518_triplegrowthcurve/PHASE_GC001.nd2', '/home/jqs1/scratch/fidelity/180518_triplegrowthcurve/PHASE_GC002.nd2']
nd2_filenames = glob("/n/scratch2/jqs1/fidelity/all/180405_*.nd2") + glob(
    "/n/scratch2/jqs1/fidelity/all/TrErr*.nd2"
)

In [None]:
# Load the frame list and metadata returned by workflow.get_nd2_frame_list for
# the ND2 files selected above, then derive the per-file image limits.
# NOTE(review): this rebinds the name `metadata`, which was imported as a
# module earlier in the notebook (`import ui, diagnostics, metadata`); after
# this cell the module is no longer reachable under that name.
all_frames, metadata, parsed_metadata = workflow.get_nd2_frame_list(nd2_filenames)
image_limits = workflow.get_filename_image_limits(metadata)

# Reload

In [None]:
def do_reload():
    """Import the project modules and reload `workflow` in the current process.

    All imports are done inside the function body, so the function does not
    rely on names from the notebook namespace; it is executed both on the
    Dask workers (through `client.run`) and in the notebook kernel.
    Only `workflow` is reloaded; the other reloads are left commented out.
    """
    import importlib

    import util
    import trench_detection
    import diagnostics
    import workflow
    import image

    # importlib.reload(util)
    # importlib.reload(trench_detection.hough)
    # importlib.reload(diagnostics)
    importlib.reload(workflow)
    # importlib.reload(image)


# Run on the workers first, then in the notebook kernel itself.
client.run(do_reload)
do_reload()

# Finding trenches

In [None]:
frames_to_process = all_frames.loc[IDX[:, :, ["MCHERRY"], 0], :]

In [None]:
len(frames_to_process)

## Run trench finding

In [None]:
%%time
find_trenches_diag = diagnostics.wrap_diagnostics(
    trench_detection.hough.find_trenches, ignore_exceptions=True, pandas=True
)
trench_info_futures = {
    idx: client.submit(
        find_trenches_diag, client.submit(workflow.get_nd2_frame, **idx._asdict())
    )
    for idx, row in util.iter_index(frames_to_process)
}

In [None]:
client.cancel(trench_info_futures)

In [None]:
def as_completed(obj, with_results=True):
    """Iterate over a mapping of futures in completion order, keeping the keys.

    Parameters
    ----------
    obj : Mapping
        Maps user keys to futures. Any other type is not supported.
    with_results : bool
        Forwarded to `distributed.as_completed`. When true, each item is
        `(key, future, result)`; otherwise each item is `(key, future)`.

    Raises
    ------
    NotImplementedError
        If `obj` is not a Mapping. Because this is a generator, the error is
        raised when iteration starts, not when the function is called.
    """
    if not isinstance(obj, Mapping):
        raise NotImplementedError
    # Reverse lookup from the future's own key back to the user's key.
    user_key_of = {fut.key: user_key for user_key, fut in obj.items()}
    completed = distributed.as_completed(obj.values(), with_results=with_results)
    for item in completed:
        if not with_results:
            yield user_key_of[item.key], item
            continue
        fut, value = item
        yield user_key_of[fut.key], fut, value

In [None]:
trench_info = {}
for key, fut, res in as_completed(trench_info_futures):
    trench_info[key] = res
    client.cancel(fut)

In [None]:
progress(trench_info_futures)

In [None]:
%%time
# NOTE(review): the predicate here selects futures with status == "error" and
# the outcome is stored as `trench_info`, whereas the other cells in this
# notebook that keep gathered results filter on status == "finished".
# Confirm which status is intended before relying on `trench_info` from here.
trench_info = util.apply_map_futures(
    client.gather, trench_info_futures, predicate=lambda x: x.status == "error"
)

In [None]:
%%time
%store trench_info

In [None]:
len(trench_info)

In [None]:
len(errs)

In [None]:
errs = {k: v[2] for k, v in trench_info.items() if v[2] is not None}
errs

In [None]:
%%time
trench_points, trench_diag, trench_err = workflow.unzip_trench_info(trench_info)

In [None]:
len(trench_points)

In [None]:
%%time
%store trench_points
%store trench_diag

## Analysis

In [None]:
bad_angle = trench_diag["find_trench_lines.hough_2.angle"].abs() > 2
bad_angle.sum()

In [None]:
bad_pitch = (trench_diag["find_trench_lines.hough_2.peak_func.pitch"] - 24).abs() > 1
bad_pitch.sum()

In [None]:
selected = trench_diag[bad_pitch]  # trench_diag[bad_angle | bad_period]

In [None]:
frame_stream.event(_df=selected.index.to_frame(index=False))

In [None]:
%%time
trench_points_good = trench_points[~util.multi_join(trench_points.index, bad_pitch)]

In [None]:
(len(trench_points_good), len(trench_points_good) / len(trench_points))

In [None]:
%%time
trench_bbox_futures = []
for _, trenches in trench_points_good.groupby(["filename", "position", "t"]):
    trench_bbox_futures.append(
        client.submit(workflow.get_trench_bboxes, trenches, image_limits)
    )

In [None]:
%%time
trench_bbox_results = util.apply_map_futures(
    client.gather, trench_bbox_futures, predicate=lambda x: x.status == "finished"
)
trench_bboxes = pd.concat(
    [trench_points_good, pd.concat(trench_bbox_results, axis=0)], axis=1
)

In [None]:
%%time
%store trench_bboxes

In [None]:
%store -r trench_bboxes

In [None]:
trench_bboxes_t0 = util.get_one(trench_bboxes.groupby("t"))[1]
# trench_bboxes_t0.index = trench_points_good_t0.index.droplevel('t')

# Trench finding QA

In [None]:
selected = all_frames

In [None]:
FrameStream = ui.DataframeStream.define(
    "FrameStream", selected.index.to_frame(index=False)
)
frame_stream = FrameStream()

box = ui.dataframe_browser(frame_stream)
frame_stream.event()
box

In [None]:
ui.image_viewer(frame_stream)

In [None]:
ui.show_frame_info(trench_diag, frame_stream)

In [None]:
g = ui.show_grid(selected, stream=frame_stream)
g

In [None]:
frame = workflow.get_nd2_frame(**dict(frame_stream.get_param_values()))

In [None]:
# Run the wrapped trench finder on a single frame with exceptions propagated
# (ignore_exceptions=False), keeping the first two returned values.
# NOTE(review): `frame_rot` is not defined anywhere in the visible notebook;
# the preceding cell defines `frame` — confirm which one is meant.
tp, diag, _ = diagnostics.wrap_diagnostics(
    trench_detection.hough.find_trenches, ignore_exceptions=False
)(frame_rot)

In [None]:
ui.show_plot_browser(diag)

# Segmentation

In [None]:
selected_trenches_segmentation = trench_bboxes[
    trench_bboxes[("info", "hough_value")] > 90
].loc[IDX[:, :, ["MCHERRY"], 0, :, :], :]

In [None]:
# frames_to_analyze = all_frames.loc[IDX[:,:1,['MCHERRY','YFP'],1:5],:]
frames_to_analyze = all_frames.loc[IDX[:, :10, ["MCHERRY", "YFP"], :], :]

In [None]:
(
    len(frames_to_analyze),
    len(all_frames.loc[IDX[:, :, ["MCHERRY", "YFP"], :], :]) / len(frames_to_analyze),
)

In [None]:
%%time
labelwise_funcs = {
    "mean": np.mean,
    "min": np.min,
    "max": np.max,
    ("p0.3", "p0.5", "p0.7", "p0.9", "p0.95"): partial(
        np.percentile, q=(30, 50, 70, 90, 95)
    ),
}
trenchwise_funcs = {"sharpness": image.sharpness, **labelwise_funcs}
framewise_funcs = {"sharpness": image.sharpness, **labelwise_funcs}

analyze_trench_func = partial(
    workflow.analyze_trenches,
    framewise_funcs=framewise_funcs,
    trenchwise_funcs=trenchwise_funcs,
    labelwise_funcs=labelwise_funcs,
    regionprops=False,
    segment_func=trench_segmentation.watershed.segment_trench,
)

analyze_trench_func = partial(client.submit, analyze_trench_func)

analysis_futures = workflow.analyze_frames_and_trenches(
    selected_trenches_segmentation, frames_to_analyze, analyze_trench_func
)

# display(trenchwise_df)

In [None]:
client.cancel(analysis_futures)

In [None]:
progress(analysis_futures)

In [None]:
util.apply_map_futures(
    client.gather, analysis_futures, predicate=lambda x: x.status == "error"
)

In [None]:
%%time
ac = distributed.as_completed(analysis_futures, with_results=True)
writers = {}
for future, dfs in ac:
    # for future in analysis_futures:
    #    dfs = client.gather(future)
    # for dfs in analysis_res:
    try:
        tables = [pa.Table.from_pandas(df) for df in dfs]
        for i in range(len(tables)):
            if i not in writers:
                writers[i] = pq.ParquetWriter(
                    "/n/scratch2/jqs1/fidelity/all/output/analysis10_ac_{}.parquet".format(
                        i
                    ),
                    tables[i].schema,
                )
            writers[i].write_table(tables[i])
        client.cancel(future)
    except Exception as e:
        print("ERROR", e)
for writer in writers.values():
    writer.close()

In [None]:
for writer in writers.values():
    writer.close()

In [None]:
%%time
analysis_res = util.apply_map_futures(
    client.gather, analysis_futures, predicate=lambda x: x.status == "finished"
)

In [None]:
%%time
framewise_df, trenchwise_df, labelwise_df = workflow.concat_unzip_dataframes(
    analysis_res
)

In [None]:
%%time
with open("/n/scratch2/jqs1/fidelity/all/output/framewise10.feather", "wb") as f:
    feather.write_feather(framewise_df, f)

In [None]:
%%time
with open("/n/scratch2/jqs1/fidelity/all/output/trenchwise10.feather", "wb") as f:
    feather.write_feather(trenchwise_df, f)

In [None]:
%%time
with open("/n/scratch2/jqs1/fidelity/all/output/labelwise10.feather", "wb") as f:
    feather.write_feather(labelwise_df, f)

In [None]:
%%time
pq.write_table(
    pa.Table.from_pandas(labelwise_df),
    "/n/scratch2/jqs1/fidelity/all/output/labelwise10.parquet",
)

In [None]:
labelwise_df.info(memory_usage="deep")

In [None]:
labelwise_df.reset_index().groupby(
    ["filename", "position", "t", "trench_set", "trench"]
).count()

# Analysis

## Load data

In [None]:
%%time
framewise_df = pq.read_pandas(
    "/n/scratch2/jqs1/fidelity/all/output/analysis3_ac_0.parquet"
).to_pandas()

In [None]:
%%time
trenchwise_df = pq.read_pandas(
    "/n/scratch2/jqs1/fidelity/all/output/analysis3_ac_1.parquet"
).to_pandas()

In [None]:
%%time
labelwise_df = pq.read_table(
    "/n/scratch2/jqs1/fidelity/all/output/analysis3_ac_2.parquet"
).to_pandas()

In [None]:
labelwise_df.index.names = [
    "filename",
    "position",
    "t",
    "trench_set",
    "trench",
    "label",
]
labelwise_df.sort_index(inplace=True)

In [None]:
%%time
trenchwise_df = feather.read_feather(
    "/home/jqs1/scratch/fidelity/all/output/trenchwise10_df2.feather"
)

In [None]:
%%time
labelwise_df = feather.read_feather(
    "/home/jqs1/scratch/fidelity/all/output/labelwise10_df2.feather"
)

In [None]:
len(labelwise_df)

In [None]:
framewise_df.head()

In [None]:
trenchwise_df.head()

In [None]:
labelwise_df.head()

## Basic stats

In [None]:
labelwise_df2 = labelwise_df.copy()
labelwise_df2.columns = ["_".join(col).strip() for col in labelwise_df.columns.values]

In [None]:
labelwise_df2.loc[:, "MCHERRY_regionprops_area"].plot(kind="hist", bins=100, log=False)

In [None]:
selected_cells = labelwise_df2[
    (50 < labelwise_df2["MCHERRY_regionprops_area"])
    & (labelwise_df2["MCHERRY_regionprops_area"] < 300)
]

In [None]:
selected_cells.loc[:, "MCHERRY_regionprops_area"].plot(kind="hist", bins=100, log=False)

In [None]:
selected_cells.loc[:, "MCHERRY_labelwise_p0.5"].plot(kind="hist", bins=100, log=False)

In [None]:
selected_cells.loc[:, "YFP_labelwise_p0.5"].plot(kind="hist", bins=100, log=True)

In [None]:
selected_cells[:10000].hvplot(
    x="MCHERRY_labelwise_p0.5", y="YFP_labelwise_p0.5", kind="scatter"
)

## Transcription errors

In [None]:
selected_cells.head()

In [None]:
selected_cells.index[0][:-1]

In [None]:
trench_bboxes.head()

In [None]:
selected_cells.head()

In [None]:
selected_cells.index[0][2:-1]

In [None]:
selected_trenches = trench_bboxes.xs(
    (*selected_cells.index[0][:2], "MCHERRY", 0, *selected_cells.index[0][3:-1]),
    drop_level=False,
)

In [None]:
selected_trenches.head()

In [None]:
workflow.get_trench_stacks(selected_trenches, all_frames, image_limits)

# Trench UI

In [None]:
LabelStream = ui.MultiIndexStream.define("LabelStream", labelwise_df.index)
label_stream = LabelStream()

box = ui.dataframe_browser(label_stream)
label_stream.event()
box

In [None]:
%%opts Layout [normalize=False]
(
    ui.trench_viewer(trench_bboxes, label_stream, channel="MCHERRY")
    + ui.trench_viewer(trench_bboxes, label_stream, channel="YFP")
).cols(1)

In [None]:
%timeit workflow.get_trench_image(trench_bboxes, channel='MCHERRY', **util.get_keys(label_stream.contents, 'filename', 'position', 't', 'trench_set', 'trench'))

In [None]:
%lprun -f

In [None]:
trench_bboxes.loc[
    ("/n/scratch2/jqs1/fidelity/all/180405_txnerr.nd2", 0, "MCHERRY", 0, 1, 3), :
].shape

In [None]:
plt.imshow(
    workflow.get_trench_image(
        trench_bboxes,
        channel="MCHERRY",
        **dissoc(label_stream.contents, "_df", "label"),
    )
)

In [None]:
workflow.get_nd2_frame(
    channel="MCHERRY",
    **util.get_keys(label_stream.contents, "filename", "position", "t"),
)

In [None]:
f = partial(util.get_keys, keys=["label"])
f(label_stream.contents)

In [None]:
label_stream.contents.keys()

In [None]:
ui.image_viewer(frame_stream)

In [None]:
ui.show_frame_info(trench_diag, frame_stream)

In [None]:
g = ui.show_grid(selected, stream=frame_stream)
g

# Memory

In [None]:
from pympler import tracker

memory_tracker = tracker.SummaryTracker()
memory_tracker.print_diff()

In [None]:
from pympler.classtracker import ClassTracker

# NOTE(review): this rebinds `tracker`, which the previous cell imported as the
# `pympler.tracker` module. `track_object()` is also called without any
# object to track — presumably an unfinished cell; confirm before running.
tracker = ClassTracker()
tracker.track_object()

In [None]:
import objgraph

In [None]:
s = objgraph.by_type("IOStream")

In [None]:
s[0]

In [None]:
objgraph.show_backrefs(s[50])

In [None]:
objgraph.show_most_common_types(50)