# Imports

In [None]:
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.feather as feather
import zarr
import dask
from dask import delayed
import distributed
from distributed import Client, LocalCluster, progress
from dask_jobqueue import SLURMCluster
import streamz
import streamz.dataframe as sdf
import holoviews as hv
from holoviews.streams import Stream, param, Selection1D
from holoviews.operation.datashader import regrid
from bokeh.models.tools import HoverTool, TapTool
import matplotlib.pyplot as plt
import qgrid
import ipywidgets as widgets
from tqdm import tnrange, tqdm, tqdm_notebook
import warnings
from functools import partial
from cytoolz import *
from operator import getitem
import nd2reader
from importlib import reload
import traceback
import hvplot.pandas
import param
import parambokeh
from traitlets import All
import cachetools
from collections import namedtuple, defaultdict
from collections.abc import Mapping, Sequence
from numbers import Number
import skimage.morphology
import scipy
from glob import glob
import asyncio
import time

IDX = pd.IndexSlice

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# from processing import *
# from trench_detection import *
# from trench_segmentation import *
# from trench_segmentation.watershed import *
# from util import *
# from ui import *
import common, trench_detection, util
import ui, diagnostics, metadata
import workflow, image, geometry
import trench_detection.hough, trench_detection.core
import trench_segmentation.watershed

In [None]:
#%load_ext line_profiler
hv.extension("bokeh")
%matplotlib inline
tqdm.monitor_interval = 0
asyncio.get_event_loop().set_debug(True)
import logging

logging.basicConfig(level=logging.DEBUG)

# Restore data

In [None]:
%store -r trench_points
%store -r trench_diag
%store -r trench_bboxes
trench_bboxes_t0 = util.get_one(trench_bboxes.groupby("t"))[1]

# Analysis

## Arrow

In [None]:
arrow_filename = (
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset1000.arrow"
)

In [None]:
columns = ["position", "label"]

In [None]:
columns_to_drop = list(set(c.name for c in table0.columns) - set(columns))

In [None]:
columns_to_drop

In [None]:
table1 = table0.drop(columns_to_drop)

In [None]:
reader = pa.open_stream(arrow_filename)
table0 = pa.Table.from_batches([reader.read_next_batch()])
imos = pa.BufferOutputStream()
# The stream writer takes the output sink and a schema (not a finished buffer
# and a table). Call imos.getvalue() only once writing is done and the writer
# has been closed.
writer = pa.RecordBatchStreamWriter(imos, table0.schema)

In [None]:
def read_arrow(arrow_filename, columns, categorical_columns=None, batch_size=1000):
    """Re-stream a subset of columns of an Arrow stream into memory.

    The stream at ``arrow_filename`` is opened with ``pa.open_stream``, every
    column whose name is not in ``columns`` is dropped, and the remaining data
    is written to an in-memory ``pa.BufferOutputStream``. Progress (elapsed
    time and rows per second for each group of batches) is printed as the
    stream is consumed.

    Parameters
    ----------
    arrow_filename
        Source passed unchanged to ``pa.open_stream``.
    columns
        Names of the columns to keep.
    categorical_columns
        Accepted for interface compatibility; not used by this function.
    batch_size
        Group size handed to ``util.grouper``; each group of record batches
        is combined into one table before being written.
        NOTE(review): assumes ``util.grouper`` yields groups that contain
        only record batches (no padding values) -- confirm against ``util``.

    Returns
    -------
    A stream reader (``pa.open_stream``) over the in-memory buffer holding
    the column-filtered data.
    """
    reader = pa.open_stream(arrow_filename)
    # The first batch is read eagerly to learn which columns exist.
    first_table = pa.Table.from_batches([reader.read_next_batch()])
    wanted = set(columns)
    columns_to_drop = [
        name for name in first_table.schema.names if name not in wanted
    ]
    first_table = first_table.drop(columns_to_drop)
    sink = pa.BufferOutputStream()
    writer = pa.RecordBatchStreamWriter(sink, first_table.schema)
    try:
        # The first batch was consumed from the reader above, so it has to be
        # written explicitly; otherwise its rows are missing from the output.
        writer.write_table(first_table)
        t0 = time.time()
        for i, batches in enumerate(util.grouper(reader, batch_size)):
            t = time.time()
            dt = t - t0
            t0 = t
            print("batch", i, "time {:.2f}".format(dt))
            table = pa.Table.from_batches(batches).drop(columns_to_drop)
            # Guard against a zero interval on coarse clocks.
            rate = len(table) / dt if dt > 0 else float("inf")
            print("    rows per second", rate)
            writer.write_table(table)
    finally:
        # Close the writer so the stream is terminated before it is read back.
        writer.close()
    return pa.open_stream(sink.getvalue())

In [None]:
# arrow_filename = '/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset1000.arrow'
# arrow_filename = '/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset15000.arrow'
# arrow_filename = '/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset15000.arrow'
arrow_filename = "/tmp/analysis_full_stream11_2.arrow"

In [None]:
%%time
# cols = ['position', 'label', 'filename', 't', 'trench', 'trench_set', 'channel']
# cols = ["('YFP', 'labelwise', 'p0.9')"]
cols = [
    "filename",
    "position",
    "channel",
    "t",
    "trench_set",
    "trench",
    "label",
    "('YFP', 'labelwise', 'p0.9')",
    "('MCHERRY', 'labelwise', 'p0.9')",
    "('YFP', 'regionprops', 'area')",
]
b = read_arrow(arrow_filename, cols)

In [None]:
%time table = b.read_all()

In [None]:
len(table)

In [None]:
%%time
tp = table.to_pandas(zero_copy_only=True)

In [None]:
%%time
tp.info()  # memory_usage='deep')

In [None]:
%%time
pq.write_table(
    table, "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.3cols.parquet"
)

In [None]:
%%time
pq.write_table(
    table,
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.3cols.rg1000.parquet",
    row_group_size=1000,
)

In [None]:
%%time
pq.write_table(
    table,
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.3cols.rg1000000.parquet",
    row_group_size=1000000,
)

In [None]:
%%time
pq.write_table(
    table,
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.3cols.rg100000.parquet",
    row_group_size=100000,
)

In [None]:
%%time
pq.write_table(
    pa.Table.from_pandas(tp),
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.3cols.roundtrip.parquet",
)

In [None]:
pq.write_table?

In [None]:
%%time
tp = pq.read_pandas(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.3cols.rg100000.parquet"
)  # .to_pandas()

In [None]:
# parquet_filename = '/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.3cols.rg100000.parquet'
parquet_filename = "/tmp/analysis_full_stream11_2.3cols.rg100000.parquet"
f = pq.ParquetFile(parquet_filename)

In [None]:
f.num_row_groups

In [None]:
len(f.read_row_group(0))

In [None]:
t = f.read_row_group(0)

In [None]:
t

In [None]:
%%time
t0 = time.time()
for i in range(f.num_row_groups):
    row_group = f.read_row_group(i)
    if i % 100 == 0:
        t = time.time()
        dt = t - t0
        t0 = t
        print("batch", i, "time {:.2f}".format(dt))

In [None]:
%%time
tpq = pa.concat_tables([f.read_row_group(i) for i in range(f.num_row_groups)])

In [None]:
c = tpq.column(3)

In [None]:
tpq2 = tpq.set_column(3, tpq.column(3).dictionary_encode())

In [None]:
%%time
pq.write_table(
    tpq2,
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.3cols.rg10000.dict.parquet",
    row_group_size=10000,
)

In [None]:
pa.write

In [None]:
%%time
c.dictionary_encode()

In [None]:
%%time
tpqs = tpq.to_pandas(strings_to_categorical=True)

In [None]:
tpqs

In [None]:
x = pa.array(list("1" * 2**30))

demo = "demo.parquet"


def scenario():
    """Write the module-level array ``x`` to ``demo`` twice and read it back.

    Builds a single-column table from ``x``, writes that table two times to
    the Parquet file named by ``demo`` and then reads the whole file. The
    function returns nothing; it exists to trigger the read error quoted
    below.
    """
    t = pa.Table.from_arrays([x], ["x"])
    # The context manager closes the writer even if a write fails.
    with pq.ParquetWriter(demo, t.schema) as writer:
        for _ in range(2):
            writer.write_table(t)

    pf = pq.ParquetFile(demo)

    # pyarrow.lib.ArrowIOError: Arrow error: Invalid: BinaryArray cannot contain more than 2147483646 bytes, have 2147483647
    pf.read()

In [None]:
scenario()

In [None]:
%%time
f.read().to_pandas()

In [None]:
f.read_row_group(10000).to_pandas()

In [None]:
%%time
fastparquet.write(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.3cols.fastparquet",
    tp,
)

In [None]:
tp

## Parquetify

In [None]:
def parquetify(arrow_filename, parquet_filename=None, batch_size=1000):
    """Convert an Arrow stream file to a Parquet file.

    The first record batch is read to obtain the schema and written as its
    own table; the remaining batches are grouped with ``util.grouper`` and
    each group is written as one table.

    Parameters
    ----------
    arrow_filename
        Path of the Arrow stream file, passed to ``pa.open_stream``.
    parquet_filename
        Output path. When ``None``, it is derived from ``arrow_filename`` by
        replacing a trailing ``.arrow`` with ``.parquet`` (or appending
        ``.parquet`` when there is no such suffix).
    batch_size
        Group size handed to ``util.grouper``.
        NOTE(review): assumes ``util.grouper`` yields groups that contain
        only record batches -- confirm against ``util``.
    """
    if parquet_filename is None:
        # Only swap a trailing ".arrow". A plain str.replace would also
        # rewrite directory names containing ".arrow" and, for names without
        # the suffix, would make the output path equal to the input path.
        suffix = ".arrow"
        if arrow_filename.endswith(suffix):
            stem = arrow_filename[: -len(suffix)]
        else:
            stem = arrow_filename
        parquet_filename = stem + ".parquet"
    reader = pa.open_stream(arrow_filename)
    table0 = pa.Table.from_batches([reader.read_next_batch()])
    with pq.ParquetWriter(parquet_filename, table0.schema) as writer:
        writer.write_table(table0)
        for batches in util.grouper(reader, batch_size):
            writer.write_table(pa.Table.from_batches(batches))

In [None]:
%%time
parquetify(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_0.arrow",
    parquet_filename="/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_0.test.parquet",
)

In [None]:
z = pq.ParquetFile(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.parquet"
)

In [None]:
z.num_row_groups

In [None]:
%%time
pq.read_table(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_0.test.parquet"
).to_pandas()

In [None]:
%%time
parquetify("/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_0.arrow")

In [None]:
%%time
parquetify("/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.arrow")

In [None]:
%%timeit
open(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.parquet", "rb"
).read()

In [None]:
%%time
pq.read_table(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.parquet", nthreads=4
).to_pandas()

## Read Parquet from memory

In [None]:
buf = pa.BufferOutputStream()

In [None]:
in_file = pa.OSFile(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.arrow"
)

In [None]:
%%time
buf.upload(in_file, buffer_size=2**20)

In [None]:
bufr = pa.BufferReader(buf.getvalue())

In [None]:
bufr.seek(0)

In [None]:
%%time
t = pa.open_stream(bufr).read_all()

In [None]:
in_file.seek(0)

In [None]:
pa.MemoryMappedFile?

In [None]:
in_file2 = pa.MemoryMappedFile(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.arrow"
)

In [None]:
in_file2 = pa.memory_map(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.arrow"
)

In [None]:
in_file2.seek(0)

In [None]:
in_file2.read(100)

In [None]:
%%time
tt = pa.open_stream(in_file2).read_all()

In [None]:
t = pq.read_table(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.parquet", nthreads=1
)

In [None]:
%%time
pq.write_table(
    t,
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.test.parquet",
    compression="zstd",
    row_group_size=10000,
    version="2.0",
)

In [None]:
%%time
pq.read_table(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.test.parquet",
    nthreads=4,
)

In [None]:
%prun pq.read_table('/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.parquet', nthreads=1)

In [None]:
%prun pq.ParquetFile(bufr).read()

In [None]:
%%time
pq.ParquetFile(bufr).read()

## Parquetify2

In [None]:
def copy_arrow(in_filename, out_filename, length=None, batch_size=1000):
    """Copy (a prefix of) an Arrow stream file, regrouping its batches.

    The input is memory-mapped and read as a record batch stream. The first
    batch is always copied; after that, batches are grouped with
    ``util.grouper`` and each group is written as one table.

    Parameters
    ----------
    in_filename
        Path of the Arrow stream file to read.
    out_filename
        Path of the Arrow stream file to create (opened with mode ``"wb"``).
    length
        When not ``None``, at most ``length`` batches are copied *in
        addition to* the first batch; otherwise the whole stream is copied.
    batch_size
        Group size handed to ``util.grouper``.
        NOTE(review): assumes ``util.grouper`` yields groups that contain
        only record batches -- confirm against ``util``.
    """
    from contextlib import closing

    with closing(pa.memory_map(in_filename)) as in_file:
        reader = pa.RecordBatchStreamReader(in_file)
        with closing(pa.OSFile(out_filename, "wb")) as out_file:
            table0 = pa.Table.from_batches([reader.read_next_batch()])
            # Closing the writer terminates the stream; closing the file
            # afterwards flushes it to disk.
            with closing(
                pa.RecordBatchStreamWriter(out_file, table0.schema)
            ) as writer:
                writer.write_table(table0)
                remaining = reader if length is None else take(length, reader)
                for batches in util.grouper(remaining, batch_size):
                    writer.write_table(pa.Table.from_batches(batches))

In [None]:
%%time
copy_arrow(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.arrow",
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.arrow",
    batch_size=1000,
    length=1000,
)

In [None]:
%%time
copy_arrow(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.arrow",
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.rebatched1000.arrow",
    batch_size=1000,
    length=None,
)

In [None]:
%%time
copy_arrow(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.arrow",
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.rebatched10000.arrow",
    batch_size=10000,
    length=None,
)

In [None]:
%%time
copy_arrow(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.arrow",
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.rebatched100.arrow",
    batch_size=100,
    length=None,
)

In [None]:
%%time
parquetify2(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.arrow",
    batch_size=1,
    length=1000,
)

In [None]:
%%time
parquetify2(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.rebatched100.arrow",
    batch_size=1,
    length=1000,
)

In [None]:
%%time
parquetify2(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.rebatched1000.arrow",
    batch_size=1,
    length=1000,
)

In [None]:
%%time
parquetify2(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.rebatched10000.arrow",
    batch_size=1,
    length=1000,
)

In [None]:
in_file = pa.OSFile(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset.arrow"
)

In [None]:
reader = pa.RecordBatchStreamReader(in_file)

In [None]:
batch = reader.read_next_batch()

In [None]:
batches = list(take(10, reader))

In [None]:
table = pa.Table.from_batches(batches)

In [None]:
len(table)

In [None]:
batches2 = table.to_batches(chunksize=10000)

In [None]:
len(batches2)

In [None]:
len(batches[0])

In [None]:
len(batches2[0])

In [None]:
col = table.column(0)

In [None]:
col.data

In [None]:
pa.Table.from_batches([batches2[0]]).to_pandas().info(memory_usage="deep")

In [None]:
len(batches2[0])

In [None]:
len(batch)

In [None]:
def parquetify2(arrow_filename, parquet_filename=None, batch_size=1000, length=None):
    """Convert (a prefix of) an Arrow stream file to a Parquet file.

    The input is opened as a ``pa.OSFile`` and read as a stream. The first
    record batch supplies the schema and is written as its own table; the
    following batches are grouped with ``util.grouper`` and each group is
    written as one table.

    Parameters
    ----------
    arrow_filename
        Path of the Arrow stream file to read.
    parquet_filename
        Output path. When ``None``, it is derived from ``arrow_filename`` by
        replacing a trailing ``.arrow`` with ``.parquet4`` (or appending
        ``.parquet4`` when there is no such suffix).
    batch_size
        Group size handed to ``util.grouper``.
        NOTE(review): assumes ``util.grouper`` yields groups that contain
        only record batches -- confirm against ``util``.
    length
        When not ``None``, at most ``length`` batches are converted *in
        addition to* the first batch; otherwise the whole stream is used.
    """
    if parquet_filename is None:
        # Only swap a trailing ".arrow". A plain str.replace would also
        # rewrite directory names containing ".arrow" and, for names without
        # the suffix, would make the output path equal to the input path.
        suffix = ".arrow"
        if arrow_filename.endswith(suffix):
            stem = arrow_filename[: -len(suffix)]
        else:
            stem = arrow_filename
        parquet_filename = stem + ".parquet4"
    arrow_file = pa.OSFile(arrow_filename)
    try:
        reader = pa.open_stream(arrow_file)
        table0 = pa.Table.from_batches([reader.read_next_batch()])
        remaining = reader if length is None else take(length, reader)
        with pq.ParquetWriter(parquet_filename, table0.schema) as writer:
            writer.write_table(table0)
            for batches in util.grouper(remaining, batch_size):
                writer.write_table(pa.Table.from_batches(batches))
    finally:
        # Release the input file handle even if the conversion fails.
        arrow_file.close()

In [None]:
%%time
copy_arrow(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.arrow",
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset15000.arrow",
    length=15000,
    batch_size=1,
)

In [None]:
%%time
parquetify2(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.arrow",
    batch_size=1000,
)

In [None]:
cols = [
    "('YFP', 'labelwise', 'p0.9')",
    "('MCHERRY', 'labelwise', 'p0.9')",
    "('YFP', 'regionprops', 'mean')",
    "('YFP', 'regionprops', 'area')",
]

In [None]:
# cols = ["('YFP', 'labelwise', 'p0.9')"]
# cols = ['position']
# cols = ["('YFP', 'labelwise', 'p0.9')", 'filename', 'position']
# cols = ["('YFP', 'labelwise', 'p0.9')", 'position', 'trench', 'label']
cols = ["('YFP', 'labelwise', 'p0.9')", "position", "trench", "label", "filename"]

In [None]:
%%time
# reader = pq.ParquetFile('/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.parquet4')
reader = pq.ParquetFile(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.onecol.rg10000.parquet4"
)
# for i in range(0,reader.num_row_groups):
#     print('row group', i)
#     reader.read_row_group(i, use_pandas_metadata=True)
tables = [
    reader.read_row_group(i, columns=None, nthreads=1, use_pandas_metadata=False)
    for i in range(reader.num_row_groups)
]
table = pa.concat_tables(tables)
# t = table.to_pandas(use_threads=True, strings_to_categorical=True)

In [None]:
import fastparquet

In [None]:
%%time
tf = fastparquet.ParquetFile(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.parquet4"
)

In [None]:
next(tf.iter_row_groups())

In [None]:
tf.row_groups[0]

In [None]:
tf.read_row_group(0, [("YFP", "labelwise", "p0.9")], {})

In [None]:
%%time
tf = fastparquet.ParquetFile(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.parquet4"
)  # .to_pandas()

In [None]:
tff = tf.to_pandas(columns=["position"])

In [None]:
%%time
tf = fastparquet.ParquetFile(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.onecol.rg10000.parquet4"
).to_pandas()

In [None]:
pd.read_parquet?

In [None]:
%%time
reader = pq.ParquetFile(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.parquet4"
)
tables = [
    reader.read_row_group(i, columns=cols, nthreads=4, use_pandas_metadata=False)
    for i in range(reader.num_row_groups)
]
table = pa.concat_tables(tables)
# t = table.to_pandas(use_threads=True, strings_to_categorical=True)

In [None]:
%%time
t = table.to_pandas(use_threads=True, strings_to_categorical=True)

In [None]:
%%time
pq.write_table(
    pa.Table.from_pandas(t),
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.onecol.rg10000.parquet4",
    row_group_size=10000,
)

In [None]:
%%time
tsel = pq.read_pandas(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.onecol.rg10000.parquet4"
).to_pandas()

In [None]:
t.head()

In [None]:
import sys

In [None]:
len(t[("YFP", "labelwise", "p0.9")].data)

In [None]:
t.info(memory_usage="deep")

In [None]:
%%time
t2 = t.reset_index()

In [None]:
%%time
t2.info(memory_usage="deep")

In [None]:
%%time
t = pq.read_pandas(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.parquet4",
    nthreads=0,
    columns=cols,
)  # .to_pandas()

In [None]:
%%time
tp = t.to_pandas(use_threads=True)

In [None]:
tp.info(memory_usage="deep")

In [None]:
reader = pq.ParquetFile(
    pa.OSFile("/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.parquet4")
)

In [None]:
a = reader.read_row_group(0)

In [None]:
a.schema

In [None]:
len(t)

In [None]:
%%time
tt = (
    pa.open_stream(
        pa.memory_map(
            "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset1000.arrow"
        )
    )
    .read_all()
    .to_pandas()
)

In [None]:
%%time
ttt = (
    pa.open_stream(
        pa.OSFile(
            "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_2.subset1000.arrow"
        )
    )
    .read_all()
    .to_pandas()
)

In [None]:
%%time
t.to_pandas()

In [None]:
%%time
tt.to_pandas()

In [None]:
tt.to_pandas?

In [None]:
%prun parquetify2('/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_0.arrow')

## Load data

In [None]:
%%time
framewise_df = pa.open_stream(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_0.arrow"
).read_pandas()

In [None]:
framewise_df.info(memory_usage="deep")

In [None]:
s = pa.open_stream("/n/scratch2/jqs1/fidelity/all/output/analysis50_stream_0.arrow")

In [None]:
for a in take(2, s):
    print(a)

In [None]:
s.read_next_batch?

In [None]:
s.read_pandas()

In [None]:
%prun pa.open_stream('/n/scratch2/jqs1/fidelity/all/output/analysis50_stream_0.arrow').read_all()#.read_pandas()

In [None]:
%prun pa.open_stream('/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_0.arrow').read_all()#.read_pandas()

In [None]:
%%time
trenchwise_df = pa.open_stream(
    "/n/scratch2/jqs1/fidelity/all/output/analysis_full_stream11_1.arrow"
).read_pandas()

In [None]:
trenchwise_df.info(memory_usage="deep")

In [None]:
%%time
labelwise_df = pq.read_pandas(
    "/n/scratch2/jqs1/fidelity/all/output/analysis50_full_2.parquet"
).to_pandas()

In [None]:
labelwise_df.info(memory_usage="deep")

In [None]:
# labelwise_df.index.names = ['filename', 'position', 't', 'trench_set', 'trench', 'label']
# labelwise_df.sort_index(inplace=True)

In [None]:
len(labelwise_df)

In [None]:
framewise_df.head()

In [None]:
trenchwise_df.head()

In [None]:
labelwise_df.head()

## Basic stats

In [None]:
# labelwise_df2 = labelwise_df.copy()
labelwise_df.columns = ["_".join(col).strip() for col in labelwise_df.columns.values]

In [None]:
labelwise_df.loc[:, "MCHERRY_regionprops_area"].plot(kind="hist", bins=100, log=False)

In [None]:
# labelwise_df2 is never assigned (its definition is commented out above);
# the flattened column names used here live on labelwise_df itself.
mcherry_area = labelwise_df["MCHERRY_regionprops_area"]
selected_cells = labelwise_df[(50 < mcherry_area) & (mcherry_area < 300)]

In [None]:
selected_cells.loc[:, "MCHERRY_regionprops_area"].plot(kind="hist", bins=100, log=False)

In [None]:
selected_cells.loc[:, "MCHERRY_labelwise_p0.5"].plot(kind="hist", bins=100, log=False)

In [None]:
selected_cells.loc[:, "YFP_labelwise_p0.5"].plot(kind="hist", bins=100, log=True)

In [None]:
labelwise_df[:10000].hvplot(
    x="MCHERRY_labelwise_p0.5", y="YFP_labelwise_p0.5", kind="scatter"
)

In [None]:
yfp_cols = [c for c in labelwise_df.columns if c.startswith("YFP_labelwise_p")]
mcherry_cols = [c for c in labelwise_df.columns if c.startswith("MCHERRY_labelwise_p")]

In [None]:
d = labelwise_df.loc[
    IDX["/n/scratch2/jqs1/fidelity/all/180405_txnerr002.nd2", 2, :, 1, 50:60], yfp_cols
]

In [None]:
d.hvplot.scatter("t", "YFP_labelwise_p0.95", by="trench")

In [None]:
%%opts Scatter (size=3)
hv.Layout(
    [
        d.hvplot.scatter("t", c, by="trench", width=500, height=300)
        for c in reversed(yfp_cols)
    ]
).cols(1)

In [None]:
d2 = labelwise_df.loc[
    IDX["/n/scratch2/jqs1/fidelity/all/180405_txnerr002.nd2", 3, :, :, :], yfp_cols
]

In [None]:
%%opts Scatter (size=3) [show_legend=False]
hv.Layout(
    [
        d2.hvplot.scatter("t", c, by="trench", width=500, height=300)
        for c in reversed(yfp_cols)
    ]
).cols(1)

In [None]:
%%opts Scatter (size=3) [show_legend=False]
def get_plot(pos):
    """Scatter ``YFP_labelwise_p0.9`` against ``t`` for one position.

    Selects the rows of ``labelwise_df`` whose first two index levels are the
    fixed ND2 filename below and ``pos`` (all values of the remaining three
    levels), and returns an hvplot scatter grouped by ``trench``.
    """
    column = "YFP_labelwise_p0.9"
    nd2_filename = "/n/scratch2/jqs1/fidelity/all/180405_txnerr002.nd2"
    row_selector = IDX[nd2_filename, pos, :, :, :]
    selection = labelwise_df.loc[row_selector, column]
    return selection.hvplot.scatter(
        "t", column, by="trench", width=500, height=300
    )


hv.HoloMap(
    {
        pos: get_plot(pos)
        for pos in labelwise_df.index._get_level_values(1, unique=True)[:20]
    }
)

In [None]:
labelwise_df.head().loc[:, cols]

In [None]:
d = labelwise_df.loc[
    IDX["/n/scratch2/jqs1/fidelity/all/180405_txnerr002.nd2", 2, :, 1, 50:60], cols
]
m = pd.melt(d.loc[:, cols])
# m.index = d.index
# m

# Plot tapping

In [None]:
LabelStream = ui.MultiIndexStream.define("LabelStream", labelwise_df.index)
label_stream = LabelStream()
box = ui.dataframe_browser(label_stream)
label_stream.event()
box

In [None]:
p = d.hvplot.scatter("t", "YFP_labelwise_p0.95").options(tools=["tap", "hover"])
ui.selection_to_stream(p, label_stream)
p

In [None]:
def cb(**kwargs):
    """Build a tappable scatter plot for the current stream selection.

    The keyword arguments are turned into a row selector for
    ``labelwise_df`` by ``workflow.stream_slice``, which is called with
    ``t`` and ``label`` set to full slices. The selected rows are plotted as
    ``YFP_labelwise_p0.95`` against ``t`` with the tap and hover tools on.
    """
    row_selector = workflow.stream_slice(
        labelwise_df.index.names,
        kwargs,
        t=slice(None),
        label=slice(None),
    )
    selected_rows = labelwise_df.loc[row_selector, :]
    scatter = selected_rows.hvplot.scatter("t", "YFP_labelwise_p0.95")
    return scatter.options(tools=["tap", "hover"])


ui.viewer(cb, label_stream)

## Transcription errors

In [None]:
selected_cells.head()

In [None]:
selected_cells.index[0][:-1]

In [None]:
trench_bboxes.head()

In [None]:
selected_cells.head()

In [None]:
selected_cells.index[0][2:-1]

In [None]:
selected_trenches = trench_bboxes.xs(
    (*selected_cells.index[0][:2], "MCHERRY", 0, *selected_cells.index[0][3:-1]),
    drop_level=False,
)

In [None]:
selected_trenches.head()

In [None]:
workflow.get_trench_stacks(selected_trenches, all_frames, image_limits)

# Trench UI

In [None]:
trenchwise_df.head(10)

In [None]:
trench_bboxes.head(20)

In [None]:
trench_bboxes[trench_bboxes[("info", "hough_value")] > 90].loc[
    IDX[:, :, ["MCHERRY"], 0, :, :], :
].head(10)

In [None]:
trench_bboxes.head(10)

In [None]:
selected_trenches_segmentation.head(10)

In [None]:
# LabelStream = ui.MultiIndexStream.define('LabelStream', trench_bboxes.index)
LabelStream = ui.MultiIndexStream.define("LabelStream", labelwise_df.index)
# LabelStream = ui.MultiIndexStream.define('LabelStream', selected_trenches_segmentation.index)
label_stream = LabelStream()

box = ui.dataframe_browser(label_stream)
label_stream.event()
box

In [None]:
label_stream.contents

In [None]:
%%opts Layout [normalize=False]
%%opts Image [width=500 height=300]
# image_callback = partial(ui.get_trench_set_overlay, get_frame_func=workflow.get_nd2_frame_cached)
# p = ui.trench_set_viewer(trench_bboxes_t0, label_stream, channel='MCHERRY', image_callback=image_callback).options({'Bounds': dict(tools=['tap'])})
p = ui.get_trench_set_overlay(
    trench_bboxes_t0,
    channel="MCHERRY",
    **{
        k: label_stream.contents[k]
        for k in ("filename", "position", "t", "trench_set", "trench")
    },
)
p = p.options({"Bounds": dict(tools=["tap"])})
s = Selection1D(source=p)
p

In [None]:
s

In [None]:
s

In [None]:
%%opts Layout [normalize=False]
%%opts Image [width=800 height=150]
image_callback = partial(
    ui.get_trench_set_overlay, get_frame_func=workflow.get_nd2_frame_cached
)
(
    ui.trench_set_viewer(
        trench_bboxes_t0, label_stream, channel="MCHERRY", image_callback=image_callback
    )
    + ui.trench_set_viewer(
        trench_bboxes_t0, label_stream, channel="YFP", image_callback=image_callback
    )
).cols(1)

In [None]:
%%opts Layout [normalize=False]
%%output size=100
hover = HoverTool(
    tooltips=[
        ("(x,y)", "(@x{0[.]0}, @y{0[.]0})"),
        ("value", "@z"),
    ]
)
cb = compose(partial(ui.hover_image, hover), ui._trench_img, workflow.get_trench_image)
# cb = workflow.get_trench_image
(
    ui.trench_viewer(trench_bboxes, label_stream, channel="MCHERRY", image_callback=cb)
    + ui.trench_viewer(trench_bboxes, label_stream, channel="YFP", image_callback=cb)
).cols(1)

In [None]:
import bokeh

In [None]:
compose(partial(ui.hover_image, hover), ui._trench_img, workflow.get_trench_image)(
    trench_bboxes,
    "/n/scratch2/jqs1/fidelity/all/180405_txnerr002.nd2",
    3,
    "MCHERRY",
    0,
    1,
    56,
)

In [None]:
yfp = labelwise_df.loc[
    IDX["/n/scratch2/jqs1/fidelity/all/180405_txnerr002.nd2", 3, 3, 1, 61],
    ("YFP", "labelwise", "mean"),
]

In [None]:
yfp.plot.hist(bins=50)

In [None]:
yfp2 = labelwise_df.loc[
    IDX["/n/scratch2/jqs1/fidelity/all/180405_txnerr002.nd2", 3],
    ("YFP", "labelwise", "p0.95"),
]

In [None]:
yfp2[yfp2 < 300].plot.hist(bins=50, log=True)

In [None]:
yfp2[yfp2 > 200]

In [None]:
yfp2.loc[IDX[:, 1, 60]]

In [None]:
yfp2.swaplevel(0, 1).swaplevel(1, 2).sort_index()

In [None]:
yfp2[yfp2 > 130].groupby(["trench_set", "trench"]).filter(lambda x: x.count() > 10)

In [None]:
yfp[yfp > 130]

In [None]:
labelwise_df.xs(
    IDX["/n/scratch2/jqs1/fidelity/all/180405_txnerr002.nd2", 3, 2, 1, 56],
    drop_level=False,
)["YFP"]

In [None]:
labelwise_df.loc[
    IDX["/n/scratch2/jqs1/fidelity/all/180405_txnerr002.nd2", 0, 2, 1, 56], "MCHERRY"
]

In [None]:
plt.imshow(
    workflow.get_trench_image(
        trench_bboxes,
        channel="MCHERRY",
        **dissoc(label_stream.contents, "_df", "label"),
    )
)

In [None]:
workflow.get_nd2_frame(
    channel="MCHERRY",
    **util.get_keys(label_stream.contents, "filename", "position", "t"),
)

In [None]:
f = partial(util.get_keys, keys=["label"])
f(label_stream.contents)

In [None]:
label_stream.contents.keys()

In [None]:
ui.image_viewer(frame_stream)

In [None]:
ui.show_frame_info(trench_diag, frame_stream)

In [None]:
g = ui.show_grid(selected, stream=frame_stream)
g