# Imports

In [None]:
import itertools as it
import re
from functools import partial
from glob import glob
from pathlib import Path

import dask
import distributed
import nd2reader
import numpy as np
import pandas as pd
import requests
import zarr
from dask import delayed
from dask_jobqueue import SLURMCluster
from distributed import Client, LocalCluster, progress
from tqdm.auto import tqdm, trange

In [None]:
from dask.diagnostics import ProgressBar

# Register a session-wide dask progress bar.
# NOTE(review): dask.diagnostics.ProgressBar presumably only reports work run by
# the local dask schedulers, not tasks sent through the distributed `Client`
# created below (distributed's own `progress` is imported above) — confirm.
ProgressBar().register()

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.io.metadata as imd
from paulssonlab.image_analysis import *
from paulssonlab.image_analysis.ui import display_image

# Config

In [None]:
# Describe the SLURM job that backs each dask worker, then attach a client.
cluster = SLURMCluster(
    # SLURM submission settings
    queue="transfer",
    walltime="02:00:00",
    # worker layout per job: a single one-core process with 2GB of memory
    cores=1,
    processes=1,
    memory="2GB",
    # worker scratch space and job log location
    local_directory="/tmp",
    log_directory="/home/jqs1/log",
)
client = Client(cluster)

In [None]:
# Echo the cluster object so the notebook renders its summary.
cluster

In [None]:
# Ask for 10 workers up front.
cluster.scale(10)

In [None]:
# Switch to adaptive scaling with an upper bound of 20 workers.
cluster.adapt(maximum=20)

# Inventory

In [None]:
# List candidate ND2 files by running the glob on a worker instead of locally
# (presumably because the share is mounted on the cluster nodes — confirm).
# `glob` is the function imported with `from glob import glob`, so it is passed
# directly; `glob.glob` would be an attribute lookup on that function and fail.
filenames = client.submit(
    glob, "/home/jqs1/research.files/Personal_Folders/Noah/23*/*.nd2"
).result()

In [None]:
filenames

In [None]:
%%time
# Queue one metadata task per file before waiting on any of them, so the
# workers can read files concurrently rather than one blocking round-trip at a
# time.
md_futures = {
    filename: client.submit(get_nd2_metadata, filename) for filename in filenames
}
# filename -> (sizes, channels), as returned by get_nd2_metadata.
md = {}
for filename, future in tqdm(md_futures.items()):
    sizes, channels = future.result()
    md[filename] = (sizes, channels)

In [None]:
# Hand-picked ND2 files; the cells below filter `md` by this list and parse
# the full metadata of each entry.
selected_filenames = [
    "/home/jqs1/research.files/Personal_Folders/Noah/230127/initial_growth.nd2",
    "/home/jqs1/research.files/Personal_Folders/Noah/230203/230203_circuits.nd2",
    "/home/jqs1/research.files/Personal_Folders/Noah/230131/230131_growth_5min.nd2",
    "/home/jqs1/research.files/Personal_Folders/Noah/230131/230131_growth.nd2",
    "/home/jqs1/research.files/Personal_Folders/Noah/231101/231101_FP_calibration.nd2",
    "/home/jqs1/research.files/Personal_Folders/Noah/230915/test.nd2",
    "/home/jqs1/research.files/Personal_Folders/Noah/230125/overnight_growth.nd2",
]

In [None]:
{k: v[0] for k, v in md.items() if k in selected_filenames}

In [None]:
{k: v[1] for k, v in md.items() if "Phase-Fluor" in v[1]}

In [None]:
# Parse the full metadata of one ND2 file in the notebook process (no dask).
# NOTE(review): this rebinds `md`, replacing the filename -> (sizes, channels)
# dict built in the earlier cell.
md = imd.parse_nd2_file_metadata(
    "/home/jqs1/scratch/microscopy/231101/231101_FP_calibration.nd2"
)

In [None]:
# Top-level keys of the parsed metadata.
md.keys()

In [None]:
# Presumably the objective name stored in the calibration section — confirm.
md["image_calibration"]["SLxCalibration"]["sObjective"]

In [None]:
%%time
# Submit every parse task up front so the workers run them concurrently, then
# collect the results in submission order.
metadata_futures = {
    filename: client.submit(imd.parse_nd2_file_metadata, filename)
    for filename in selected_filenames
}
# filename -> parsed metadata.
metadata = {}
for filename, future in tqdm(metadata_futures.items()):
    metadata[filename] = future.result()

In [None]:
metadata.keys()

In [None]:
{k: v["image_calibration"]["SLxCalibration"]["sObjective"] for k, v in metadata.items()}

In [None]:
metadata[""]

In [None]:
client.submit(glob.glob, "")

In [None]:
filename = ""

In [None]:
def get_nd2_metadata(filename):
    """Return ``(sizes, channels)`` for the ND2 file at ``filename``.

    The file is opened with ``workflow.get_nd2_reader``; ``sizes`` is the
    reader's ``sizes`` attribute and ``channels`` is the ``"channels"`` entry
    of its ``metadata`` mapping.
    """
    reader = workflow.get_nd2_reader(filename)
    sizes = reader.sizes
    channels = reader.metadata["channels"]
    return sizes, channels

In [None]:
# Read the metadata of the single file chosen above on a worker.
# `client.submit` returns a Future; its value is fetched with `.result()`
# (a Future has no `.gather()` method — gathering is done via `client.gather`).
sizes, channels = client.submit(get_nd2_metadata, filename).result()

In [None]:
# Common root of the paths listed below (a plain string at this point).
filename_prefix = "/home/jqs1/research.files/Personal_Folders/"
# One path per line, relative to filename_prefix; the text is split on newlines
# into a list.
selected_filenames = """Noah/231101/231101_FP_calibration.nd2
Noah/230131/230131_growth_5min.nd2
Noah/230125/overnight_growth.nd2
Noah/230203/230203_circuits.nd2
Daniel/FISH_Paper_Data/lDE20_Data/2023-01-14_lDE20_Run_9/Experiment.nd2
Daniel/FISH_Paper_Data/lDE15_Data/2021-10-21_lDE15_Final_1/experiment.nd2
Daniel/FISH_Paper_Data/Isolates/2023-02-11_lpxK_LpxC_AB/Experiment.nd2
Carlos/Ti5/LCS3_run1/Experiment.nd2
Carlos/Ti5/LCS3_run2/Experiment.nd2
Carlos/Ti5/08072023_lcs2/GlycerolArabinoseMedia.nd2
Carlos/Ti5/08072023_lcs2/GlycerolMedia3hr.nd2
Carlos/Ti5/2019_02_07/AndersonPromoters_Phase.nd2
Luis/Imaging_Data/Bsubtilis_DegronRapamycin/2022-05-10_AF337-AF339_Ti6/tlapse-1.nd2
Luis/Imaging_Data/Bsubtilis_BarcodesTesting/2023-10-31_lLAG2_AF555-AllCycles/Experiment.nd2
Daniel/FISH_Paper_Data/lDE26_Data/2023-03-11_lDE26_Run_1/Experiment.nd2
Luis/Imaging_Data/Ecoli_Libraries/2022-08-19_MM-Ti5-lDE24/2022-08-19_MM-Ti5-lDE24.nd2
Raquel/Results/P1 grant/Gilmore/2023_07_12 RF320 Rifam/RF320.nd2
Raquel/Results/P1 grant/Gilmore/2023_07_20 RF320 Rifam/64_128_256_512_.nd2
Raquel/Results/P1 grant/Gilmore/2021_04_20 Enterococcus RF235/RF235.nd2
Raquel/Results/P1 grant/Gilmore/2023_08_01 RF320 Dapto/Control_128_256_512.nd2
Raquel/Results/P1 grant/Gilmore/2023_08_11 Tnlibrary Dapto dyes/ND2_growth/growth.nd2
Raquel/Results/P1 grant/Gilmore/2023_08_24 EF daptomycin/growth.nd2
Raquel/Results/P1 grant/Gilmore/2023_10_4 RF320/ND2_growth/Experiment_growth.nd2
Raquel/Results/P1 grant/Gilmore/2023_12_07 RF157 MM/ND2/MHCA_Dapto.nd2
Raquel/Results/P1 grant/Gilmore/2020_09_28 Enterococcus mcherry gfp/Enterococcus mcherry gfp.nd2
Raquel/Results/PhoPQ/Mother machine experiments/2021_02_14 RF230 N10 Sin N50 Sin Snake no treatment (importante)/N10 Sin N50 Sin Snake.nd2
Raquel/Results/P1 grant/Hooper/2023_03_09 RF322/ND2/Experiment.nd2
Raquel/Results/P1 grant/Hooper/2023_03_30 RF322/ND2/Experiment.nd2
Raquel/Results/P1 grant/Hooper/2023_09_06 RF322 pyocyanin/ND2/Control_Pyo_Naf_Pyo+Naf003.nd2
Mengyu/microscopy/2022-04-15 Ti6/growth-lysogen-lambda2903_A01.nd2
Mengyu/microscopy/2022-03-21 Ti6/1 growth/growth_start_with_inducer.nd2
Mengyu/microscopy/2022-03-25 Ti6/1 growth/growth001.nd2
Mengyu/microscopy/2022-12-26 Jurkat cell growth/jurkat-growth.nd2
Mengyu/microscopy/2023-05-12 Jurkat cell growth (w 10ng per ml IL-7)/growth.nd2""".split(
    "\n"
)
# Prepend the prefix to every relative path to get absolute paths.
selected_filenames = [f"{filename_prefix}{filename}" for filename in selected_filenames]

In [None]:
%%time
# Submit every parse task up front so the workers run them concurrently, then
# collect the results in submission order.
metadata_futures = {
    filename: client.submit(imd.parse_nd2_file_metadata, filename)
    for filename in selected_filenames
}
# filename -> parsed metadata.
metadata = {}
for filename, future in tqdm(metadata_futures.items()):
    metadata[filename] = future.result()

In [None]:
%%time
# Submit all reader-metadata tasks first so they run concurrently on the
# workers, then collect the results in submission order.
metadata2_futures = {
    filename: client.submit(get_nd2_metadata, filename)
    for filename in selected_filenames
}
# filename -> result of get_nd2_metadata for that file.
metadata2 = {}
for filename, future in tqdm(metadata2_futures.items()):
    metadata2[filename] = future.result()

In [None]:
# Print the first element of each metadata2 entry, in list order.
for filename in selected_filenames:
    first_element = metadata2[filename][0]
    print(first_element)

In [None]:
# Top-level keys of the parsed metadata for one file.
metadata[
    "/home/jqs1/research.files/Personal_Folders/Noah/230125/overnight_growth.nd2"
].keys()

In [None]:
# Keep that file's "image_metadata" section for inspection.
x = metadata[
    "/home/jqs1/research.files/Personal_Folders/Noah/230125/overnight_growth.nd2"
]["image_metadata"]

In [None]:
x

In [None]:
# The "sObjective" calibration entry of every parsed file, keyed by filename.
{
    path: parsed["image_calibration"]["SLxCalibration"]["sObjective"]
    for path, parsed in metadata.items()
}

# ND2 to Zarr conversion

In [None]:
output_filename = (
    f"/home/jqs1/group/jqs1/microscopy/for_janelia/{Path(str(filename)).name}"
)

In [None]:
output_filename

In [None]:
%%time
# Convert FOV 11 (all timepoints) of `nd2` to Zarr under `output_filename`.
# NOTE(review): `nd2` is not assigned in any cell visible here; it presumably
# comes from an earlier interactive session — confirm before re-running.
readers.convert_nd2_to_array(
    nd2,
    output_filename,
    format="zarr",
    file_axes=[],
    dataset_axes=["fov", "channel"],
    slices={"fov": [11], "t": slice(None)},
)

In [None]:
# Open an existing HDF5 file for a side-by-side comparison with the Zarr output.
# NOTE(review): `h5py` is not imported in the visible import cell; presumably it
# arrives through `from paulssonlab.image_analysis import *` — confirm.
x = h5py.File(
    "/home/jqs1/group/jqs1/microscopy/for_janelia/230830_repressilators.nd2.split.aa/fov=22.hdf5"
)

In [None]:
# Open the Zarr store at `output_filename` with a ".zarr" suffix appended.
y = zarr.convenience.open(output_filename + ".zarr")

In [None]:
# Element [0, 0] of the CFP-EM channel from the Zarr store (FOV 11).
display_image(y["fov=11/channel=CFP-EM"][0, 0], scale=0.99)

In [None]:
# Element [0, 0] of the CFP-EM channel from the HDF5 file.
display_image(x["channel=CFP-EM"][0, 0], scale=0.99)

## Full run

In [None]:
# Root of the share as a Path so the spreadsheet's paths can be joined with `/`
# in the cells below. This rebinds the plain-string prefix used earlier.
filename_prefix = Path("/home/jqs1/research.files/Personal_Folders/")

In [None]:
# Load the dataset inventory from the shared spreadsheet's CSV export.
# `header=1` takes the column names from the second row of the CSV; `[1:]`
# then drops the first row that follows the header.
df = pd.read_csv(
    "https://docs.google.com/spreadsheets/d/1WlZof_7zlann3N6oV0uyXFVkUqkBR5fKUtIKCJd9B4s/export?format=csv",
    header=1,
)[1:]
# Remove a trailing parenthesised suffix (and the whitespace before it) from
# each column name.
df.columns = df.columns.str.replace(r"\s+\(.*\)$", "", regex=True)
# Parse comma-separated FOV numbers ("1, 2, 3") into lists of ints; missing
# cells become [].
# pd.notna() is used instead of `is not np.nan`: the identity test only matches
# the single np.nan object, so any other missing-value marker (None, pd.NA, a
# different NaN float) would be handed to re.split() and raise.
df["Selected FOVs"] = df["Selected FOVs"].apply(
    lambda s: [int(x) for x in re.split(r"\s*,\s*", s)] if pd.notna(s) else []
)

In [None]:
def glob_filename(filename):
    """Return ``filename`` as a string, expanding it when it holds a ``*``.

    Paths without ``*`` come back unchanged (after ``str()``). Paths with
    ``*`` are globbed and the sorted matches are wrapped in
    ``workflow.SplitFilename``.
    """
    pattern = str(filename)
    if "*" not in pattern:
        return pattern
    matches = sorted(glob(pattern))
    return workflow.SplitFilename(matches)


def get_nd2_metadata(filename):
    """Return ``(sizes, channels)`` for an ND2 path that may contain ``*``.

    ``filename`` is first passed through ``glob_filename``; the result is
    opened with ``workflow.get_nd2_reader``. ``sizes`` is the reader's
    ``sizes`` attribute and ``channels`` is ``metadata["channels"]``.
    """
    resolved = glob_filename(filename)
    reader = workflow.get_nd2_reader(resolved)
    sizes = reader.sizes
    channels = reader.metadata["channels"]
    return sizes, channels


def get_nd2_frame_glob(filename, *args, **kwargs):
    """Call ``workflow.get_nd2_frame`` on a path that may contain ``*``.

    ``filename`` is passed through ``glob_filename`` first; all other
    positional and keyword arguments are forwarded untouched.
    """
    resolved = glob_filename(filename)
    return workflow.get_nd2_frame(resolved, *args, **kwargs)

In [None]:
%%time
# Queue one metadata task per spreadsheet path (all rows but the last) before
# waiting on any of them, so the workers read files concurrently instead of
# one blocking round-trip per file.
md_futures = {
    filename: client.submit(get_nd2_metadata, filename_prefix / filename)
    for filename in df["Path"][:-1]
}
# spreadsheet path -> (sizes, channels), as returned by get_nd2_metadata.
md = {}
for filename, future in tqdm(md_futures.items()):
    sizes, channels = future.result()
    md[filename] = (sizes, channels)

In [None]:
# One output line per md entry: its channels joined with ", ".
channel_lines = [", ".join(entry[1]) for entry in md.values()]
print("\n".join(channel_lines))

In [None]:
%%time
# Spot check: fetch a single frame of one spreadsheet entry from a worker.
idx = 35
filename = filename_prefix / df.loc[idx, "Path"]
position, channel, t = 11, "BF", 20
print("NAME:", df.loc[idx, "Short name"])
frame_future = client.submit(get_nd2_frame_glob, filename, position, channel, t)
img = frame_future.result()

In [None]:
# Show the fetched frame, downsampled by 4.
display_image(img, scale=0.99, downsample=4)

In [None]:
# Parsed FOV list of spreadsheet row 34.
df.loc[34, "Selected FOVs"]

In [None]:
# Queue one ND2 -> Zarr conversion per spreadsheet row that has selected FOVs.
# short name -> Future of that row's conversion task.
conversions = {}
for idx in tqdm(df.index):
    short_name = df.loc[idx, "Short name"]
    fovs = df.loc[idx, "Selected FOVs"]
    # Decide whether to skip before building the path or contacting the
    # cluster, so rows without FOVs neither cost a remote glob task nor fail on
    # a missing "Path" value.
    if not fovs:
        print(f"SKIPPING: {short_name}")
        continue
    # Resolve wildcards on a worker; the Future is passed straight into the
    # conversion task, which makes the conversion depend on it.
    remote_filename = client.submit(
        glob_filename, filename_prefix / df.loc[idx, "Path"]
    )
    output_filename = (
        f"/home/jqs1/group/jqs1/microscopy/for_janelia/{short_name}/raw.zarr"
    )
    conversions[short_name] = client.submit(
        readers.convert_nd2_to_array,
        remote_filename,
        output_filename,
        file_axes=[],
        dataset_axes=["fov", "channel"],
        slices=dict(fov=fovs, t=slice(None)),
        format="zarr",
    )

In [None]:
# Open one converted dataset to check the result.
z = zarr.open(
    "/home/jqs1/group/jqs1/microscopy/for_janelia/220510_bsub_degron_rap/raw.zarr"
)

In [None]:
# Show index 100 of the BF channel of FOV 5, downsampled by 4.
display_image(z["fov=5/channel=BF"][100], scale=0.99, downsample=4)