In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os
import cv2
import numpy as np
import pyarrow as pa
import toml
import glob
import joblib
import h5py
import warnings
import pandas as pd
from tqdm.auto import tqdm
from markovids import vid
from qd_analysis.util import clean_df
from scipy import signal
from joblib import Parallel, delayed

In [3]:
# Name of the sub-directory (next to each video) that holds the segmentation
# HDF5 files. It is captured as the default value of the `segmentation_dir`
# parameter of get_spatial_autocorr, so it must be defined before that function.
segmentation_dir = "_segmentation_tau-0-pretrain"

In [4]:
# Keyword arguments forwarded to every get_spatial_autocorr job in this notebook.
params = dict(
    max_lags=200,  # half-width (in pixels) of the autocorrelation window that is kept
    spacing=250,  # analyse every 250th frame
    force=True,  # recompute even when a cached result file exists
    mask_non_mouse_pixels=False,  # leave pixels outside the segmentation mask untouched
)

## User functions

In [5]:
def get_spatial_autocorr(
    dat_file,
    spacing=200,
    reader_kwargs={"threads": 2},  # never mutated here, only unpacked
    distortion_coeffs=None,
    intrinsic_matrix=None,
    segmentation_dir=segmentation_dir,
    bground_dir="_bground",
    output_dir="_autocorr",
    force=False,
    mask_non_mouse_pixels=False,
    max_lags=100,
):
    """Average 2-D spatial autocorrelation of background-subtracted video frames.

    Every ``spacing``-th frame of ``dat_file`` is read, the nearest rolling
    background (by frame index) is subtracted, the result is clipped to
    [0, 255], optionally masked and undistorted, and the mean-removed
    autocorrelation of each frame is computed with an FFT convolution. The
    central ``2 * max_lags`` x ``2 * max_lags`` window of each autocorrelation is
    averaged over frames, saved with joblib and returned.

    Parameters
    ----------
    dat_file : str
        Path to the fluorescence video. ``../metadata.toml`` relative to its
        directory must exist and contain ``start_time`` and ``user_input``.
    spacing : int
        Step between analysed frame indices.
    reader_kwargs : dict
        Extra keyword arguments for ``vid.io.AutoReader``.
    distortion_coeffs, intrinsic_matrix : array-like or None
        Passed to ``cv2.undistort``; undistortion is skipped when
        ``intrinsic_matrix`` is None.
    segmentation_dir : str or None
        Sub-directory holding ``<reflectance name>.hdf5`` with a ``labels``
        dataset. Only read when ``mask_non_mouse_pixels`` is True.
    bground_dir : str
        Sub-directory holding ``<fluorescence name>.hdf5`` with ``bground`` and
        ``frame_idxs`` datasets.
    output_dir : str
        Sub-directory where ``<camera>_autocorr-data.pkl`` is written.
    force : bool
        Recompute even if the result file already exists.
    mask_non_mouse_pixels : bool
        Zero out pixels whose mask label is <= 0.
    max_lags : int
        Half-width of the autocorrelation window that is kept.

    Returns
    -------
    dict or None
        Result dictionary (also written to disk), or None (with a warning) when
        the video has no frames, the background file is missing, or masking was
        requested but no segmentation file exists.
    """
    metadata = toml.load(os.path.join(os.path.dirname(dat_file), "../metadata.toml"))
    dirname, fname_reflectance = os.path.split(dat_file.replace("fluorescence", "reflectance"))

    fname_reflectance = os.path.splitext(fname_reflectance)[0]
    fname_fluorescence = os.path.splitext(os.path.basename(dat_file))[0]
    cam_name = fname_reflectance.replace("-reflectance", "")

    save_file = os.path.join(dirname, output_dir, f"{cam_name}_autocorr-data.pkl")
    if os.path.exists(save_file) and not force:
        # NOTE: the cache is keyed on the file name only; a cached result computed
        # with a different spacing/max_lags is returned as-is.
        # NOTE(review): joblib.load unpickles the file; only use on trusted caches.
        try:
            return joblib.load(save_file)
        except Exception as e:
            # Unreadable cache: fall through and recompute.
            warnings.warn(f"Could not load cached results {save_file} ({e}), recomputing")

    segmentation_path = None
    if segmentation_dir is not None:
        segmentation_path = os.path.join(dirname, segmentation_dir, f"{fname_reflectance}.hdf5")

    bground_path = os.path.join(dirname, bground_dir, f"{fname_fluorescence}.hdf5")

    os.makedirs(os.path.join(dirname, output_dir), exist_ok=True)
    os.makedirs(os.path.join(dirname, bground_dir), exist_ok=True)

    reader = vid.io.AutoReader(
        dat_file,
        **reader_kwargs,
    )
    # try/finally guarantees the reader is released on every exit path,
    # including the successful one and any exception while reading.
    try:
        frame_range = range(0, reader.nframes, spacing)
        if len(frame_range) == 0:
            warnings.warn(f"No frames found {dat_file}")
            return None

        masks = None
        if mask_non_mouse_pixels:
            # Masks are only needed (and therefore only read) when masking is on.
            if (segmentation_path is None) or not os.path.exists(segmentation_path):
                warnings.warn(f"No mask found {dat_file}")
                return None
            with h5py.File(segmentation_path, "r") as f:
                masks = f["labels"][frame_range].astype("uint8")

        if not os.path.exists(bground_path):
            warnings.warn(f"No bground found {dat_file}")
            return None

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            with h5py.File(bground_path, "r") as f:
                rolling_bgrounds = f["bground"][()]
                idxs = f["frame_idxs"][()]
            frames = reader.get_frames(frame_range)

        # Promote before subtracting: with unsigned integer inputs the difference
        # would wrap around instead of going negative, defeating the clip at 0.
        bground_idxs = np.asarray(idxs, dtype="float64")
        bground_sub = np.zeros(frames.shape, dtype="int16")
        for i, (_idx, _frame) in enumerate(zip(frame_range, frames)):
            use_bground = np.argmin(np.abs(bground_idxs - _idx))
            _diff = _frame.astype("float64") - rolling_bgrounds[use_bground]
            bground_sub[i] = np.clip(_diff, 0, 255)
            if masks is not None:
                bground_sub[i][masks[i] <= 0] = 0  # mask out non-mouse pixels
    finally:
        reader.close()

    if intrinsic_matrix is not None:
        for i in range(len(bground_sub)):
            bground_sub[i] = cv2.undistort(bground_sub[i], intrinsic_matrix, distortion_coeffs)

    corrs = []
    raw_corrs = []
    for _sub in bground_sub:
        # assumes each frame is 2-D (height, width) -- TODO confirm for colour readers
        corr_frame = _sub.astype("float32")
        corr_frame -= corr_frame.mean()
        # Convolving with the flipped frame gives the autocorrelation; in "full"
        # mode the zero-lag term sits at index shape // 2 along each axis.
        _corr = signal.fftconvolve(corr_frame, corr_frame[::-1, ::-1], mode="full")
        my, mx = _corr.shape[0] // 2, _corr.shape[1] // 2
        # NOTE(review): a constant frame has zero variance and yields NaN/inf
        # here, which then propagates into ave_corr.
        _corr_norm = _corr / (corr_frame.size * np.var(corr_frame))
        # NOTE(review): if max_lags exceeds my or mx the start index goes
        # negative and the window is no longer centred on zero lag.
        window = (slice(my - max_lags, my + max_lags), slice(mx - max_lags, mx + max_lags))
        corrs.append(_corr_norm[window])
        raw_corrs.append(_corr[window])

    results = {
        "ave_corr": np.mean(corrs, axis=0),
        "ave_raw_corr": np.mean(raw_corrs, axis=0),
        "intrinsic_matrix": intrinsic_matrix,
        "distortion_coeffs": distortion_coeffs,
        "start_time": metadata["start_time"],
        "filename": dat_file,
        "max_lags": max_lags,
        "im_size": bground_sub[0].size,
        "im_shape": bground_sub[0].shape,
    }
    # User-supplied metadata is merged last and overrides any colliding key above.
    results.update(metadata["user_input"])
    joblib.dump(results, save_file)
    return results

# Quantify fluorescence length-scale

In [6]:
# Root directory of the quantum-dot project data. The default is the original
# cluster location; set the QD_ROOT_DIR environment variable to run the
# notebook against a copy of the data elsewhere without editing this cell.
root_dir = os.environ.get(
    "QD_ROOT_DIR",
    "/storage/home/hcoda1/4/jmarkowitz30/shared_folder/active_lab_members/markowitz_jeffrey/active_projects/quantum_dots/",
)

In [7]:
# Collect every Basler fluorescence video under the timecourse_01 datasets,
# sorted within each dataset and concatenated in the order listed.
fluo_files = []
for _dataset in ("timecourse_01", "timecourse_01_agarose_beads"):
    base_dir = os.path.join(root_dir, _dataset)
    _pattern = os.path.join(base_dir, "**", "Basler*fluorescence.avi")
    fluo_files += sorted(glob.glob(_pattern, recursive=True))

In [8]:
# Camera calibration for the timecourse_01 recordings. The job-building cell
# below looks up calibration_data["intrinsics"][cam] and
# calibration_data["distortion_coeffs"][cam] by camera name.
calibration_data = toml.load(os.path.join(root_dir, "timecourse_01_calibration.toml"))

In [9]:
# Load each session's metadata.toml (one directory above the video), keyed by video path.
metadata = {
    _file: toml.load(os.path.join(os.path.dirname(_file), "../metadata.toml"))
    for _file in tqdm(fluo_files)
}

  0%|          | 0/780 [00:00<?, ?it/s]

In [10]:
# Queue one autocorrelation job per video, using that camera's calibration.
delays = []
for _file in fluo_files:
    cam = os.path.basename(_file).replace("-fluorescence.avi", "")
    # Skip (with a warning) videos whose camera has no calibration entry instead
    # of aborting the whole batch with a KeyError.
    if cam not in calibration_data["intrinsics"] or cam not in calibration_data["distortion_coeffs"]:
        warnings.warn(f"{_file} uses {cam} which has no calibration data")
        continue
    delays.append(
        delayed(get_spatial_autocorr)(
            _file,
            intrinsic_matrix=np.array(calibration_data["intrinsics"][cam]),
            distortion_coeffs=np.array(calibration_data["distortion_coeffs"][cam]),
            **params,
        )
    )
print(len(delays))
# Each entry of dat is a result dict, or None for videos the function skipped.
dat = Parallel(n_jobs=18, verbose=10, backend="multiprocessing")(delays)

780


[Parallel(n_jobs=18)]: Using backend MultiprocessingBackend with 18 concurrent workers.
[Parallel(n_jobs=18)]: Done   5 tasks      | elapsed:   42.7s
[Parallel(n_jobs=18)]: Done  14 tasks      | elapsed:   49.4s
[Parallel(n_jobs=18)]: Done  25 tasks      | elapsed:  1.4min
[Parallel(n_jobs=18)]: Done  36 tasks      | elapsed:  1.8min
[Parallel(n_jobs=18)]: Done  49 tasks      | elapsed:  2.3min
[Parallel(n_jobs=18)]: Done  62 tasks      | elapsed:  2.9min
[Parallel(n_jobs=18)]: Done  77 tasks      | elapsed:  3.5min
[Parallel(n_jobs=18)]: Done  92 tasks      | elapsed:  4.0min
[Parallel(n_jobs=18)]: Done 109 tasks      | elapsed:  4.6min
[Parallel(n_jobs=18)]: Done 126 tasks      | elapsed:  5.3min
[Parallel(n_jobs=18)]: Done 145 tasks      | elapsed:  6.0min
[Parallel(n_jobs=18)]: Done 164 tasks      | elapsed:  6.9min
[Parallel(n_jobs=18)]: Done 185 tasks      | elapsed:  7.7min
[Parallel(n_jobs=18)]: Done 206 tasks      | elapsed:  8.6min
[Parallel(n_jobs=18)]: Done 229 tasks      |

In [11]:
# Collect every Basler fluorescence video under the timecourse_02/03 datasets,
# sorted within each dataset and concatenated in the order listed.
fluo_files = []
for _dataset in ("timecourse_02", "timecourse_02_joints", "timecourse_03"):
    base_dir = os.path.join(root_dir, _dataset)
    _pattern = os.path.join(base_dir, "**", "Basler*fluorescence.avi")
    fluo_files += sorted(glob.glob(_pattern, recursive=True))

In [12]:
# Calibration files, addressed by position (0, 1, 2) in the job-building cells,
# which choose one per video from its recording date.
calibration_data = [
    toml.load(os.path.join(root_dir, _calibration_name))
    for _calibration_name in (
        "timecourse_02_calibration_v1.toml",
        "timecourse_02_calibration_v2.toml",
        "timecourse_04_calibration.toml",
    )
]

In [13]:
# Load each session's metadata.toml (one directory above the video), keyed by video path.
metadata = {
    _file: toml.load(os.path.join(os.path.dirname(_file), "../metadata.toml"))
    for _file in tqdm(fluo_files)
}

  0%|          | 0/601 [00:00<?, ?it/s]

In [14]:
# (cutoff day, calibration index), newest first. A recording made strictly after
# a cutoff day uses that calibration; anything else falls back to index 0
# (i.e. up to and including 2024-06-10 load v1, after that v2, ...).
_calibration_cutoffs = (("2024-11-10", 2), ("2024-06-10", 1))

delays = []
for _file in fluo_files:
    cam = os.path.basename(_file).replace("-fluorescence.avi", "")
    timestamp = pd.to_datetime(metadata[_file]["start_time"])
    _recording_day = timestamp.floor("d")

    use_calibration_data = calibration_data[0]
    for _cutoff, _calibration_idx in _calibration_cutoffs:
        if _recording_day > pd.to_datetime(_cutoff):
            use_calibration_data = calibration_data[_calibration_idx]
            break

    _intrinsics = use_calibration_data["intrinsics"]
    if cam in _intrinsics:
        delays.append(
            delayed(get_spatial_autocorr)(
                _file,
                intrinsic_matrix=np.array(_intrinsics[cam]),
                distortion_coeffs=np.array(use_calibration_data["distortion_coeffs"][cam]),
                **params,
            )
        )
    else:
        # no calibration for this camera: warn and leave the video out
        warnings.warn(f"{_file} uses {cam} which is not in intrinsics")
print(len(delays))
dat2 = Parallel(n_jobs=15, verbose=10, backend="multiprocessing")(delays)

601


[Parallel(n_jobs=15)]: Using backend MultiprocessingBackend with 15 concurrent workers.
[Parallel(n_jobs=15)]: Done   2 tasks      | elapsed:   38.8s
[Parallel(n_jobs=15)]: Done  11 tasks      | elapsed:   46.9s
[Parallel(n_jobs=15)]: Done  20 tasks      | elapsed:  1.4min
[Parallel(n_jobs=15)]: Done  31 tasks      | elapsed:  2.0min
[Parallel(n_jobs=15)]: Done  42 tasks      | elapsed:  2.4min
[Parallel(n_jobs=15)]: Done  55 tasks      | elapsed:  2.8min
[Parallel(n_jobs=15)]: Done  68 tasks      | elapsed:  3.5min
[Parallel(n_jobs=15)]: Done  83 tasks      | elapsed:  4.4min
[Parallel(n_jobs=15)]: Done  98 tasks      | elapsed:  5.1min
[Parallel(n_jobs=15)]: Done 115 tasks      | elapsed:  5.7min
[Parallel(n_jobs=15)]: Done 132 tasks      | elapsed:  6.5min
[Parallel(n_jobs=15)]: Done 151 tasks      | elapsed:  7.5min
[Parallel(n_jobs=15)]: Done 170 tasks      | elapsed:  8.4min
[Parallel(n_jobs=15)]: Done 191 tasks      | elapsed:  9.4min
[Parallel(n_jobs=15)]: Done 212 tasks      |

In [15]:
# Append the second batch of results (dat2) to the first batch (dat).
# NOTE(review): this extends dat in place, so re-running the cell appends dat2
# a second time -- re-run the cell that creates dat first.
dat += dat2

In [16]:
# Collect every Basler fluorescence video under the rebuttal datasets, sorted
# within each dataset and concatenated in the order listed.
#
# Note for the exposure series: even if we crop, we crop from the BOTTOM and the
# RIGHT, so the intrinsics need no adjustment (cx and cy only change when
# cropping from the TOP or LEFT).
fluo_files = []
for _dataset in ("sciadv_rebuttal/dilution_series", "sciadv_rebuttal/exposure_series"):
    base_dir = os.path.join(root_dir, _dataset)
    _pattern = os.path.join(base_dir, "**", "Basler*fluorescence.avi")
    fluo_files += sorted(glob.glob(_pattern, recursive=True))

In [17]:
# Load each session's metadata.toml (one directory above the video), keyed by video path.
metadata = {
    _file: toml.load(os.path.join(os.path.dirname(_file), "../metadata.toml"))
    for _file in tqdm(fluo_files)
}

  0%|          | 0/138 [00:00<?, ?it/s]

In [18]:
# Queue one autocorrelation job per video, choosing the calibration by recording date.
delays = []
for _file in fluo_files:
    cam = os.path.basename(_file).replace("-fluorescence.avi", "")
    timestamp = pd.to_datetime(metadata[_file]["start_time"])
    # Recordings strictly after a cutoff day use the newer calibration.
    if timestamp.floor("d") > pd.to_datetime("2024-11-10"):
        use_calibration_data = calibration_data[2]
    elif timestamp.floor("d") > pd.to_datetime("2024-06-10"):
        use_calibration_data = calibration_data[1]
    else:
        use_calibration_data = calibration_data[0]
    # Skip (with a warning) videos whose camera has no calibration entry instead
    # of aborting the whole batch with a KeyError.
    if (
        cam not in use_calibration_data["intrinsics"]
        or cam not in use_calibration_data["distortion_coeffs"]
    ):
        warnings.warn(f"{_file} uses {cam} which has no calibration data")
        continue
    delays.append(
        delayed(get_spatial_autocorr)(
            _file,
            intrinsic_matrix=np.array(use_calibration_data["intrinsics"][cam]),
            distortion_coeffs=np.array(use_calibration_data["distortion_coeffs"][cam]),
            **params,
        )
    )
print(len(delays))
dat3 = Parallel(n_jobs=15, verbose=10, backend="multiprocessing")(delays)

138


[Parallel(n_jobs=15)]: Using backend MultiprocessingBackend with 15 concurrent workers.
[Parallel(n_jobs=15)]: Done   2 tasks      | elapsed:   33.6s
[Parallel(n_jobs=15)]: Done  11 tasks      | elapsed:   45.7s
[Parallel(n_jobs=15)]: Done  20 tasks      | elapsed:  1.1min
[Parallel(n_jobs=15)]: Done  31 tasks      | elapsed:  1.4min
[Parallel(n_jobs=15)]: Done  42 tasks      | elapsed:  1.6min
[Parallel(n_jobs=15)]: Done  55 tasks      | elapsed:  2.1min
[Parallel(n_jobs=15)]: Done  68 tasks      | elapsed:  2.4min
[Parallel(n_jobs=15)]: Done  83 tasks      | elapsed:  2.6min
[Parallel(n_jobs=15)]: Done  98 tasks      | elapsed:  2.8min
[Parallel(n_jobs=15)]: Done 123 out of 138 | elapsed:  3.1min remaining:   22.6s
[Parallel(n_jobs=15)]: Done 138 out of 138 | elapsed:  3.4min finished


In [19]:
# Append the third batch of results (dat3) to the accumulated results.
# NOTE(review): dat is rebound from its own previous value, so re-running this
# cell appends dat3 a second time.
dat = dat + dat3

In [20]:
# Discard jobs that produced no result (None) and tabulate the remaining result
# dicts, one row per video.
use_dat = list(filter(lambda _result: _result is not None, dat))
corr_df = pd.DataFrame(use_dat)

In [21]:
# Project configuration, read relative to the current working directory. Later
# cells use its "aliases", "typos", "parse_metadata", "exclusions" and "dirs" sections.
config = toml.load("config.toml")

In [22]:
# Cleaning options, all taken from config.toml.
_exclusions = config["exclusions"]
_clean_kwargs = {
    "exp_types": config["aliases"],
    "subject_typos": config["typos"]["subject"],
    "chk_fields": config["parse_metadata"]["chk_fields"],
    "exclude_subjects": _exclusions["subjects"],
    "exclude_dates": _exclusions["dates"],
    "exclude_pairs": _exclusions["pairs"],
}
corr_df = clean_df(corr_df, **_clean_kwargs)

In [23]:

# Re-encode the array-valued columns as pyarrow arrays (presumably so that the
# frame can be written to parquet further down).
_squeezed_coeffs = corr_df["distortion_coeffs"].apply(lambda _coeffs: _coeffs.squeeze())
corr_df["distortion_coeffs"] = pa.array(_squeezed_coeffs)

# NOTE: the equivalent conversion of a "max_surrogate" column is currently disabled.
for _array_col in ("ave_corr", "ave_raw_corr", "intrinsic_matrix"):
    corr_df[_array_col] = pa.array(corr_df[_array_col].apply(list))

In [24]:
# Keep only rows whose camera has distortion coefficients in the timecourse_01
# calibration file.
_reference_calibration = toml.load(os.path.join(root_dir, "timecourse_01_calibration.toml"))
use_cameras = list(_reference_calibration["distortion_coeffs"])
_camera_rows = corr_df.query("camera.isin(@use_cameras)")
corr_df = _camera_rows.copy()

In [25]:
# Write the autocorrelation table to the analysis directory named in config.toml.
_analysis_dir = config["dirs"]["analysis"]
os.makedirs(_analysis_dir, exist_ok=True)
_parquet_path = os.path.join(_analysis_dir, "fluorescence_autocorrelation.parquet")
corr_df.to_parquet(_parquet_path, engine="pyarrow")