In [1]:
# Notebook setup: enable the autoreload extension in mode 2 so edits to imported
# modules are picked up without restarting the kernel, and render matplotlib
# figures inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os
import copy
import cv2
import matplotlib.pyplot as plt
import numpy as np
import skimage
import toml
import glob
import joblib
import h5py
from tqdm.auto import tqdm
from markovids import pcl, vid, depth
from qd_analysis.util import clean_df

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [3]:
from scipy import signal

In [4]:
import warnings

In [5]:
from joblib import Parallel, delayed
import pandas as pd
import seaborn as sns

In [6]:
# Name of the subdirectory (next to each video) that holds the segmentation
# output. Used as the default `segmentation_dir` of `get_spatial_autocorr`,
# which reads `<reflectance video name>.hdf5` from it.
segmentation_dir = "_segmentation_tau-0-pretrain"

## User functions

In [7]:
def get_spatial_autocorr(
    dat_file,
    spacing=400,
    bground_spacing=2000,
    bground_agg_func=np.mean,
    reader_kwargs=None,
    distortion_coeffs=None,
    intrinsic_matrix=None,
    segmentation_dir=segmentation_dir,
    bground_dir="_bground",
    output_dir="_autocorr",
    force=False,
):
    """Average the normalized 2-D spatial autocorrelation of a fluorescence video.

    Every `spacing`-th frame is read, the nearest stored rolling background is
    subtracted, pixels outside the segmentation mask are zeroed, the frame is
    optionally undistorted, and its full 2-D autocorrelation is divided by the
    frame's sum of squares (so the zero-lag value is 1). The per-frame maps are
    then averaged.

    Parameters
    ----------
    dat_file : str
        Path to the fluorescence video. `../metadata.toml` relative to its
        directory must exist and provide `start_time` and `user_input`.
    spacing : int
        Step, in frames, between sampled frames.
    bground_spacing, bground_agg_func
        Accepted for interface compatibility; not used by this function.
    reader_kwargs : dict or None
        Keyword arguments for `vid.io.AutoReader`; `None` means `{"threads": 2}`.
    distortion_coeffs, intrinsic_matrix
        Passed to `cv2.undistort`. Undistortion is skipped when
        `intrinsic_matrix` is None.
    segmentation_dir, bground_dir, output_dir : str
        Subdirectories of the video's directory holding, respectively, the
        mask file (`<reflectance name>.hdf5`, dataset `labels`), the background
        file (`<fluorescence name>.hdf5`, datasets `bground` and `frame_idxs`)
        and the cached result.
    force : bool
        Recompute even if a cached result exists.

    Returns
    -------
    dict or None
        Dict with `ave_corr`, `intrinsic_matrix`, `distortion_coeffs`,
        `start_time`, `filename` and every key of `metadata["user_input"]`;
        None (with a warning) if the mask or background file is missing.

    Notes
    -----
    The result is cached in `<output_dir>/<fluorescence name>.pkl`. Earlier
    revisions wrote a single `autocorr_data.pkl` shared by every video in the
    directory, so several videos in one directory returned each other's
    results; those legacy files are ignored here.
    """
    # TODO compute summary stats inside and outside of ROI

    if reader_kwargs is None:
        reader_kwargs = {"threads": 2}

    metadata = toml.load(os.path.join(os.path.dirname(dat_file), "../metadata.toml"))

    # Only rewrite the file name: replacing on the whole path would also alter
    # any directory component that happens to contain "fluorescence".
    dirname, basename = os.path.split(dat_file)
    fname_fluorescence, _ = os.path.splitext(basename)
    fname_reflectance = fname_fluorescence.replace("fluorescence", "reflectance")

    segmentation_path = os.path.join(
        dirname, segmentation_dir, f"{fname_reflectance}.hdf5"
    )
    bground_path = os.path.join(dirname, bground_dir, f"{fname_fluorescence}.hdf5")
    # One cache file per video (see Notes).
    save_file = os.path.join(dirname, output_dir, f"{fname_fluorescence}.pkl")

    os.makedirs(os.path.join(dirname, output_dir), exist_ok=True)
    os.makedirs(os.path.join(dirname, bground_dir), exist_ok=True)

    if os.path.exists(save_file) and not force:
        try:
            # NOTE: joblib.load unpickles; only load caches this pipeline wrote.
            return joblib.load(save_file)
        except Exception as e:
            # Best effort: an unreadable cache is recomputed rather than fatal.
            warnings.warn(f"Could not load cached result {save_file} ({e}); recomputing")

    # Check inputs before opening the video so nothing is left open on failure.
    if not os.path.exists(segmentation_path):
        warnings.warn(f"No mask found {dat_file}")
        return None
    if not os.path.exists(bground_path):
        warnings.warn(f"No bground found {dat_file}")
        return None

    reader = vid.io.AutoReader(
        dat_file,
        **reader_kwargs,
    )
    try:
        frame_range = range(0, reader.nframes, spacing)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            frames = reader.get_frames(frame_range)
            # frames = reader.undistort_frames(frames)
    finally:
        # The original only closed the reader on the failure paths.
        reader.close()

    with h5py.File(segmentation_path, "r") as f:
        masks = f["labels"][frame_range].astype("uint8")

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        with h5py.File(bground_path, "r") as f:
            rolling_bgrounds = f["bground"][()]
            idxs = np.asarray(f["frame_idxs"][()])
    if idxs.dtype.kind == "u":
        # Unsigned indices would wrap around in `idxs - _idx` below.
        idxs = idxs.astype("int64")

    bground_sub = np.zeros(frames.shape, dtype="int16")
    for i, (_idx, _frame) in enumerate(zip(frame_range, frames)):
        use_bground = np.argmin(np.abs(idxs - _idx))
        # Subtract in a signed type: if frame and background are both unsigned,
        # pixels darker than the background would wrap instead of going negative.
        diff = _frame.astype("int32") - rolling_bgrounds[use_bground]
        bground_sub[i] = np.clip(diff, 0, 255)
        bground_sub[i][masks[i] <= 0] = 0  # mask out non-mouse stuff...

    if intrinsic_matrix is not None:
        # The masks were already applied above and are not used again, so only
        # the masked frames need undistorting.
        for i in range(len(bground_sub)):
            bground_sub[i] = cv2.undistort(
                bground_sub[i], intrinsic_matrix, distortion_coeffs
            )

    # Running sum instead of a list of full-size maps keeps memory flat.
    corr_sum = None
    n_valid = 0
    for frame in bground_sub:
        corr_frame = frame.astype("float")
        # Sum of squares equals the zero-lag autocorrelation, so dividing by it
        # normalizes the peak to 1 (same value as the original sqrt(x ** 2)).
        energy = np.sum(corr_frame**2)
        if energy <= 0:
            # Empty mask -> all-zero frame -> 0/0; one such frame used to turn
            # the whole average into NaN.
            continue
        _corr_norm = signal.correlate(corr_frame, corr_frame) / energy
        corr_sum = _corr_norm if corr_sum is None else corr_sum + _corr_norm
        n_valid += 1

    if n_valid > 0:
        ave_corr = corr_sum / n_valid
    else:
        warnings.warn(f"No frame with signal inside the mask {dat_file}")
        ave_corr = np.full(
            tuple(max(2 * n - 1, 0) for n in bground_sub.shape[1:]), np.nan
        )

    results = {
        "ave_corr": ave_corr,
        # "metadata": metadata,
        "intrinsic_matrix": intrinsic_matrix,
        "distortion_coeffs": distortion_coeffs,
        "start_time": metadata["start_time"],
        "filename": dat_file,
    }
    for k, v in metadata["user_input"].items():
        results[k] = v
    joblib.dump(results, save_file)
    return results

# Quantify fluorescence length-scale

In [8]:
# Collect every Basler fluorescence video under the timecourse_01 data roots,
# sorted within each root and concatenated in the order listed.
fluo_files = []
for base_dir in (
    "/mnt/data/jmarkow/active_projects/quantum_dots/timecourse_01",
    "/mnt/data/jmarkow/active_projects/quantum_dots/timecourse_01_agarose_beads",
):
    pattern = os.path.join(base_dir, "**", "Basler*fluorescence.avi")
    fluo_files.extend(sorted(glob.glob(pattern, recursive=True)))

In [9]:
# Camera calibration for the timecourse_01 recordings. Later cells index it as
# calibration_data["intrinsics"][cam] and calibration_data["distortion_coeffs"][cam].
calibration_data = toml.load("/home/jmarkow/data_dir/active_projects/quantum_dots/timecourse_01_calibration.toml")

In [10]:
# Load the session-level metadata.toml (one directory above each video) so
# sessions can be looked up by video path.
metadata = {}
for _file in tqdm(fluo_files):
    metadata_path = os.path.join(os.path.dirname(_file), "../metadata.toml")
    metadata[_file] = toml.load(metadata_path)

  0%|          | 0/780 [00:00<?, ?it/s]

In [11]:
# Queue one autocorrelation job per video, using the calibration of the camera
# named in the file name, then run all jobs in parallel.
make_job = delayed(get_spatial_autocorr)
delays = []
for _file in fluo_files:
    cam = os.path.basename(_file).replace("-fluorescence.avi", "")
    job = make_job(
        _file,
        intrinsic_matrix=np.array(calibration_data["intrinsics"][cam]),
        distortion_coeffs=np.array(calibration_data["distortion_coeffs"][cam]),
    )
    delays.append(job)
print(len(delays))
pool = Parallel(n_jobs=-1, verbose=10, backend="multiprocessing")
dat = pool(delays)

780


[Parallel(n_jobs=-1)]: Using backend MultiprocessingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    5.3s
[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed:    9.3s
[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:   10.2s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   14.1s
[Parallel(n_jobs=-1)]: Done  66 tasks      | elapsed:   17.4s
[Parallel(n_jobs=-1)]: Done  81 tasks      | elapsed:   21.2s
[Parallel(n_jobs=-1)]: Done  96 tasks      | elapsed:   22.8s
[Parallel(n_jobs=-1)]: Done 113 tasks      | elapsed:   26.7s
[Parallel(n_jobs=-1)]: Done 130 tasks      | elapsed:   30.7s
[Parallel(n_jobs=-1)]: Done 149 tasks      | elapsed:   35.1s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:   39.8s
[Parallel(n_jobs=-1)]: Done 189 tasks      | elapsed:   43.9s
[Parallel(n_jobs=-1)]: Done 210 tasks      | elapsed:   48.5s
[Parallel(n_jobs=-1)]: Done 233 tasks      |

In [12]:
# Collect every Basler fluorescence video under the timecourse_02/03 data
# roots, sorted within each root and concatenated in the order listed.
fluo_files = []
for base_dir in (
    "/mnt/data/jmarkow/active_projects/quantum_dots/timecourse_02",
    "/mnt/data/jmarkow/active_projects/quantum_dots/timecourse_02_joints",
    "/mnt/data/jmarkow/active_projects/quantum_dots/timecourse_03",
):
    pattern = os.path.join(base_dir, "**", "Basler*fluorescence.avi")
    fluo_files.extend(sorted(glob.glob(pattern, recursive=True)))

In [13]:
# Two calibration revisions, in order: index 0 is v1, index 1 is v2.
calibration_paths = (
    "/home/jmarkow/data_dir/active_projects/quantum_dots/timecourse_02_calibration_v1.toml",
    "/home/jmarkow/data_dir/active_projects/quantum_dots/timecourse_02_calibration_v2.toml",
)
calibration_data = [toml.load(path) for path in calibration_paths]

In [14]:
# Load the session-level metadata.toml (one directory above each video) so
# sessions can be looked up by video path.
metadata = {}
for _file in tqdm(fluo_files):
    metadata_path = os.path.join(os.path.dirname(_file), "../metadata.toml")
    metadata[_file] = toml.load(metadata_path)

  0%|          | 0/485 [00:00<?, ?it/s]

In [None]:
# Queue one autocorrelation job per video. Sessions whose start date is on or
# before 2024-06-10 use the v1 calibration; later sessions use v2.
make_job = delayed(get_spatial_autocorr)
delays = []
for _file in fluo_files:
    cam = os.path.basename(_file).replace("-fluorescence.avi", "")
    timestamp = pd.to_datetime(metadata[_file]["start_time"])
    is_v1_session = timestamp.floor("d") <= pd.to_datetime("2024-06-10")
    use_calibration_data = calibration_data[0] if is_v1_session else calibration_data[1]
    job = make_job(
        _file,
        intrinsic_matrix=np.array(use_calibration_data["intrinsics"][cam]),
        distortion_coeffs=np.array(use_calibration_data["distortion_coeffs"][cam]),
    )
    delays.append(job)
print(len(delays))
pool = Parallel(n_jobs=-1, verbose=10, backend="multiprocessing")
dat2 = pool(delays)

485


[Parallel(n_jobs=-1)]: Using backend MultiprocessingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    9.8s
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   10.6s
[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed:   14.0s
[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:   15.0s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   18.8s
[Parallel(n_jobs=-1)]: Done  66 tasks      | elapsed:   22.2s
[Parallel(n_jobs=-1)]: Done  81 tasks      | elapsed:   25.7s
[Parallel(n_jobs=-1)]: Done  96 tasks      | elapsed:   27.9s
[Parallel(n_jobs=-1)]: Done 113 tasks      | elapsed:   31.0s
[Parallel(n_jobs=-1)]: Done 130 tasks      | elapsed:   34.8s
[Parallel(n_jobs=-1)]: Done 149 tasks      | elapsed:   39.1s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:   43.2s
[Parallel(n_jobs=-1)]: Done 189 tasks      | elapsed:   47.2s
[Parallel(n_jobs=-1)]: Done 210 tasks      | elapsed:   48.7s
[Parallel(n_jobs=-1)]: Done 233 tasks      |

In [None]:
# Append the second batch of results to the first, in place.
# NOTE: not idempotent - re-running this cell appends `dat2` again.
dat.extend(dat2)

In [None]:
# only include +1hr

In [None]:
# Largest absolute lag kept on each axis when the autocorrelation maps are cropped.
max_lag = 50 # in px
# NOTE(review): presumably the camera-to-subject distance; not referenced in the
# cells that follow - confirm whether it is still needed.
z = 300  # in mm

In [None]:
# `get_spatial_autocorr` returns None when a mask or background file is missing,
# so `dat[0]` may not be a result; take the shape from the first real one.
first_result = next(_dat for _dat in dat if _dat is not None)
height, width = first_result["ave_corr"].shape
im_height, im_width = height // 2, width // 2
# Lag (in px) of each row / column of the autocorrelation map. Same values as
# the original `np.arange(-n // 2, n // 2) + 1`, without relying on
# `-n // 2` parsing as `(-n) // 2`; for odd n, zero lag sits at the centre.
height_lags = np.arange(height) - (height - 1) // 2
width_lags = np.arange(width) - (width - 1) // 2

In [None]:
# Boolean masks selecting the rows / columns whose lag lies within +/- max_lag.
use_height = (height_lags >= -max_lag) & (height_lags <= max_lag)
use_width = (width_lags >= -max_lag) & (width_lags <= max_lag)

In [None]:
# Drop the videos that produced no result (None) and tabulate the rest,
# one row per video.
use_dat = list(filter(lambda entry: entry is not None, dat))
corr_df = pd.DataFrame(use_dat)

In [None]:
def _crop_to_max_lag(corr):
    """Keep only the rows and columns whose lag is within +/- max_lag."""
    return corr[use_height][:, use_width]


# NOTE: not idempotent - the masks match the uncropped maps, so re-running this
# cell on already-cropped maps fails.
corr_df["ave_corr"] = corr_df["ave_corr"].apply(_crop_to_max_lag)

In [None]:
# Analysis configuration, read from the notebook's working directory. Later
# cells use its "aliases", "typos", "parse_metadata", "exclusions" and "dirs" entries.
config = toml.load("config.toml")

In [None]:
# Pass the table through the project's `clean_df` helper, configured from
# config.toml.
# NOTE(review): judging by the argument names this normalizes experiment-type
# aliases and subject typos and drops excluded subjects/dates - confirm against
# qd_analysis.util.clean_df.
# NOTE: overwrites `corr_df`, so re-running this cell cleans the already-cleaned table.
corr_df = clean_df(
    corr_df,
    exp_types=config["aliases"],
    subject_typos=config["typos"]["subject"],
    chk_fields=config["parse_metadata"]["chk_fields"],
    exclude_subjects=config["exclusions"]["subjects"],
    exclude_dates=config["exclusions"]["dates"],
)

In [None]:
import pyarrow as pa

In [None]:
# convert it!

In [None]:
# Re-encode the array-valued columns through pyarrow before writing to parquet.
# Each distortion-coefficient array is squeezed; each `ave_corr` and
# `intrinsic_matrix` entry is turned into a list of its rows, so these two
# columns end up as nested lists.
# NOTE(review): assumes every entry is a NumPy array (rows with
# intrinsic_matrix=None would fail in `.squeeze()` / `list`) - confirm.
corr_df["distortion_coeffs"] = pa.array(corr_df["distortion_coeffs"].apply(lambda x: x.squeeze()))
corr_df["ave_corr"] = pa.array(corr_df["ave_corr"].apply(list))
corr_df["intrinsic_matrix"] = pa.array(corr_df["intrinsic_matrix"].apply(list))

In [None]:
# Write the table to the analysis directory named in the config, creating the
# directory first if needed.
analysis_dir = config["dirs"]["analysis"]
os.makedirs(analysis_dir, exist_ok=True)
output_path = os.path.join(analysis_dir, "fluorescence_autocorrelation.parquet")
corr_df.to_parquet(output_path, engine="pyarrow")

In [None]:
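# NOTE: `ave_corr` and `intrinsic_matrix` are saved as nested lists (one list per row);
# apply `np.vstack` to each entry after loading the parquet file to get 2-D arrays back.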