In [5]:
# imports:
from pathlib import Path
import numpy as np
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, CustomJS, Span
try:
    # Bokeh 3.x
    from bokeh.models import Slider
except ImportError:
    # Bokeh 2.x: fall back to the widgets submodule. Only a failed import
    # should trigger the fallback, so nothing broader than ImportError is caught.
    from bokeh.models.widgets import Slider
from bokeh.layouts import column, row
import UtilityFunctions_newOE as uf
from typing import Optional
import re

In [6]:
# simple approach
# --- Simple eye synchronization to the Open Ephys (OE) timebase ---
# Algorithm:
#   (1) Read internal timestamps (seconds) for each eye; delta-analysis to verify stability.
#   (2) Get the FIRST TTL sample for that eye from oe_events.
#   (3) Place frame 0 at that TTL, and place frame i at:  first_TTL_sample + round(fs * (t_sec[i] - t_sec[0])).
#       (i.e., use internal timing deltas verbatim; no regression.)
#   (4) Attach brightness from block.<le/re>_frame_val_list.
#   (5) Return a tidy per-eye DataFrame indexed by OE samples (int64), with OE time in seconds and brightness.



def _get_fs(block) -> float:
    """
    Return the Open Ephys sample rate (Hz) of ``block`` as a float.

    ``block.sample_rate`` is used when it is set (not None). Otherwise the
    rate is obtained from ``block.get_sample_rate()`` and stored back on
    ``block.sample_rate`` so later calls skip the lookup.
    """
    cached = getattr(block, 'sample_rate', None)
    if cached is not None:
        return float(cached)
    rate = float(block.get_sample_rate())
    block.sample_rate = rate
    return rate

def _locate_eye_timestamps_csv(mp4: Path) -> Path:
    """
    Find the timestamps CSV that belongs to an eye video.

    Handles the case where the mp4 is named *_LE.mp4 / *_RE.mp4 while the CSV
    is named <base>_timestamps.csv (the LE/RE suffix is stripped from the stem).

    Search order (first hit wins):
      1. exact ``<stem>_timestamps.csv`` / ``<stripped>_timestamps.csv`` next to the mp4
      2. fuzzy patterns in the mp4's folder
      3. fuzzy patterns in ``timestamps`` / ``frames_timestamps`` subfolders of the
         mp4's folder and of its parent
      4. fuzzy patterns, recursively, under the mp4's grandparent folder
         (or its parent when the path has no grandparent)

    Within one pattern, candidates are sorted so the result does not depend on
    the order in which the filesystem happens to list files.

    Raises
    ------
    FileNotFoundError
        If no candidate CSV is found.
    """
    stem = mp4.stem
    stripped = re.sub(r'([_\-]?)(LE|RE)$', '', stem, flags=re.IGNORECASE)
    # Most specific pattern first; the bare wildcards are a last resort.
    patterns = (f"{stripped}*timestamp*.csv", f"{stripped}*time*.csv",
                "*timestamp*.csv", "*time*.csv")

    def _first_match(folder: Path, recursive: bool = False):
        """Return the first (sorted) hit of the highest-priority pattern, or None."""
        finder = folder.rglob if recursive else folder.glob
        for pat in patterns:
            hits = sorted(finder(pat))
            if hits:
                return hits[0]
        return None

    # 1) exact in same folder
    for candidate_stem in (stem, stripped):
        exact = mp4.with_name(candidate_stem + "_timestamps.csv")
        if exact.exists():
            return exact

    # 2) fuzzy in same folder
    hit = _first_match(mp4.parent)
    if hit is not None:
        return hit

    # 3) common subfolders
    for sub in ("timestamps", "frames_timestamps"):
        for folder in (mp4.parent / sub, mp4.parent.parent / sub):
            if folder.exists():
                hit = _first_match(folder)
                if hit is not None:
                    return hit

    # 4) LE/RE root recursive; a one-component path has no parents[1], so fall
    #    back to the direct parent instead of raising IndexError.
    ancestors = mp4.parents
    root = ancestors[1] if len(ancestors) > 1 else mp4.parent
    hit = _first_match(root, recursive=True)
    if hit is not None:
        return hit

    raise FileNotFoundError(f"Timestamp CSV not found near {mp4}")

def _read_eye_internal_seconds(block, eye: str) -> np.ndarray:
    """
    Load the per-frame internal timestamps of one eye camera as float64.

    ``eye == 'left'`` selects ``block.le_videos[0]``; any other value selects
    ``block.re_videos[0]``. If either video list is missing/None,
    ``block.handle_eye_videos()`` is called first. The CSV is located with
    ``_locate_eye_timestamps_csv``.

    Column choice: the first column whose name contains 'time'
    (case-insensitive); else the first numeric column; else the file is re-read
    without a header and column 0 is used.

    Raises
    ------
    ValueError
        If the timestamps contain non-finite values, have fewer than 3 entries,
        or are not strictly increasing.
    """
    needs_videos = any(getattr(block, attr, None) is None
                       for attr in ('le_videos', 're_videos'))
    if needs_videos:
        block.handle_eye_videos()

    videos = block.le_videos if eye == 'left' else block.re_videos
    csv_path = _locate_eye_timestamps_csv(Path(videos[0]))
    table = pd.read_csv(csv_path, engine="python")

    # Preferred: a column that is named like a time column
    candidates = [name for name in table.columns if 'time' in str(name).lower()]
    if not candidates:
        # Fallback: the first numeric column
        candidates = [name for name in table.columns
                      if pd.api.types.is_numeric_dtype(table[name])][:1]
    if not candidates:
        # Last resort: treat the file as header-less and take column 0
        table = pd.read_csv(csv_path, header=None, engine="python")
        candidates = [0]

    t = table[candidates[0]].to_numpy(dtype='float64')
    if len(t) < 3 or np.any(~np.isfinite(t)):
        raise ValueError(f"Bad or too-short timestamps in {csv_path}")
    if np.any(np.diff(t) <= 0):
        raise ValueError(f"Timestamps not strictly increasing in {csv_path}")
    return t

def _delta_analysis(t_sec: np.ndarray, label: str, cov_warn: float = 0.05) -> dict:
    """
    Summarize frame-to-frame intervals of a timestamp vector.

    Prints a one-line report (plus a warning when the coefficient of variation
    exceeds ``cov_warn``) and returns a dict with:
      - 'fps':      1 / median interval
      - 'cov':      std / mean of the intervals (inf when the mean is not > 0)
      - 'out_frac': fraction of intervals below the 1st or above the 99th
                    percentile of the intervals themselves (so it measures tail
                    mass relative to the data's own percentiles)
      - 'dt':       the interval array (np.diff of ``t_sec``)
    """
    dt = np.diff(t_sec)
    mean_dt = np.mean(dt)
    fps = 1.0 / np.median(dt)

    if mean_dt > 0:
        cov = float(np.std(dt) / mean_dt)
    else:
        cov = np.inf

    low_cut, high_cut = np.percentile(dt, [1, 99])
    in_tails = np.logical_or(dt < low_cut, dt > high_cut)
    out_frac = float(np.mean(in_tails))

    print(f"[{label}] frames={len(t_sec):,} | median fps={fps:.3f} | CoV(dt)={cov*100:.2f}% | outliers(±1–99%)={out_frac*100:.2f}%")
    if cov > cov_warn:
        print(f"[WARN] {label}: CoV(dt) > {cov_warn*100:.1f}%. Stream may be unstable.")
    return {'fps': fps, 'cov': cov, 'out_frac': out_frac, 'dt': dt}

def _first_ttl_sample(block, eye: str) -> int:
    """
    Return the first non-missing TTL value (as int) for one eye.

    Reads ``block.oe_events['L_eye_TTL']`` when ``eye == 'left'`` and
    ``block.oe_events['R_eye_TTL']`` otherwise. "First" means first in row
    order after dropping missing values; the column is not sorted here.

    Raises
    ------
    RuntimeError
        If the column has no non-missing entries.
    """
    col = 'L_eye_TTL' if eye == 'left' else 'R_eye_TTL'
    ttl = block.oe_events[col].dropna().astype(int)
    if ttl.empty:
        raise RuntimeError(f"No TTLs found for {eye} eye in oe_events['{col}'].")
    return int(ttl.iloc[0])

def build_eye_df_simple(block, eye: str, cov_warn: float = 0.05) -> pd.DataFrame:
    """
    Build the per-eye DataFrame with the anchor-at-first-TTL approach.

    Frame 0 is placed at the eye's first TTL sample; frame i is placed at
    ``first_TTL + round(fs * (t_sec[i] - t_sec[0]))`` using the camera's
    internal timestamps. Brightness comes from ``block.le_frame_val_list``
    (``eye == 'left'``) or ``block.re_frame_val_list`` (otherwise); if its
    length differs from the number of timestamps, both are clipped to the
    shorter length and an [INFO] line is printed.

    Returns a DataFrame indexed by 'oe_sample' with columns
    ['frame_idx', 'brightness', 'oe_time_s']. Rows that collide on the same
    OE sample after rounding keep only the first occurrence.
    """
    fs = _get_fs(block)
    t_sec = _read_eye_internal_seconds(block, eye)
    _delta_analysis(t_sec, label=eye.upper(), cov_warn=cov_warn)

    # Anchor: the first TTL sample marks frame 0; later frames follow the
    # internal timing deltas converted to OE samples.
    anchor = _first_ttl_sample(block, eye)
    offsets = np.round(fs * (t_sec - t_sec[0])).astype(np.int64)
    oe_samples = anchor + offsets

    # Brightness vector stored on the block
    brightness_attr = 'le_frame_val_list' if eye == 'left' else 're_frame_val_list'
    brightness = np.asarray(getattr(block, brightness_attr), dtype='float64')

    n_bright, n_frames = len(brightness), len(oe_samples)
    n = min(n_bright, n_frames)
    if n_bright != n_frames:
        print(f"[INFO] {eye.upper()}: brightness length ({n_bright}) != frames ({n_frames}); clipping to {n}.")

    table = pd.DataFrame({'frame_idx': np.arange(n, dtype=int),
                          'oe_sample': oe_samples[:n],
                          'brightness': brightness[:n]})
    # Rounding can map two frames onto one OE sample; keep the earlier frame.
    table = table.sort_values('oe_sample').drop_duplicates('oe_sample', keep='first')
    table['oe_time_s'] = table['oe_sample'] / fs
    return table.set_index('oe_sample')

def simple_sync_build(block, cov_warn: float = 0.05, export: bool = False):
    """
    Build the simple-sync DataFrame for the left eye, then the right eye.

    When ``export`` is true, both frames are written under
    ``block.analysis_path`` as ``eye_left_simple_sync.csv`` and
    ``eye_right_simple_sync.csv`` and the saved paths are printed.

    Returns
    -------
    (df_left, df_right)
    """
    df_left = build_eye_df_simple(block, 'left', cov_warn=cov_warn)
    df_right = build_eye_df_simple(block, 'right', cov_warn=cov_warn)
    if not export:
        return df_left, df_right

    left_path = Path(block.analysis_path) / "eye_left_simple_sync.csv"
    right_path = Path(block.analysis_path) / "eye_right_simple_sync.csv"
    df_left.to_csv(left_path)
    df_right.to_csv(right_path)
    for saved in (left_path, right_path):
        print(f"[OK] Saved: {saved}")
    return df_left, df_right

from bokeh.io import output_file, show
def plot_simple_sync_bokeh(block, df_left=None, df_right=None, shift_range=200, show_led=True,  to_browser=True):
    """
    Bokeh plot of both eyes' brightness vs Open Ephys time (seconds), with manual shift sliders.

    Parameters
    ----------
    block : BlockSync
        Your BlockSync object (must have .oe_events and a sample rate; and brightness
        lists when the DataFrames have to be built here).
    df_left, df_right : pd.DataFrame or None
        Per-eye DataFrames from simple_sync_build(); if either one is None, BOTH are
        recomputed with simple_sync_build(block, export=False).
        Columns read here: 'oe_time_s' and 'brightness'.
    shift_range : int
        Slider range in *indices* (rows of the per-eye DataFrame). Positive values shift
        the trace to the right: displayed[i] = original[i - shift], which is the same
        direction as shift_eye_df_by_index(df, shift) for positive `shift`.
    show_led : bool
        If True, draw LED event verticals (semi-transparent) at
        block.oe_events['LED_driver'] / fs.
    to_browser : bool
        Accepted for signature compatibility only; this implementation does not read it
        and always renders through output_notebook().
    """
    # Build simple sync if needed
    if df_left is None or df_right is None:
        df_left, df_right = simple_sync_build(block, export=False)

    fs = _get_fs(block)

    # Prepare arrays; convert NaNs/Infs in Y to None for Bokeh
    def _nan2none(a):
        return [None if (not np.isfinite(v)) else float(v) for v in a]

    xL = df_left['oe_time_s'].to_numpy(dtype=float)
    yL = df_left['brightness'].to_numpy(dtype=float)
    xR = df_right['oe_time_s'].to_numpy(dtype=float)
    yR = df_right['brightness'].to_numpy(dtype=float)

    # Median step: how many ms one slider tick corresponds to for each eye
    stepL_ms = float(np.median(np.diff(xL))*1000.0) if len(xL) > 1 else float('nan')
    stepR_ms = float(np.median(np.diff(xR))*1000.0) if len(xR) > 1 else float('nan')
    print(f"[INFO] Slider tick ≈ {stepL_ms:.3f} ms (Left), {stepR_ms:.3f} ms (Right)")

    # ColumnDataSource: 'y0' keeps the original series, 'y' is the displayed (shifted) one
    src_le = ColumnDataSource(dict(x=xL.tolist(), y=_nan2none(yL), y0=_nan2none(yL)))
    src_re = ColumnDataSource(dict(x=xR.tolist(), y=_nan2none(yR), y0=_nan2none(yR)))

    # Figure
    output_notebook()
    p = figure(title="Simple synchronization — brightness vs OE time (s)  (zoom/pan; use sliders to shift)",
               x_axis_label="OE time (s)", y_axis_label="Brightness (a.u.)",
               width=1200, height=450, tools="pan,wheel_zoom,box_zoom,reset,save")

    p.line('x', 'y', source=src_le, line_width=1.5, color="#1f77b4", legend_label="Left eye")
    p.line('x', 'y', source=src_re, line_width=1.5, color="#d62728", legend_label="Right eye")
    p.legend.click_policy = "hide"

    # LED verticals (semi-transparent), one Span per LED event
    if show_led and ('LED_driver' in block.oe_events.columns):
        led = block.oe_events['LED_driver'].dropna().astype(int).to_numpy()
        for x in led / fs:
            p.add_layout(Span(location=float(x), dimension='height',
                              line_color="#2ca02c", line_alpha=0.5, line_width=1.5))

    # Sliders: per-trace integer *index* shifts
    sL = Slider(title="Left Eye Shift (indices)",  start=-shift_range, end=shift_range, value=0, step=1, width=350)
    sR = Slider(title="Right Eye Shift (indices)", start=-shift_range, end=shift_range, value=0, step=1, width=350)

    # JS callback: x stays fixed and y is re-read from y0 at index (i - shift), so a
    # positive shift slides the curve to the right; vacated edges become null (no wrap).
    cb = CustomJS(args=dict(le=src_le, re=src_re, sL_slider=sL, sR_slider=sR), code="""
        const sL = sL_slider.value|0;
        const sR = sR_slider.value|0;

        // LEFT: displayed[i] = original[i - shift]
        const yL  = le.data['y'];
        const yL0 = le.data['y0'];
        const NL  = yL.length;
        for (let i=0; i<NL; i++) {
            const j = i - sL;
            yL[i] = (j>=0 && j<NL) ? yL0[j] : null;
        }
        le.change.emit();

        // RIGHT: displayed[i] = original[i - shift]
        const yR  = re.data['y'];
        const yR0 = re.data['y0'];
        const NR  = yR.length;
        for (let i=0; i<NR; i++) {
            const j = i - sR;
            yR[i] = (j>=0 && j<NR) ? yR0[j] : null;
        }
        re.change.emit();
    """)
    sL.js_on_change('value', cb)
    sR.js_on_change('value', cb)

    show(column(p, row(sL, sR)))

def _get_fs(block) -> float:
    """
    Return the sample rate (Hz) of ``block`` as a float.

    Reads ``block.sample_rate``; when that is missing or None the value comes
    from ``block.get_sample_rate()`` and is cached on ``block.sample_rate``.
    """
    fs = getattr(block, 'sample_rate', None)
    if fs is None:
        # Query once and remember the answer on the block
        fs = block.sample_rate = float(block.get_sample_rate())
    return float(fs)

def _assert_strictly_increasing(name: str, arr: np.ndarray):
    """
    Raise ValueError unless ``arr`` has at least 2 elements and is strictly increasing.

    ``name`` is only used to label the error message. ``arr`` may be any
    array-like; it is converted with ``np.asarray``. Inputs with fewer than 2
    elements are rejected with their own message so that "too short" is not
    reported as "non-monotonic".
    """
    values = np.asarray(arr)
    if values.size < 2:
        raise ValueError(
            f"{name} must contain at least 2 elements to be checked for strict ordering "
            f"(got {values.size})."
        )
    if not np.all(np.diff(values) > 0):
        raise ValueError(f"{name} must be strictly increasing. Found non-monotonic sequence.")

def _build_arena_grid(block, target_fps: float):
    """
    Build a regular grid of OE sample numbers spanning the Arena TTL events.

    The grid starts at the smallest 'Arena_TTL' value, advances by
    ``round(fs / target_fps)`` samples, and stops at or before the largest
    'Arena_TTL' value. Rows of ``block.oe_events`` with a missing 'Arena_TTL'
    or 'Arena_TTL_frame' are ignored.

    Returns
    -------
    (fs, grid, step, arena)
        fs    : sample rate in Hz (float)
        grid  : int64 array of OE sample numbers
        step  : grid spacing in samples (int)
        arena : the cleaned, 'Arena_TTL'-sorted events table

    Raises
    ------
    ValueError
        If ``target_fps`` is not positive, or is so large that the grid step
        rounds to 0 samples.
    RuntimeError
        If fewer than 2 usable Arena TTL events exist.
    """
    # `not x > 0` also rejects NaN
    if not target_fps > 0:
        raise ValueError(f"target_fps must be positive, got {target_fps!r}.")

    fs = _get_fs(block)
    arena = (
        block.oe_events[['Arena_TTL','Arena_TTL_frame']]
        .dropna()
        .astype({'Arena_TTL': np.int64})   # explicit 64-bit: sample counters must not depend on the platform's default int
        .sort_values('Arena_TTL')          # sorting the Arena events does NOT touch the eye streams
    )
    if len(arena) < 2:
        raise RuntimeError("Not enough Arena_TTL events to build a grid.")

    step = int(round(fs / target_fps))               # samples/tick
    if step < 1:
        raise ValueError(
            f"target_fps={target_fps!r} is too high for fs={fs!r}: grid step rounds to {step} samples."
        )
    start = int(arena['Arena_TTL'].iloc[0])
    stop  = int(arena['Arena_TTL'].iloc[-1])
    grid  = np.arange(start, stop + 1, step, dtype=np.int64)
    return fs, grid, step, arena

def _nearest_with_tol(sorted_vec: np.ndarray, queries: np.ndarray, tol: int) -> np.ndarray:
    """
    For each query, return the index of the nearest element of ``sorted_vec``.

    Entries whose nearest element is farther than ``tol`` are set to -1.
    On an exact tie between the two neighbours the lower index wins.
    ``sorted_vec`` must be strictly increasing (checked via
    ``_assert_strictly_increasing``).
    """
    _assert_strictly_increasing("sorted_vec", sorted_vec)
    last = len(sorted_vec) - 1

    # Insertion point = right neighbour; the element before it = left neighbour.
    insert_at = np.searchsorted(sorted_vec, queries, side='left')
    left_idx = np.clip(insert_at - 1, 0, last)
    right_idx = np.clip(insert_at, 0, last)

    left_dist = np.abs(sorted_vec[left_idx] - queries)
    right_dist = np.abs(sorted_vec[right_idx] - queries)

    nearest = np.where(left_dist <= right_dist, left_idx, right_idx)
    too_far = np.minimum(left_dist, right_dist) > tol
    nearest[too_far] = -1
    return nearest

def _shift_eye_df_by_index(df: pd.DataFrame, shift: int) -> pd.DataFrame:
    """
    Slide 'frame_idx' and 'brightness' by ``shift`` rows along the current row order.

      - the index (oe_sample) and every other column (e.g. 'oe_time_s') are unchanged
      - positive ``shift`` moves values toward later rows, negative toward earlier rows
      - vacated edge rows become NaN (no wrap); both columns come back as float
      - ``shift == 0`` returns a plain copy

    The input frame is never modified and is not re-sorted.
    NOTE(review): intended to mirror the plot slider; confirm the sign convention
    against the JS callback in plot_simple_sync_bokeh.
    """
    shifted = df.copy()
    if shift == 0:
        return shifted
    for column_name in ('frame_idx', 'brightness'):
        # Cast first so the NaN fill is representable, then slide positionally.
        shifted[column_name] = shifted[column_name].astype(float).shift(shift)
    return shifted

def describe_eye_tick(df: pd.DataFrame) -> float:
    """
    Return the median spacing of ``df['oe_time_s']`` in milliseconds.

    Returns NaN when the frame has fewer than two rows.
    """
    times = df['oe_time_s'].to_numpy(dtype=float)
    if times.size >= 2:
        median_step_s = np.median(np.diff(times))
        return float(median_step_s * 1000.0)
    return float('nan')

def shift_eye_df_by_index(df: pd.DataFrame, shift: int) -> pd.DataFrame:
    """
    Slide 'frame_idx' and 'brightness' by ``shift`` rows on an eye DataFrame.

      - ``shift == 0`` returns a plain copy (row order untouched).
      - Otherwise the frame is first sorted by its index (OE sample), then both
        columns are moved ``shift`` rows: positive toward later rows, negative
        toward earlier rows. 'oe_time_s' and the index keep their values.
      - Vacated edge rows are filled with NaN (no wrap-around); both columns are
        returned as float.

    Expects the layout produced by simple_sync_build (index = oe_sample; columns
    include 'frame_idx', 'brightness', 'oe_time_s').
    NOTE(review): meant to reproduce the Bokeh slider shift; confirm the sign
    convention against the JS callback in plot_simple_sync_bokeh.
    """
    if shift == 0:
        return df.copy()

    # Time-sorted working copy; the caller's frame is left alone
    out = df.sort_index().copy()
    n_rows = len(out)

    # Number of rows vacated at the edge, capped at the frame length
    n_pad = min(shift if shift > 0 else -int(shift), n_rows)
    pad = np.full(n_pad, np.nan, dtype=float)

    for column_name in ('frame_idx', 'brightness'):
        values = out[column_name].to_numpy(dtype=float)   # float so NaN fits
        if shift > 0:
            moved = np.concatenate((pad, values[:n_rows - n_pad]))
        else:
            moved = np.concatenate((values[n_pad:], pad))
        out[column_name] = moved
    return out

def build_final_sync_df_merge_nearest(
    block,
    df_left:  Optional[pd.DataFrame],
    df_right: Optional[pd.DataFrame],
    target_fps: float = 60.0,
    tol_frac: float = 0.9,          # nearest is accepted if within tol_frac * tick
    pre_shift_left:  int = 0,       # apply an index shift (see _shift_eye_df_by_index) BEFORE merge
    pre_shift_right: int = 0,
    export_csv: bool = True,
    csv_name: str = "blocksync_df.csv",
    verbose: bool = True,
) -> pd.DataFrame:
    """
    Merge both eye DataFrames and the Arena frame numbers onto a regular Arena grid.

    The grid comes from _build_arena_grid(block, target_fps). Every grid sample
    takes the nearest Arena / left-eye / right-eye entry, or NaN when the nearest
    entry is more than ``ceil(tol_frac * step)`` samples away. The eye DataFrames
    are not re-sorted.

    Parameters
    ----------
    block :
        Provides ``oe_events`` and the sample rate; ``analysis_path`` is read when
        ``export_csv`` is true. ``block.final_sync_df`` is set to the result.
    df_left, df_right : pd.DataFrame or None
        Per-eye frames as produced by simple_sync_build (index = oe_sample; columns
        'frame_idx', 'oe_time_s', 'brightness'). A frame passed as None is built
        here with simple_sync_build(block, export=False).
    target_fps : float
        Grid rate in Hz.
    tol_frac : float
        Matching tolerance as a fraction of one grid step.
    pre_shift_left, pre_shift_right : int
        Row shifts applied with _shift_eye_df_by_index before the merge (0 = none).
    export_csv, csv_name :
        When ``export_csv`` is true the result is written (without index) to
        ``Path(block.analysis_path) / csv_name``.
    verbose : bool
        Print progress / summary lines.

    Returns
    -------
    pd.DataFrame
        Columns: 'Arena_TTL' (grid sample, float), 'Arena_frame', 'L_eye_frame',
        'R_eye_frame', 'L_values', 'R_values'.

    Raises
    ------
    ValueError
        If an eye frame has a non-numeric or not strictly increasing index, or
        lacks a required column.
    """
    # 0) Fill in eye frames that were not supplied
    if df_left is None or df_right is None:
        if verbose: print("[INFO] Eye DataFrame(s) not supplied; building them with simple_sync_build().")
        built_left, built_right = simple_sync_build(block, export=False)
        if df_left is None:
            df_left = built_left
        if df_right is None:
            df_right = built_right

    # 1) Build Arena grid
    fs, grid, step, arena = _build_arena_grid(block, target_fps=target_fps)
    tol = int(np.ceil(tol_frac * step))  # in samples
    tick_ms = 1000.0 * step / fs

    # 2) Eye dfs: sanity + optional pre-merge shifts (NO sorting is performed)
    required_cols = {'frame_idx','brightness','oe_time_s'}
    for nm, df in (("LEFT", df_left), ("RIGHT", df_right)):
        if not pd.api.types.is_numeric_dtype(df.index.dtype):
            raise ValueError(f"{nm}: df.index must be OE samples.")
        _assert_strictly_increasing(f"{nm} df.index (oe_sample)", df.index.values.astype(np.int64))
        if not required_cols.issubset(df.columns):
            raise ValueError(f"{nm}: df must include 'frame_idx','brightness','oe_time_s'.")

    if pre_shift_left:
        if verbose: print(f"[INFO] Pre-shifting LEFT by {pre_shift_left} ticks (slider semantics).")
        df_left = _shift_eye_df_by_index(df_left, pre_shift_left)
    if pre_shift_right:
        if verbose: print(f"[INFO] Pre-shifting RIGHT by {pre_shift_right} ticks (slider semantics).")
        df_right = _shift_eye_df_by_index(df_right, pre_shift_right)

    # 3) Map Arena frames to the grid (nearest-with-tolerance)
    a_times  = arena['Arena_TTL'].to_numpy(dtype=np.int64)
    a_frames = arena['Arena_TTL_frame'].to_numpy(dtype=np.int64)
    idxA = _nearest_with_tol(a_times, grid, tol)
    arena_frame = np.full(grid.shape, np.nan, dtype=float)
    okA = idxA >= 0
    arena_frame[okA] = a_frames[idxA[okA]]

    # 4) Nearest-with-tolerance merge for LEFT and RIGHT (NO resort of dfs)
    def _map_eye_to_grid(df_eye: pd.DataFrame):
        """Return (frame_idx, brightness) sampled at every grid point; NaN where unmatched."""
        t   = df_eye.index.to_numpy(dtype=np.int64)       # oe_sample per frame
        fi  = df_eye['frame_idx'].to_numpy(dtype=float)
        val = df_eye['brightness'].to_numpy(dtype=float)
        idx = _nearest_with_tol(t, grid, tol)             # indices into t, -1 = no match
        frames = np.full(grid.shape, np.nan, dtype=float)
        vals   = np.full(grid.shape, np.nan, dtype=float)
        ok = idx >= 0
        frames[ok] = fi[idx[ok]]
        vals[ok]   = val[idx[ok]]
        return frames, vals

    L_eye_frame, L_values = _map_eye_to_grid(df_left)
    R_eye_frame, R_values = _map_eye_to_grid(df_right)

    # 5) Assemble final df (column names/format kept as in the legacy output)
    final_df = pd.DataFrame({
        'Arena_TTL':   grid.astype(float),   # float to mimic legacy style (… .0)
        'Arena_frame': arena_frame,
        'L_eye_frame': L_eye_frame,
        'R_eye_frame': R_eye_frame,
        'L_values':    L_values,
        'R_values':    R_values,
    })

    # 6) Save & attach
    if export_csv:
        outp = Path(block.analysis_path) / csv_name
        final_df.to_csv(outp, index=False)
        if verbose: print(f"[OK] Saved final sync CSV → {outp}")
    block.final_sync_df = final_df

    if verbose:
        print(f"[INFO] Grid rows: {len(grid):,} | tick ≈ {tick_ms:.3f} ms | tol={tol} samp (tol_frac={tol_frac:.2f})")
        nL = int(np.sum(np.isfinite(L_values))); nR = int(np.sum(np.isfinite(R_values)))
        print(f"[INFO] Valid LEFT grid points: {nL:,} | Valid RIGHT grid points: {nR:,}")

    return final_df

def sanity_plot_final_df(final_df, fs, show_led_off=False, led_off_samples=None, title="final_df sanity"):
    """
    Plot the merged left/right brightness traces against OE time in a notebook.

    final_df:        DataFrame with columns ['Arena_TTL','L_values','R_values']
    fs:              Open Ephys sample rate (Hz); sample numbers are divided by it
    show_led_off:    if True (and led_off_samples is non-empty), draw one vertical
                     line per entry of led_off_samples
    led_off_samples: sample numbers at which to draw the vertical lines
    title:           figure title
    """
    seconds = np.asarray(final_df['Arena_TTL'], dtype=float) / fs
    left_trace = np.asarray(final_df['L_values'], dtype=float)
    right_trace = np.asarray(final_df['R_values'], dtype=float)

    output_notebook()
    fig = figure(title=title, x_axis_label="OE time (s)", y_axis_label="Brightness (a.u.)",
                 width=1200, height=450, tools="pan,wheel_zoom,box_zoom,reset,save")
    source = ColumnDataSource(dict(x=seconds, yL=left_trace, yR=right_trace))
    for y_key, colour, label in (('yL', "#1f77b4", "Left eye"),
                                 ('yR', "#d62728", "Right eye")):
        fig.line('x', y_key, source=source, line_width=1.5, color=colour, legend_label=label)
    fig.legend.click_policy = "hide"

    # Optional vertical markers, converted from samples to seconds
    draw_markers = show_led_off and led_off_samples is not None and len(led_off_samples)
    if draw_markers:
        for sample in led_off_samples:
            marker = Span(location=float(sample)/fs, dimension='height',
                          line_color="#2ca02c", line_alpha=0.5, line_width=1.5)
            fig.add_layout(marker)
    show(fig)

In [41]:
# Block instantiation: build the block objects for one animal via the project
# helper module `uf`, then attach the TTL channel map used for this animal.
bad_blocks = [] # passed to uf.block_generator as bad_blocks; empty here
experiment_path = Path(r"Z:\Nimrod\experiments")

block_numbers = [13,14,15,16,17,18]
animal = 'PV_208'
block_collection = uf.block_generator(block_numbers=block_numbers,
                                      experiment_path=experiment_path,
                                      animal=animal,
                                      bad_blocks=bad_blocks,regev=True,
                                      )
# Every block starts with no channel map; blocks of PV_208 get the mapping below.
# NOTE(review): keys look like OE event channel numbers and values are the event
# column names used later (e.g. oe_events['L_eye_TTL']) — confirm against uf.
for block in block_collection:
    block.channeldict = None
    if block.animal_call == 'PV_208':
        block.channeldict={1: 'LED_driver',
                           7: 'L_eye_TTL',
                           2: 'Arena_TTL',
                           8: 'R_eye_TTL'}
# Lookup table keyed by str(block_num) for ease of access.
# NOTE: the loop variables `block` and `b` stay defined in the notebook namespace.
block_dict = {}
for b in block_collection:
    block_dict[str(b.block_num)] = b

instantiated block number 013 at Path: Z:\Nimrod\experiments\PV_208\2025_12_09\block_013, new OE version
Found the sample rate for block 013 in the xml file, it is 20000 Hz
created the .oe_rec attribute as an open ephys recording obj with get_data functionality
retrieving zertoh sample number for block 013
got it!
instantiated block number 014 at Path: Z:\Nimrod\experiments\PV_208\2025_12_09\block_014, new OE version
Found the sample rate for block 014 in the xml file, it is 20000 Hz
created the .oe_rec attribute as an open ephys recording obj with get_data functionality
retrieving zertoh sample number for block 014
got it!
instantiated block number 015 at Path: Z:\Nimrod\experiments\PV_208\2025_12_09\block_015, new OE version
Found the sample rate for block 015 in the xml file, it is 20000 Hz
created the .oe_rec attribute as an open ephys recording obj with get_data functionality
retrieving zertoh sample number for block 015
got it!
instantiated block number 016 at Path: Z:\Nimrod\exp

In [42]:
from bokeh.io import show, reset_output
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, CustomJS, Span
try:
    from bokeh.models import Slider  # Bokeh 3.x
except Exception:
    from bokeh.models.widgets import Slider  # Bokeh 2.x
from bokeh.layouts import column, row
import numpy as np

def plot_simple_sync_bokeh(block, df_left=None, df_right=None, shift_range=200, show_led=True, to_browser=True):
    """
    Bokeh plot of both eyes' brightness vs Open Ephys time (seconds), with manual shift sliders.

    Parameters
    ----------
    block :
        Provides the sample rate (``sample_rate`` / ``get_sample_rate()``) and, optionally,
        ``oe_events['LED_driver']`` for the LED markers. Also passed to simple_sync_build()
        when the eye DataFrames are not supplied.
    df_left, df_right : pd.DataFrame or None
        Per-eye DataFrames with 'oe_time_s' and 'brightness' columns. If either one is None,
        BOTH are rebuilt with simple_sync_build(block, export=False).
    shift_range : int
        Sliders run from -shift_range to +shift_range, in rows of the eye DataFrame.
        Positive values move the trace to the right: displayed[i] = original[i - shift],
        the same direction as shift_eye_df_by_index(df, shift) for positive `shift`.
    show_led : bool
        Draw one semi-transparent vertical line per LED event. Silently skipped when the
        block has no ``oe_events`` or no 'LED_driver' column.
    to_browser : bool
        True: show the layout with no notebook output configured (per the original
        author's note, Bokeh then opens the default browser using a temporary HTML
        file). False: render inline in the notebook.
    """

    # --- helper for fs (kept local so this cell is self-contained) ---
    def _get_fs(b) -> float:
        fs = getattr(b, 'sample_rate', None)
        if fs is None:
            fs = float(b.get_sample_rate()); b.sample_rate = fs
        return float(fs)

    # Build simple sync if needed
    if df_left is None or df_right is None:
        df_left, df_right = simple_sync_build(block, export=False)

    fs = _get_fs(block)

    # Convert NaNs/Infs in Y to None for Bokeh
    def _nan2none(a):
        return [None if (not np.isfinite(v)) else float(v) for v in a]

    xL = df_left['oe_time_s'].to_numpy(dtype=float)
    yL = df_left['brightness'].to_numpy(dtype=float)
    xR = df_right['oe_time_s'].to_numpy(dtype=float)
    yR = df_right['brightness'].to_numpy(dtype=float)

    # Median row spacing: how many ms one slider tick corresponds to for each eye
    stepL_ms = float(np.median(np.diff(xL))*1000.0) if len(xL) > 1 else float('nan')
    stepR_ms = float(np.median(np.diff(xR))*1000.0) if len(xR) > 1 else float('nan')
    print(f"[INFO] Slider tick ≈ {stepL_ms:.3f} ms (Left), {stepR_ms:.3f} ms (Right)")

    # 'y0' keeps the original series; 'y' is the displayed (shifted) one
    src_le = ColumnDataSource(dict(x=xL.tolist(), y=_nan2none(yL), y0=_nan2none(yL)))
    src_re = ColumnDataSource(dict(x=xR.tolist(), y=_nan2none(yR), y0=_nan2none(yR)))

    # --- figure ---
    p = figure(title="Simple synchronization — brightness vs OE time (s)  (zoom/pan; use sliders to shift)",
               x_axis_label="OE time (s)", y_axis_label="Brightness (a.u.)",
               width=1200, height=450, tools="pan,wheel_zoom,box_zoom,reset,save")

    p.line('x', 'y', source=src_le, line_width=1.5, color="#1f77b4", legend_label="Left eye")
    p.line('x', 'y', source=src_re, line_width=1.5, color="#d62728", legend_label="Right eye")
    p.legend.click_policy = "hide"

    # LED verticals. Look the events table up once and tolerate its absence:
    # a `{}` default has no `.columns`, so it must not be used as the fallback.
    events = getattr(block, 'oe_events', None)
    if show_led and events is not None and 'LED_driver' in events.columns:
        led = events['LED_driver'].dropna().astype(int).to_numpy()
        for x in led / fs:
            p.add_layout(Span(location=float(x), dimension='height',
                              line_color="#2ca02c", line_alpha=0.5, line_width=1.5))

    # --- sliders ---
    sL = Slider(title="Left Eye Shift (indices)",  start=-shift_range, end=shift_range, value=0, step=1, width=350)
    sR = Slider(title="Right Eye Shift (indices)", start=-shift_range, end=shift_range, value=0, step=1, width=350)

    # JS callback: x stays fixed and y is re-read from y0 at index (i - shift), so a
    # positive shift slides the curve to the right; vacated edges become null (no wrap).
    cb = CustomJS(args=dict(le=src_le, re=src_re, sL_slider=sL, sR_slider=sR), code="""
        const sL = sL_slider.value|0;
        const sR = sR_slider.value|0;

        // LEFT: displayed[i] = original[i - shift]
        const yL  = le.data['y'];
        const yL0 = le.data['y0'];
        const NL  = yL.length;
        for (let i=0; i<NL; i++) {
            const j = i - sL;
            yL[i] = (j>=0 && j<NL) ? yL0[j] : null;
        }
        le.change.emit();

        // RIGHT: displayed[i] = original[i - shift]
        const yR  = re.data['y'];
        const yR0 = re.data['y0'];
        const NR  = yR.length;
        for (let i=0; i<NR; i++) {
            const j = i - sR;
            yR[i] = (j>=0 && j<NR) ? yR0[j] : null;
        }
        re.change.emit();
    """)
    sL.js_on_change('value', cb)
    sR.js_on_change('value', cb)

    layout = column(p, row(sL, sR))

    # --- choose the output target ---
    reset_output()                         # ensure no notebook/file output is active
    if to_browser:
        show(layout)    # no output configured: shown outside the notebook
    else:
        # inline notebook rendering with bundled resources
        from bokeh.io import output_notebook
        from bokeh.resources import INLINE
        output_notebook(resources=INLINE, hide_banner=True)
        show(layout)


In [61]:
# Pick the third entry of block_collection and run the preparation steps the
# simple sync relies on (eye videos, OE events, arena files, brightness vectors).
# NOTE(review): the step order is kept as written; the methods belong to the
# project's block class and their interdependencies are not visible here.
block = block_collection[2]
block.handle_eye_videos()
block.parse_open_ephys_events()
block.handle_arena_files()
block.get_eye_brightness_vectors()
# Build the per-eye DataFrames and save them as CSV under block.analysis_path
dfL, dfR = simple_sync_build(block, export=True)

# Quick look: interactive plot with shift sliders and LED event markers,
# used to find the manual row shift for each eye
plot_simple_sync_bokeh(block, dfL, dfR, show_led=True)


handling eye video files
converting videos...
converting files: ['Z:\\Nimrod\\experiments\\PV_208\\2025_12_09\\block_015\\eye_videos\\LE\\pv_208_d4t4\\pv_208_d4t4.h264', 'Z:\\Nimrod\\experiments\\PV_208\\2025_12_09\\block_015\\eye_videos\\RE\\pv_208_d4t4\\pv_208_d4t4.h264'] 
 avoiding conversion on files: ['Z:\\Nimrod\\experiments\\PV_208\\2025_12_09\\block_015\\eye_videos\\LE\\pv_208_d4t4\\pv_208_d4t4.mp4', 'Z:\\Nimrod\\experiments\\PV_208\\2025_12_09\\block_015\\eye_videos\\RE\\pv_208_d4t4\\pv_208_d4t4.mp4']
The file Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\eye_videos\LE\pv_208_d4t4\pv_208_d4t4.mp4 already exists, no conversion necessary
The file Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\eye_videos\RE\pv_208_d4t4\pv_208_d4t4.mp4 already exists, no conversion necessary
Validating videos...
The video named pv_208_d4t4.mp4 has reported 114725 frames and has 114725 frames, it has dropped 0 frames
The video named pv_208_d4t4.mp4 has reported 114370 frames and has 114370 f

Processing Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\eye_videos\LE\pv_208_d4t4\pv_208_d4t4_LE.mp4:   0%|          | 182/114725 [00:00<01:02, 1819.52frame/s]

Working on video Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\eye_videos\LE\pv_208_d4t4\pv_208_d4t4_LE.mp4


Processing Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\eye_videos\LE\pv_208_d4t4\pv_208_d4t4_LE.mp4: 100%|██████████| 114725/114725 [00:56<00:00, 2032.07frame/s]
Processing Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\eye_videos\RE\pv_208_d4t4\pv_208_d4t4.mp4:   0%|          | 187/114370 [00:00<01:01, 1869.60frame/s]

Finished video Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\eye_videos\LE\pv_208_d4t4\pv_208_d4t4_LE.mp4, processed 114725 frames
Working on video Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\eye_videos\RE\pv_208_d4t4\pv_208_d4t4.mp4


Processing Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\eye_videos\RE\pv_208_d4t4\pv_208_d4t4.mp4: 100%|██████████| 114370/114370 [00:59<00:00, 1910.76frame/s]


Finished video Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\eye_videos\RE\pv_208_d4t4\pv_208_d4t4.mp4, processed 114370 frames
Eye brightness vectors generation complete.
[LEFT] frames=114,725 | median fps=62.492 | CoV(dt)=8.35% | outliers(±1–99%)=1.95%
[WARN] LEFT: CoV(dt) > 5.0%. Stream may be unstable.
[RIGHT] frames=114,370 | median fps=62.496 | CoV(dt)=31.30% | outliers(±1–99%)=1.97%
[WARN] RIGHT: CoV(dt) > 5.0%. Stream may be unstable.
[OK] Saved: Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\analysis\eye_left_simple_sync.csv
[OK] Saved: Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\analysis\eye_right_simple_sync.csv
[INFO] Slider tick ≈ 16.000 ms (Left), 16.000 ms (Right)


In [62]:
# Report the median row spacing (ms) of each eye frame; one slider "tick"
# equals one row, so this converts ticks to milliseconds.
left_tick_ms = describe_eye_tick(dfL)
right_tick_ms = describe_eye_tick(dfR)
print(f"Left tick ≈ {left_tick_ms:.3f} ms")
print(f"Right tick ≈ {right_tick_ms:.3f} ms")

Left tick ≈ 16.000 ms
Right tick ≈ 16.000 ms


In [63]:
# Re-open the interactive plot for the unshifted per-eye frames
plot_simple_sync_bokeh(block, dfL, dfR, show_led=True)

[INFO] Slider tick ≈ 16.000 ms (Left), 16.000 ms (Right)


In [64]:
# Apply the chosen row shift (here +2 rows for both eyes) to frame_idx/brightness;
# the OE-sample index and oe_time_s keep their values.
dfL_shifted = shift_eye_df_by_index(dfL,2)
dfR_shifted = shift_eye_df_by_index(dfR,2)

In [65]:
# Plot the shifted frames to check visually that the shift was applied as intended
plot_simple_sync_bokeh(block, dfL_shifted, dfR_shifted, show_led=True)

[INFO] Slider tick ≈ 16.000 ms (Left), 16.000 ms (Right)


In [66]:
# Merge the (already shifted) eye frames onto the 60 Hz arena grid with
# nearest-with-tolerance matching; the eye dfs are not re-sorted by the merge.
final_df = build_final_sync_df_merge_nearest(
    block, dfL_shifted, dfR_shifted,
    target_fps=60.0,
    tol_frac=0.90,      # accept nearest within 90% of a 60 Hz tick; tune 0.7–1.2 if needed
    pre_shift_left=0,   # IMPORTANT: already pre-shifted
    pre_shift_right=0,
    export_csv=True
)

# Sanity check of the merged result: see the sanity_plot_final_df(...) call in a later cell.
# (A disabled call to sanity_check_final_sync_bokeh used to sit here; no function of
#  that name is defined in the visible part of this notebook.)

[OK] Saved final sync CSV → Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\analysis\blocksync_df.csv
[INFO] Grid rows: 111,365 | tick ≈ 16.650 ms | tol=300 samp (tol_frac=0.90)
[INFO] Valid LEFT grid points: 110,292 | Valid RIGHT grid points: 109,955


In [67]:

# Plot the merged L/R brightness traces against OE time, with one vertical line per
# non-missing entry of oe_events['LED_driver'] (passed as led_off_samples).
fs = float(block.sample_rate)
sanity_plot_final_df(final_df, fs, show_led_off=True, led_off_samples=block.oe_events['LED_driver'].dropna().astype(int).to_numpy())


In [35]:
# Peek at five consecutive grid rows (positions 5000–5004) of the merged table
final_df.iloc[5000:5005]

Unnamed: 0,Arena_TTL,Arena_frame,L_eye_frame,R_eye_frame,L_values,R_values
5000,2120151.0,4919.0,4657.0,4655.0,221.32967,221.27619
5001,2120484.0,4920.0,4658.0,4657.0,221.32967,221.195238
5002,2120817.0,4921.0,4659.0,4658.0,221.456044,221.2
5003,2121150.0,4922.0,4660.0,4659.0,221.137363,221.114286
5004,2121483.0,4923.0,4661.0,4660.0,221.362637,221.12381


In [68]:
import numpy as np
import pandas as pd

def _assert_strict_inc(name, arr):
    """
    Raise ValueError unless `arr` has at least two elements and every
    consecutive difference is positive.

    `name` is only used to label the array in the error message.
    """
    has_pair = arr.size >= 2
    if has_pair and np.all(np.diff(arr) > 0):
        return
    raise ValueError(f"{name} must be strictly increasing.")

def _build_arena_grid_from_block(block, target_fps):
    """
    Build an evenly spaced grid of sample indices spanning the arena TTL events.

    Reads `block.sample_rate` and the 'Arena_TTL' / 'Arena_TTL_frame' columns of
    `block.oe_events`. Rows with a NaN in either column are dropped, and the
    remaining events are ordered by 'Arena_TTL'.

    Returns
    -------
    (fs, grid, step)
        fs   : sample rate as float
        grid : int64 array from the first to the last Arena_TTL (inclusive bound),
               spaced by `step`
        step : round(fs / target_fps) as int

    Raises
    ------
    RuntimeError
        If fewer than two usable Arena_TTL events remain.
    """
    sample_rate = float(block.sample_rate)

    events = block.oe_events[['Arena_TTL', 'Arena_TTL_frame']].dropna()
    events = events.astype({'Arena_TTL': int})
    events = events.sort_values('Arena_TTL')
    if len(events) < 2:
        raise RuntimeError("Not enough Arena_TTL events.")

    samples_per_tick = int(round(sample_rate / target_fps))
    ttl = events['Arena_TTL']
    first_sample = int(ttl.iloc[0])
    last_sample = int(ttl.iloc[-1])

    # +1 so the last TTL sample itself can be a grid point.
    grid = np.arange(first_sample, last_sample + 1, samples_per_tick, dtype=np.int64)
    return sample_rate, grid, samples_per_tick

def _nearest_with_tol(sorted_vec, queries, tol):
    """
    For each query, return the index of the nearest element of `sorted_vec`,
    or -1 when that nearest element is farther away than `tol`.

    `sorted_vec` must pass `_assert_strict_inc` (strictly increasing, at least
    two elements). When a query is equally distant from both neighbours, the
    lower index is returned.
    """
    _assert_strict_inc("sorted_vec", sorted_vec)

    last = len(sorted_vec) - 1
    insert_at = np.searchsorted(sorted_vec, queries, side='left')

    # Candidate neighbours on either side of the insertion point, clamped to valid indices.
    below = np.clip(insert_at - 1, 0, last)
    above = np.clip(insert_at, 0, last)

    dist_below = np.abs(sorted_vec[below] - queries)
    dist_above = np.abs(sorted_vec[above] - queries)

    nearest = np.where(dist_below <= dist_above, below, above)
    too_far = np.minimum(dist_below, dist_above) > tol
    nearest[too_far] = -1
    return nearest

def verify_final_df_against_sources(block, final_df, df_left, df_right, target_fps=60.0, tol_frac=0.9):
    """
    Recompute the grid mapping from df_left/df_right → arena grid and compare to final_df.

    Assumes df_left/df_right already include any pre-shifts you wanted.

    Parameters
    ----------
    block : object passed to `_build_arena_grid_from_block` (provides the sample
        rate and the arena TTL events).
    final_df : DataFrame with columns 'Arena_TTL', 'L_eye_frame', 'R_eye_frame',
        'L_values', 'R_values', one row per grid point.
    df_left, df_right : per-eye DataFrames indexed by a strictly increasing integer
        index, with 'frame_idx' and 'brightness' columns.
    target_fps : grid rate used to rebuild the arena grid.
    tol_frac : matching tolerance as a fraction of one grid step
        (tol = ceil(tol_frac * step) samples).

    Returns
    -------
    dict with keys 'tick_ms', 'tol_samples', 'left_frame_match',
    'left_values_match', 'right_frame_match', 'right_values_match'
    (match entries are fractions in [0, 1]; NaN vs NaN counts as a match).

    Raises
    ------
    AssertionError
        If final_df does not have one row per grid point, or its 'Arena_TTL'
        differs from the recomputed grid by more than 0.5 sample.
    ValueError
        If an eye DataFrame index is not strictly increasing.

    Notes
    -----
    Only brightness-value mismatches are listed as examples; frame mismatches
    are reported through the percentages only.
    """
    fs, grid, step = _build_arena_grid_from_block(block, target_fps)
    tol = int(np.ceil(tol_frac * step))

    # 1) Check that final_df Arena_TTL matches the recomputed grid.
    ft = np.asarray(final_df['Arena_TTL'], dtype=float)
    # Compare shapes first: np.allclose would otherwise fail with an opaque
    # broadcasting error (or silently broadcast a length-1 column).
    if ft.shape != grid.shape:
        raise AssertionError(
            f"final_df has {ft.size} rows but the arena grid from block has {grid.size} points."
        )
    # Allow tiny float error (half a sample); exact equality is a special case of this.
    if not np.allclose(ft, grid.astype(float), rtol=0, atol=0.5):
        raise AssertionError("final_df['Arena_TTL'] does not match the arena grid from block (even within 0.5 sample).")

    # 2) Map sources to grid
    def map_eye(df_eye):
        t = df_eye.index.to_numpy(dtype=np.int64)
        _assert_strict_inc("df_eye.index (oe_sample)", t)
        fi = df_eye['frame_idx'].to_numpy(dtype=float)
        y = df_eye['brightness'].to_numpy(dtype=float)
        idx = _nearest_with_tol(t, grid, tol)     # indices into t, -1 where out of tolerance
        frames = np.full(grid.shape, np.nan, dtype=float)
        vals = np.full(grid.shape, np.nan, dtype=float)
        ok = idx >= 0
        frames[ok] = fi[idx[ok]]
        vals[ok] = y[idx[ok]]
        return frames, vals

    Lf, Lv = map_eye(df_left)
    Rf, Rv = map_eye(df_right)

    # 3) Compare with final_df columns
    Lf0 = final_df['L_eye_frame'].to_numpy(dtype=float)
    Lv0 = final_df['L_values'].to_numpy(dtype=float)
    Rf0 = final_df['R_eye_frame'].to_numpy(dtype=float)
    Rv0 = final_df['R_values'].to_numpy(dtype=float)

    # match metrics (treat NaN==NaN as match)
    def nan_equal(a, b):
        return (a == b) | (np.isnan(a) & np.isnan(b))

    # Compute each mask once; the value masks are reused for the mismatch examples.
    okLf = nan_equal(Lf, Lf0)
    okLv = nan_equal(Lv, Lv0)
    okRf = nan_equal(Rf, Rf0)
    okRv = nan_equal(Rv, Rv0)

    mLf = okLf.mean()
    mLv = okLv.mean()
    mRf = okRf.mean()
    mRv = okRv.mean()

    n = len(grid)
    print(f"[VERIFY] Grid length={n}, tick≈{1000.0*step/fs:.3f} ms, tol={tol} samples")
    print(f"[VERIFY] Left  frame match: {mLf*100:.3f}%   Left  values match: {mLv*100:.3f}%")
    print(f"[VERIFY] Right frame match: {mRf*100:.3f}%   Right values match: {mRv*100:.3f}%")

    # show first few mismatches (if any)
    badL = np.flatnonzero(~okLv)[:10]
    badR = np.flatnonzero(~okRv)[:10]
    if badL.size:
        print(f"[VERIFY] Example LEFT mismatches at grid idx: {badL}")
    if badR.size:
        print(f"[VERIFY] Example RIGHT mismatches at grid idx: {badR}")

    return dict(
        tick_ms=1000.0*step/fs, tol_samples=tol,
        left_frame_match=mLf, left_values_match=mLv,
        right_frame_match=mRf, right_values_match=mRv
    )


In [69]:
# dfL_shifted/dfR_shifted are the shifted eye DataFrames that were fed into the merge above,
# so the recomputed mapping is expected to agree with final_df.
stats = verify_final_df_against_sources(block, final_df, dfL_shifted, dfR_shifted, target_fps=60.0, tol_frac=0.9)


[VERIFY] Grid length=111365, tick≈16.650 ms, tol=300 samples
[VERIFY] Left  frame match: 100.000%   Left  values match: 100.000%
[VERIFY] Right frame match: 100.000%   Right values match: 100.000%


In [70]:
# export step for final_sync_df:
from pathlib import Path
import pandas as pd

def export_final_sync_df(block,
                         final_df: pd.DataFrame,
                         overwrite: bool = True,
                         filenames = ("blocksync_df.csv", "final_sync_df.csv"),
                         ms_axis=True) -> None:
    """
    Save `final_df` into block.analysis_path (overwriting if requested) and set:
      - block.final_sync_df
      - block.blocksync_df   (legacy compatibility)

    Expected columns:
      ['Arena_TTL','Arena_frame','L_eye_frame','R_eye_frame','L_values','R_values']

    Parameters
    ----------
    block : object providing `analysis_path` and (when `ms_axis` is true) `sample_rate`.
    final_df : table to export; it is copied, never modified in place.
    overwrite : when False, files that already exist are skipped (not rewritten).
    filenames : file name(s) to write inside block.analysis_path; either an
        iterable of names or a single name given as str / Path.
    ms_axis : when true, add an 'ms_axis' column = Arena_TTL / (sample_rate / 1000).

    Raises
    ------
    ValueError
        If `final_df` lacks any of the expected columns.

    Notes
    -----
    The block attributes are set even when every file was skipped.
    """
    # 1) Validate schema
    required = ['Arena_TTL','Arena_frame','L_eye_frame','R_eye_frame','L_values','R_values']
    missing = [c for c in required if c not in final_df.columns]
    if missing:
        raise ValueError(f"final_df missing required columns: {missing}")

    # A bare str/Path means ONE file; iterating a str would yield single characters.
    if isinstance(filenames, (str, Path)):
        filenames = (filenames,)

    # Legacy expects Arena_TTL as float (… .0). Keep everything else as-is.
    out = final_df.copy()
    out['Arena_TTL'] = out['Arena_TTL'].astype(float)
    if ms_axis:
        out['ms_axis'] = out['Arena_TTL'] / (block.sample_rate/1000)

    # 2) Write files
    ap = Path(block.analysis_path)
    ap.mkdir(parents=True, exist_ok=True)

    for name in filenames:
        p = ap / name
        existed = p.exists()
        if existed and not overwrite:
            print(f"[SKIP] {p.name} exists and overwrite=False")
            continue
        out.to_csv(p, index=False)
        print(f"[OK] {'Overwrote' if existed else 'Wrote'} {p}")

    # 3) Set attributes for downstream code
    block.final_sync_df = out
    # Older functions read `block.blocksync_df`, so set it too:
    block.blocksync_df = out
    print("[OK] block.final_sync_df (and block.blocksync_df) set.")
# Write final_df into the block's analysis folder (replacing existing CSVs) and attach it to the block.
export_final_sync_df(block,final_df=final_df,overwrite=True)

[OK] Overwrote Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\analysis\blocksync_df.csv
[OK] Wrote Z:\Nimrod\experiments\PV_208\2025_12_09\block_015\analysis\final_sync_df.csv
[OK] block.final_sync_df (and block.blocksync_df) set.
