## I have some strange observations of the first roi not being a zero roi and it happeneing at different times. I want to see how prevelant this is and think about how I might acount for this 

In [1]:
from pathlib import Path
import pandas as pd

def _extract_adc_headers(hdr_path: Path):
    with open(hdr_path, "r") as f:
        for line in f:
            if line.startswith("ADCFileFormat:"):
                return [h.strip() for h in line.split(":", 1)[1].split(",")]
    raise ValueError(f"ADCFileFormat not found in header file: {hdr_path}")

def _read_adc_with_headers(adc_path: Path, hdr_path: Path) -> pd.DataFrame:
    headers = _extract_adc_headers(hdr_path)
    df = pd.read_csv(adc_path, header=None)
    df.columns = headers[:df.shape[1]]
    return df

def summarize_first_adc_row(data_dir: str) -> pd.DataFrame:
    data_dir = Path(data_dir)
    rows = []

    for hdr_path in sorted(data_dir.rglob("*.hdr")):
        adc_path = hdr_path.with_suffix(".adc")
        if not adc_path.exists():
            continue

        prefix = hdr_path.stem

        try:
            df = _read_adc_with_headers(adc_path, hdr_path)
            if df.empty:
                continue

            # Ensure required columns exist
            required = ["RunTime", "InhibitTime", "RoiX", "RoiY"]
            missing = [c for c in required if c not in df.columns]
            if missing:
                rows.append({
                    "file": prefix,
                    "first_RunTime": None,
                    "first_InhibitTime": None,
                    "first_RoiX": None,
                    "first_RoiY": None,
                    "roi_binary": None,
                    "note": f"missing columns: {missing}"
                })
                continue

            # Pull first row
            first = df.iloc[0]

            rt = pd.to_numeric(first["RunTime"], errors="coerce")
            inh = pd.to_numeric(first["InhibitTime"], errors="coerce")
            rx = pd.to_numeric(first["RoiX"], errors="coerce")
            ry = pd.to_numeric(first["RoiY"], errors="coerce")

            # zero ROI defined as RoiX==0 and RoiY==0
            roi_binary = None
            if pd.notna(rx) and pd.notna(ry):
                roi_binary = 0 if (rx == 0 and ry == 0) else 1

            rows.append({
                "file": prefix,
                "first_RunTime": rt,
                "first_InhibitTime": inh,
                "first_RoiX": rx,
                "first_RoiY": ry,
                "roi_binary": roi_binary,
                "note": ""
            })

        except Exception as e:
            rows.append({
                "file": prefix,
                "first_RunTime": None,
                "first_InhibitTime": None,
                "first_RoiX": None,
                "first_RoiY": None,
                "roi_binary": None,
                "note": f"error: {e}"
            })

    return pd.DataFrame(rows).sort_values("file").reset_index(drop=True)


In [56]:
df_first = summarize_first_adc_row("../IFCBData/AlexandriumTest/")
print(df_first)
#print(df_first[['first_RunTime' , 'roi_binary' ]])

                       file  first_RunTime  first_InhibitTime  first_RoiX  \
0  D20240420T095637_IFCB124       0.072678           0.000000         0.0   
1  D20240423T013135_IFCB124       7.256404           0.071204       788.0   
2  D20240429T195718_IFCB124       0.074151           0.000000         0.0   
3  D20240429T213320_IFCB124       0.072131           0.000000         0.0   
4  D20240501T235832_IFCB124       0.071005           0.000000         0.0   

   first_RoiY  roi_binary note  
0         0.0           0       
1       614.0           1       
2         0.0           0       
3         0.0           0       
4         0.0           0       


In [9]:
### Checking the above is correct can load in an adc file directly and compair first row
test = pd.read_csv('../IFCBData/DenseAlex/nauset/D20240402T064320_IFCB124.adc')
print(test.head())

   1  0.0605366  0.0018692017  0.044973493  0.0028192997  0.0050747395  \
0  2   4.408047      0.001517     0.012504      0.002854      0.004928   
1  3   4.503701      0.012657     0.084206      0.002866      0.005155   
2  4   4.621901      0.001503     0.003767      0.002772      0.004659   
3  5   4.758038      0.008116     0.009092      0.002800      0.004803   
4  6   5.157566      0.151186     0.733638      0.002952      0.005845   

   0.1294899  0.5991101  0.015001297  0.0186944  ...  0.1  0.2  0.3    0.4  \
0   0.031629   0.266523     0.014801   0.018516  ...  526   72   52      0   
1   0.201478   1.142721     0.015054   0.018790  ...  518  112   84   3744   
2   0.012944   0.149105     0.014791   0.018508  ...  638  112   84  13152   
3   0.134089   0.201278     0.014975   0.018668  ...  510   88   84  22560   
4   1.473861   3.507657     0.015190   0.018876  ...  358  184  172  29952   

   0.5  0.6  0.7  0.8  0.09454427083333333       0.9  
0    0    0    0    0          

In [51]:
## Some super basic stats about what first roi looks like 

N = len(df_first)
## how many zero rois total
n_zero = (df_first["roi_binary"] == 0).sum()
frac_zero = n_zero / N
print(f"total rois: {N}")
print(f"Zero ROIs: {n_zero} ({frac_zero:.3f})")

total rois: 56
Zero ROIs: 12 (0.214)


In [52]:
## How many have first entry over t = 0.1

mask_rt = df_first["first_RunTime"] > 0.1

n_rt = mask_rt.sum()
frac_rt = n_rt / N
n1_rt=N-n_rt

print(f"first_RunTime less than 0.1: {n1_rt}")
print(f"first_RunTime greater than 0.1 s: {n_rt} ({frac_rt:.3f})")


first_RunTime less than 0.1: 5
first_RunTime greater than 0.1 s: 51 (0.911)


In [53]:
mask_rt_zero = mask_rt & (df_first["roi_binary"] == 0)

n_rt_zero = mask_rt_zero.sum()
frac_rt_zero = n_rt_zero / n_rt if n_rt > 0 else float("nan")

print(f"Zero ROIs with first_RunTime > 0.1 s: {n_rt_zero} ({frac_rt_zero:.3f})")


Zero ROIs with first_RunTime > 0.1 s: 7 (0.137)


In [54]:
summary = pd.DataFrame({
    "metric": [
        "Zero ROIs",
        "first_RunTime > 0.1 s",
        "Zero ROIs | first_RunTime > 0.1 s"
    ],
    "count": [
        n_zero,
        n_rt,
        n_rt_zero
    ],
    "fraction total": [
        frac_zero,
        frac_rt,
        frac_rt_zero
    ]
})

print(summary)


                              metric  count  fraction total
0                          Zero ROIs     12        0.214286
1              first_RunTime > 0.1 s     51        0.910714
2  Zero ROIs | first_RunTime > 0.1 s      7        0.137255


In [None]:
## Making sure that the inhibit time diff is computed correctly 

In [None]:
# identify false trigger
false_mask = (df["RunTime"] < 0.25) & (df["RoiX"] == 0) & (df["RoiY"] == 0)

# drop it (could be 0 or 1 row, possibly more if you want to be extra safe)
df = df.loc[~false_mask].sort_values("RunTime").reset_index(drop=True)

# recompute InhibitTimeDiff from cumulative InhibitTime
df["InhibitTime"] = pd.to_numeric(df["InhibitTime"], errors="coerce")
df["InhibitTimeDiff"] = df["InhibitTime"].diff()
df.loc[0, "InhibitTimeDiff"] = df.loc[0, "InhibitTime"]
