## Building a plugin to pull data directly from the dashboard

### generate_ifcb_pids
This creates a list of PIDs that will be referenced when pulling data files for a set date-time range.
Input the time range of interest you want to look at.

I cannot get the API's date filtering to work, so the hacky workaround is to make a list of all the PIDs in the dataset and then filter that list down to just the times you want; that is what get_ifcb_bins_datetime_filtered does.

### Hit API for required files 
download_file -- helper function that hits the API and saves the file, without overwriting if the file already exists
download_ifcb_bins -- uses download_file to loop over the PID list for the specified file types -- probably easy to add other metadata stuff


In [9]:
## Does not currently work: either the API does not use standard datetimes or I have something wrong, so this just makes a list of all PIDs in a dataset

import requests
import pandas as pd
from datetime import datetime

def get_ifcb_bins_datetime(base_url: str,
                           dataset: str,
                           start: str,
                           end: str,
                           timeout: int = 30) -> pd.DataFrame:
    """
    Query the IFCB dashboard ``/api/list_bins`` endpoint for a dataset,
    passing a requested datetime range along with the request.

    NOTE(review): when this was run against the dashboard, the response
    contained every bin in the dataset regardless of ``start``/``end``
    (tens of thousands of bins going back years for a five-hour window).
    The ``start_datetime``/``end_datetime`` parameter names or the datetime
    format may not be what the API expects -- confirm against the dashboard
    API before relying on the range being applied. Until then, filter the
    result client-side (see ``get_ifcb_bins_datetime_filtered``).

    Parameters
    ----------
    base_url : str
        e.g. "https://habon-ifcb.whoi.edu". A trailing "/" is ignored.
    dataset : str
        e.g. "dagm01"
    start, end : str
        ISO-like datetime strings, e.g. "2023-12-14T09:00:00".
    timeout : int
        Seconds to wait for the server before giving up.

    Returns
    -------
    DataFrame built from the "data" entry of the JSON response; callers
    in this notebook read its 'pid' column.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.RequestException
        On connection problems or when the timeout expires.
    KeyError
        If the JSON response has no "data" entry.
    """
    base_url = base_url.rstrip("/")

    # Let requests build the query string so that characters such as the
    # "+" of a timezone offset are percent-encoded instead of being read
    # as a space by the server.
    query = {
        "dataset": dataset,
        "start_datetime": start,
        "end_datetime": end,
    }

    # Without a timeout an unresponsive server would block the notebook
    # indefinitely.
    r = requests.get(f"{base_url}/api/list_bins", params=query, timeout=timeout)
    r.raise_for_status()
    return pd.DataFrame(r.json()["data"])


In [11]:
# Dashboard and dataset to query.
base_url = "https://habon-ifcb.whoi.edu"
dataset = "nauset"

# Requested time window (strings carry no timezone suffix).
start_dt = "2025-06-25T09:00:00"
end_dt   = "2025-06-25T14:00:00"

# Ask the API for the bins and keep just the PID strings.
bins_df = get_ifcb_bins_datetime(base_url, dataset, start_dt, end_dt)
bins = bins_df["pid"].tolist()

# NOTE: in the recorded run this reported 52455 bins, the first of them
# from 2012, i.e. the requested window was not applied by the server.
print(f"Found {len(bins)} bins between {start_dt} and {end_dt}")
print(bins[:5])


Found 52455 bins between 2025-06-25T09:00:00 and 2025-06-25T14:00:00
['D20120318T192302_IFCB000', 'D20120318T192302_IFCB010', 'D20120318T220052_IFCB000', 'D20120318T220052_IFCB010', 'D20120319T135125_IFCB010']


### The above does not seem to actually filter by time, which is annoying. I should look into / ask around about how the API works to see if we can get it to work, but a simple workaround is to just filter this list of all PIDs down to the times I want.

#### That is what is below; use it for now.

In [30]:
import requests
import pandas as pd

def get_ifcb_bins_datetime_filtered(base_url: str,
                                    dataset: str,
                                    start: str,
                                    end: str,
                                    timeout: int = 30) -> pd.DataFrame:
    """
    Get all bins for a dataset from /api/list_bins, then filter by sample_time
    between 'start' and 'end' (inclusive).

    All timestamps (the 'sample_time' column as well as 'start' and 'end')
    are normalised to UTC before comparing. Strings without a timezone are
    interpreted as UTC, so "2025-06-25T09:00:00" and "2025-06-25T09:00:00Z"
    select the same window.

    Parameters
    ----------
    base_url : e.g. "https://habon-ifcb.whoi.edu" (a trailing "/" is ignored)
    dataset  : e.g. "nauset"
    start, end : ISO-like datetime strings, e.g. "2025-06-25T09:00:00Z"
    timeout  : seconds to wait for the server before giving up

    Returns
    -------
    DataFrame with only bins in the requested time window. Its 'sample_time'
    column is timezone-aware (UTC). If the dataset has no bins at all, an
    empty DataFrame with 'pid' and 'sample_time' columns is returned.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.RequestException
        On connection problems or when the timeout expires.
    KeyError
        If the JSON response has no "data" entry, or the bins carry no
        'sample_time' field.
    """
    base_url = base_url.rstrip("/")

    # Without a timeout an unresponsive server would block the notebook
    # indefinitely.
    r = requests.get(
        f"{base_url}/api/list_bins",
        params={"dataset": dataset},
        timeout=timeout,
    )
    r.raise_for_status()

    df = pd.DataFrame(r.json()["data"])
    if df.empty:
        # An empty bin list yields a frame with no columns at all; hand back
        # the columns callers index so that bins_df["pid"] still works.
        return pd.DataFrame(columns=["pid", "sample_time"])

    # Adjust this column name if needed, but on HABON it should be 'sample_time'.
    # utc=True makes the column timezone-aware whatever the server sends.
    df["sample_time"] = pd.to_datetime(df["sample_time"], utc=True)

    # Parse the bounds the same way; comparing a naive bound against an
    # aware column would raise TypeError.
    start_dt = pd.to_datetime(start, utc=True)
    end_dt = pd.to_datetime(end, utc=True)

    mask = (df["sample_time"] >= start_dt) & (df["sample_time"] <= end_dt)
    return df.loc[mask].copy()


In [34]:
## Example usage: list the bins of one dataset inside a time window ##

# Dashboard and dataset to query.
base_url = "http://percy.whoi.edu:8000"
dataset = "spawn"

# Time window, inclusive at both ends; the trailing "Z" marks the times as UTC.
start_dt = "2024-05-1T00:00:00Z"
end_dt   = "2024-05-25T14:00:00Z"

# Fetch every bin of the dataset, filter client-side, keep the PID strings.
bins_df = get_ifcb_bins_datetime_filtered(base_url, dataset, start_dt, end_dt)
bins = bins_df["pid"].tolist()

print(f"Found {len(bins)} bins between {start_dt} and {end_dt}")
print(bins[:5])


Found 62 bins between 2024-05-1T00:00:00Z and 2024-05-25T14:00:00Z
['D20240501T181138_IFCB145', 'D20240501T182329_IFCB145', 'D20240501T185545_IFCB145', 'D20240501T200201_IFCB145', 'D20240501T204013_IFCB145']


### with PID list now download ADC, HDR, and Class lists

loops through PID list and hits dashboard API for adc, hdr and class lists

In [32]:
from pathlib import Path
from typing import Dict, List

def download_file(url: str,
                  dest_path: Path,
                  overwrite: bool = False,
                  timeout: int = 30) -> bool:
    """
    Download 'url' to 'dest_path', reporting the outcome on stdout.

    Parameters
    ----------
    url : address of the file to fetch
    dest_path : where to store it; missing parent directories are created
    overwrite : when False (default) an existing 'dest_path' is left
        untouched and no request is made
    timeout : seconds to wait for the server before giving up

    Returns
    -------
    True if 'dest_path' already existed (and overwrite is False) or the
    file was downloaded; False if the request failed or the server answered
    with anything other than status 200.

    Raises
    ------
    OSError
        If the file cannot be written locally.
    """
    if dest_path.exists() and not overwrite:
        print(f"[skip] {dest_path} already exists")
        return True

    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failure (DNS, refused connection, timeout, bad URL):
        # report it and let the caller carry on with the next file.
        print(f"[error] GET {url} failed: {e}")
        return False

    if r.status_code == 404:
        print(f"[404]  {url} not found")
        return False
    if r.status_code != 200:
        print(f"[{r.status_code}] {url} (no download)")
        return False

    dest_path.parent.mkdir(parents=True, exist_ok=True)

    # Write to a sibling ".part" file and rename it into place. Writing to
    # dest_path directly could leave a truncated file behind if interrupted,
    # and the "already exists" check above would then skip it forever.
    tmp_path = dest_path.with_name(dest_path.name + ".part")
    try:
        tmp_path.write_bytes(r.content)
        tmp_path.replace(dest_path)
    finally:
        # Only true when the write or rename failed; do not leave debris.
        if tmp_path.exists():
            tmp_path.unlink()

    print(f"[ok]   {url} -> {dest_path}")
    return True


def download_ifcb_bins(base_url: str,
                       dataset: str,
                       pids: List[str],
                       dest_dir: str,
                       download_adc: bool = True,
                       download_hdr: bool = True,
                       download_class: bool = True,
                       class_suffix: str = "_class_vNone.csv",
                       overwrite: bool = False,
                       timeout: int = 30) -> Dict[str, Dict[str, Path]]:
    """
    Fetch the per-bin files of a dataset for every PID in 'pids'.

    For each PID the enabled file kinds are requested from
    "<base_url>/<dataset>/<pid><suffix>" via download_file and stored as
    "<dest_dir>/<dataset>/<pid><suffix>".

    Parameters
    ----------
    base_url : dashboard root; a trailing "/" is ignored
    dataset : dataset name, used in both the URL and the local path
    pids : bin identifiers to fetch
    dest_dir : local root directory for the downloads
    download_adc, download_hdr, download_class : which file kinds to fetch
        (".adc", ".hdr" and the class file respectively)
    class_suffix : text appended to the PID to form the class file name
    overwrite, timeout : passed through to download_file

    Returns
    -------
    Mapping of PID -> {"adc" | "hdr" | "class": local Path} holding the
    files download_file reported as available. PIDs for which nothing was
    obtained are left out.
    """
    root_url = base_url.rstrip("/")
    dataset_dir = Path(dest_dir) / dataset

    # One (result key, filename suffix) pair per requested file kind, in the
    # order the files are fetched.
    wanted = [
        (kind, suffix)
        for kind, enabled, suffix in (
            ("adc", download_adc, ".adc"),
            ("hdr", download_hdr, ".hdr"),
            ("class", download_class, class_suffix),
        )
        if enabled
    ]

    files_downloaded: Dict[str, Dict[str, Path]] = {}
    for pid in pids:
        print(f"\n=== {pid} ===")

        fetched: Dict[str, Path] = {}
        for kind, suffix in wanted:
            filename = f"{pid}{suffix}"
            target = dataset_dir / filename
            source = f"{root_url}/{dataset}/{filename}"
            if download_file(source, target, overwrite=overwrite, timeout=timeout):
                fetched[kind] = target

        # Only record PIDs that yielded at least one file.
        if fetched:
            files_downloaded[pid] = fetched

    return files_downloaded


# example usage 

In [35]:
## Example usage: list the bins in a time window, then download their files

# 1) Filter bins by datetime
bins_df = get_ifcb_bins_datetime_filtered(
    base_url = "http://percy.whoi.edu:8000",
    dataset="spawn",
    start = "2024-05-1T00:00:00Z",
    end   = "2024-05-25T14:00:00Z",
)
bins = bins_df["pid"].tolist()

# 2) Download files only for those bins
# NOTE: download_ifcb_bins stores files under <dest_dir>/<dataset>/, so with
# this dest_dir they end up in ../../IFCBData/spawn/All/spawn/.
# NOTE: in the recorded run the "_class_scores.csv" URLs returned 404 for
# this dataset, so only the .adc and .hdr files were saved.
files = download_ifcb_bins(
    base_url = "http://percy.whoi.edu:8000",
    dataset="spawn",
    pids=bins,
    dest_dir="../../IFCBData/spawn/All/",
    class_suffix="_class_scores.csv",  # change if the dataset uses a different suffix -- if unsure, go to the dashboard, copy the class link as text and see what it looks like
)



=== D20240501T181138_IFCB145 ===
[ok]   http://percy.whoi.edu:8000/spawn/D20240501T181138_IFCB145.adc -> ../../IFCBData/spawn/All/spawn/D20240501T181138_IFCB145.adc
[ok]   http://percy.whoi.edu:8000/spawn/D20240501T181138_IFCB145.hdr -> ../../IFCBData/spawn/All/spawn/D20240501T181138_IFCB145.hdr
[404]  http://percy.whoi.edu:8000/spawn/D20240501T181138_IFCB145_class_scores.csv not found

=== D20240501T182329_IFCB145 ===
[ok]   http://percy.whoi.edu:8000/spawn/D20240501T182329_IFCB145.adc -> ../../IFCBData/spawn/All/spawn/D20240501T182329_IFCB145.adc
[ok]   http://percy.whoi.edu:8000/spawn/D20240501T182329_IFCB145.hdr -> ../../IFCBData/spawn/All/spawn/D20240501T182329_IFCB145.hdr
[404]  http://percy.whoi.edu:8000/spawn/D20240501T182329_IFCB145_class_scores.csv not found

=== D20240501T185545_IFCB145 ===
[ok]   http://percy.whoi.edu:8000/spawn/D20240501T185545_IFCB145.adc -> ../../IFCBData/spawn/All/spawn/D20240501T185545_IFCB145.adc
[ok]   http://percy.whoi.edu:8000/spawn/D20240501T1855