In [28]:
import datetime
import json
from pathlib import Path

import imageio.v2 as imageio
import numpy as np
import pandas as pd
import rasterio
import tqdm
from IPython.display import Video
from matplotlib import pyplot as plt
from PIL import Image, ImageDraw, ImageFont

from estuary.clay.data import parse_dt_from_pth
from estuary.util import masked_contrast_stretch

In [None]:
# Render a single SkySat scene to an RGB PNG in the display folder, then preview it.
p = "/Users/kyledorman/data/estuary/skysat/results/2023/navarro_river/files/20230918_185103_ssc1_u0001_pansharpened_clip.tif"
with rasterio.open(p) as src:
    # Every band as float32, plus band 1's mask (True where rasterio masks the pixel).
    data = src.read(out_dtype=np.float32)
    nodata = src.read(1, masked=True).mask

# Log-compress the values, then stretch between the 1st and 99th percentiles;
# the inverted mask is handed to the helper as the set of valid pixels.
data = np.log10(data + 1)
imgd = masked_contrast_stretch(data, ~nodata, p_low=1, p_high=99)

# Pick bands 2, 1, 0 and move the band axis last -> (rows, cols, 3).
# NOTE(review): presumably the file stores blue, green, red first — confirm band order.
rgb = imgd[[2, 1, 0]].transpose((1, 2, 0))
rgb_u8 = np.array(np.clip(rgb * 255, 0, 255), dtype=np.uint8)
sky_img = Image.fromarray(rgb_u8)

# The file on disk keeps the full resolution; only the notebook preview is resized.
sky_img.save("/Users/kyledorman/data/estuary/display/skysat_full_navarro_river.png")
sky_img.resize((512, 512))

In [None]:
# Label font: use bold Arial from the system font folder when it loads,
# otherwise fall back to Pillow's built-in default font.
_LABEL_FONT_FILE = "/System/Library/Fonts/Supplemental/Arial Bold.ttf"
try:
    FONT = ImageFont.truetype(_LABEL_FONT_FILE, 20)
except Exception:
    FONT = ImageFont.load_default()


def draw_label(
    img: Image.Image, text: str, color: tuple[int, int, int], add_border=True
) -> Image.Image:
    """Annotate ``img`` in place with a caption banner and, optionally, a colored frame.

    Args:
        img: Image to draw on; it is modified directly and also returned.
        text: Caption, rendered in white with a dark outline using the module-level ``FONT``.
        color: RGB triple used (fully opaque) for the border.
        add_border: When true, a 4 px rectangle in ``color`` is drawn along the image edge.

    Returns:
        The same ``img`` object that was passed in.
    """
    margin_x, margin_y = 10, 8
    img_w, img_h = img.size
    canvas = ImageDraw.Draw(img, "RGBA")

    # The right/bottom edges of the text box measured from the origin serve as its width/height.
    _, _, text_w, text_h = canvas.textbbox((0, 0), text, font=FONT)

    # The banner starts one margin in from the top-left corner and is capped so that it
    # never reaches past the right-hand margin.
    left, top = margin_x, margin_y
    right = left + min(img_w - 2 * margin_x, text_w + 2 * margin_x)
    bottom = top + text_h + 2 * margin_y
    canvas.rounded_rectangle([left, top, right, bottom], radius=10, fill=(0, 0, 0, 110))

    # White text with a dark stroke so it stays readable on any background.
    canvas.text(
        (left + margin_x, top + margin_y),
        text,
        font=FONT,
        fill=(255, 255, 255, 255),
        stroke_width=2,
        stroke_fill=(0, 0, 0, 220),
    )

    if add_border:
        border_rgba = color + (255,)
        canvas.rectangle([0, 0, img_w - 1, img_h - 1], outline=border_rgba, width=4)

    return img

In [31]:
# Validation table: the acquisition timestamp is parsed out of each source_tif path
# (unparseable values become NaT), rows are ordered by region then time, and a
# date-only column is added.
valid_df = (
    pd.read_csv("/Users/kyledorman/data/estuary/validv3.csv")
    .assign(
        acquired=lambda df: pd.to_datetime(
            df["source_tif"].apply(lambda tif: parse_dt_from_pth(Path(tif))),
            errors="coerce",
        )
    )
    .sort_values(by=["region", "acquired"])
    .assign(acquired_date=lambda df: df["acquired"].dt.date)
)
valid_df.head(3)

Unnamed: 0,region,udm_path,source_tif,label_idx,pred,conf,acquired,acquired_date
535,big_sur_river,/Users/kyledorman/data/estuary/dove/results/20...,/Users/kyledorman/data/estuary/dove/results/20...,0,0,0.0,2019-01-01 18:25:38,2019-01-01
536,big_sur_river,/Users/kyledorman/data/estuary/dove/results/20...,/Users/kyledorman/data/estuary/dove/results/20...,0,0,0.0,2019-01-03 17:54:52,2019-01-03
542,big_sur_river,/Users/kyledorman/data/estuary/dove/results/20...,/Users/kyledorman/data/estuary/dove/results/20...,0,0,0.0,2019-01-04 17:54:40,2019-01-04


In [30]:
# Number of distinct acquisition dates per region in the validation table.
dates_by_region = valid_df.groupby("region")["acquired_date"]
counts = dates_by_region.nunique()
counts.head(15)

region
big_sur_river             672
carmel                    548
goleta                    751
little_sur                573
los_penasquitos_lagoon    654
malibu_lagoon             741
navarro_river             605
pismo_creek_lagoon        742
russian_river             690
san_dieguito_lagoon       696
san_elijo_lagoon          688
san_mateo_lagoon          733
santa_margarita           673
topanga                   809
ventura                   712
Name: acquired_date, dtype: int64

In [33]:
# Model predictions: parse the stored acquisition strings (bad values become NaT),
# order by region then time, and add a date-only column.
preds = (
    pd.read_csv("/Users/kyledorman/data/estuary/predsv3.csv")
    .assign(acquired=lambda df: pd.to_datetime(df["acquired"], errors="coerce"))
    .sort_values(by=["region", "acquired"])
    .assign(acquired_date=lambda df: df["acquired"].dt.date)
)

preds.head(3)

Unnamed: 0,index,region,udm_path,source_tif,label_idx,pred,conf,acquired,acquired_date
0,535,big_sur_river,/Users/kyledorman/data/estuary/dove/results/20...,/Users/kyledorman/data/estuary/dove/results/20...,0,0,0.995433,2019-01-01 18:25:38,2019-01-01
1,536,big_sur_river,/Users/kyledorman/data/estuary/dove/results/20...,/Users/kyledorman/data/estuary/dove/results/20...,0,0,0.99833,2019-01-03 17:54:52,2019-01-03
2,542,big_sur_river,/Users/kyledorman/data/estuary/dove/results/20...,/Users/kyledorman/data/estuary/dove/results/20...,0,0,0.998993,2019-01-04 17:54:40,2019-01-04


In [34]:
# Human labels: timestamps are parsed from the source_tif path, "unsure" rows are
# discarded, and the frame is ordered by region then time. reset_index() is called
# without drop, so the previous index is kept as an "index" column.
labels = (
    pd.read_csv("/Users/kyledorman/data/estuary/label_studio/00025/labels.csv")
    .assign(
        acquired=lambda df: pd.to_datetime(
            df["source_tif"].apply(lambda tif: parse_dt_from_pth(Path(tif))),
            errors="coerce",
        )
    )
    .assign(acquired_date=lambda df: df["acquired"].dt.date)
    .loc[lambda df: df["label"] != "unsure"]
    .sort_values(by=["region", "acquired"])
    .reset_index()
)

labels.head(3)

Unnamed: 0,index,region,source_tif,source_jpeg,label,acquired,acquired_date
0,2530,big_sur_river,/Users/kyledorman/data/estuary/dove/results/20...,/Users/kyledorman/data/estuary/label_studio/00...,open,2019-01-19 18:26:39,2019-01-19
1,2597,big_sur_river,/Users/kyledorman/data/estuary/dove/results/20...,/Users/kyledorman/data/estuary/label_studio/00...,open,2019-01-24 18:28:06,2019-01-24
2,2435,big_sur_river,/Users/kyledorman/data/estuary/dove/results/20...,/Users/kyledorman/data/estuary/label_studio/00...,open,2019-02-23 18:29:41,2019-02-23


In [35]:
# Number of distinct acquisition timestamps per region among the kept labels.
label_times_by_region = labels.groupby("region")["acquired"]
counts = label_times_by_region.nunique()
counts.head(15)

region
big_sur_river             194
carmel                    199
goleta                    199
little_sur                197
los_penasquitos_lagoon    192
malibu_lagoon             196
navarro_river             198
pismo_creek_lagoon        193
russian_river             195
san_dieguito_lagoon       200
san_elijo_lagoon          199
san_mateo_lagoon          197
santa_margarita           198
topanga                   197
ventura                   195
Name: acquired, dtype: int64

In [36]:
# Index every pansharpened SkySat clip on disk. The region is the directory two levels
# above the file, and the acquisition date is the YYYYMMDD token that starts the file name.
skysat_root = Path("/Users/kyledorman/data/estuary/skysat/results/")
high_res = [
    [
        tif,
        tif.parent.parent.name,
        pd.to_datetime(tif.stem.split("_")[0], format="%Y%m%d"),
    ]
    for tif in skysat_root.glob("*/*/files/*_pansharpened_clip.tif")
]
high_res_df = pd.DataFrame(high_res, columns=["path", "region", "acquired"]).sort_values(
    by=["region", "acquired"]
)

high_res_df.head(3)

Unnamed: 0,path,region,acquired
174,/Users/kyledorman/data/estuary/skysat/results/...,big_sur_river,2019-09-06
196,/Users/kyledorman/data/estuary/skysat/results/...,big_sur_river,2021-02-21
36,/Users/kyledorman/data/estuary/skysat/results/...,big_sur_river,2022-11-12


In [37]:
# Number of distinct SkySat acquisition dates per region.
skysat_dates_by_region = high_res_df.groupby("region")["acquired"]
counts = skysat_dates_by_region.nunique()
counts.head(15)

region
big_sur_river              4
carmel                     1
goleta                    92
little_sur                 2
los_penasquitos_lagoon     8
malibu_lagoon              2
navarro_river             11
pismo_creek_lagoon         7
russian_river              4
san_dieguito_lagoon       10
san_elijo_lagoon           6
san_mateo_lagoon           8
santa_margarita            6
topanga                   12
ventura                   12
Name: acquired, dtype: int64

In [44]:
# Per-region summary table, written to CSV and previewed below.
# NOTE(review): dove_total counts distinct dates, while dove_labeled counts distinct
# timestamps (labels.acquired keeps the time of day) — confirm the mixed units are intended.
dove_total = valid_df.groupby("region")["acquired_date"].nunique().rename("dove_total")
dove_labeled = labels.groupby("region").acquired.nunique().rename("dove_labeled")
skysat_total = high_res_df.groupby("region").acquired.nunique().rename("skysat_total")

# Align the three series on region, one column each.
data_stats = pd.concat([dove_total, dove_labeled, skysat_total], axis=1)

data_stats.to_csv("/Users/kyledorman/data/estuary/display/region_stats.csv")
data_stats.head(15)

Unnamed: 0_level_0,dove_total,dove_labeled,skysat_total
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
big_sur_river,672,194,4
carmel,548,199,1
goleta,751,199,92
little_sur,573,197,2
los_penasquitos_lagoon,654,192,8
malibu_lagoon,741,196,2
navarro_river,605,198,11
pismo_creek_lagoon,742,193,7
russian_river,690,195,4
san_dieguito_lagoon,696,200,10


In [None]:
# Per-region crop boxes; later cells unpack each entry as (start_w, start_h, end_w, end_h).
CROP_PATH = Path("/Users/kyledorman/data/estuary/label_studio/region_crops.json")
with CROP_PATH.open("rb") as crop_file:
    region_crops = json.load(crop_file)

In [None]:
# Overview figure: one cropped Dove thumbnail per region on a 3 x 5 grid.
# zip(strict=False) stops at whichever runs out first, the regions or the 15 axes.
fig, axes = plt.subplots(3, 5, figsize=(5 * 2.9, 3 * 3), constrained_layout=True)
for (region, rows), ax in zip(preds.groupby("region"), axes.flatten(), strict=False):
    title_words = [word.capitalize() for word in region.split("_")]
    ax.set_title(" ".join(title_words))
    ax.axis("off")

    # Crop box for this region, unpacked as (start_w, start_h, end_w, end_h).
    crop = region_crops[region]
    start_w, start_h, end_w, end_h = crop
    w, h = end_w - start_w, end_h - start_h
    row_span, col_span = slice(start_h, end_h), slice(start_w, end_w)

    # The row at positional index 6 of the region's group supplies the example image.
    pth = rows["source_tif"].iloc[6]
    with rasterio.open(pth) as src:
        data = src.read(out_dtype=np.float32)[:, row_span, col_span]
        nodata = src.read(1, masked=True).mask[row_span, col_span]

    # Log-compress, percentile-stretch over the unmasked pixels, then bands 2, 1, 0 -> HWC.
    data = np.log10(data + 1)
    imgd = masked_contrast_stretch(data, ~nodata, p_low=1, p_high=99)
    rgb = imgd[[2, 1, 0]].transpose((1, 2, 0))
    rgb_u8 = np.array(np.clip(rgb * 255, 0, 255), dtype=np.uint8)
    img = Image.fromarray(rgb_u8).resize((256, 256))
    ax.imshow(img)

plt.savefig("/Users/kyledorman/data/estuary/display/all_sites_dove.png")
plt.show()

In [None]:
# Overview figure: the earliest SkySat scene of each region on a 3 x 5 grid
# (high_res_df is sorted by region then acquisition date, so iloc[0] is the earliest).
fig, axes = plt.subplots(3, 5, figsize=(5 * 2.9, 3 * 3), constrained_layout=True)
for (region, rows), ax in zip(high_res_df.groupby("region"), axes.flatten(), strict=False):
    title_words = [word.capitalize() for word in region.split("_")]
    ax.set_title(" ".join(title_words))
    ax.axis("off")

    pth = rows["path"].iloc[0]
    with rasterio.open(pth) as src:
        # Bands 3, 2, 1 are read directly in display order; the mask comes from band 1.
        data = src.read([3, 2, 1], out_dtype=np.float32)
        nodata = src.read(1, masked=True).mask

    # Log-compress, percentile-stretch over the unmasked pixels, then move bands last.
    data = np.log10(data + 1)
    imgd = masked_contrast_stretch(data, ~nodata, p_low=1, p_high=99)
    rgb = imgd.transpose((1, 2, 0))
    rgb_u8 = np.array(np.clip(rgb * 255, 0, 255), dtype=np.uint8)
    img = Image.fromarray(rgb_u8).resize((512, 512))
    ax.imshow(img)

plt.savefig("/Users/kyledorman/data/estuary/display/all_sites_skysat.png")
plt.show()

In [None]:
# Choose the region and the (exclusive) time window for the animation.
region = "topanga"
start = datetime.datetime(2023, 1, 1)
end = datetime.datetime(2023, 3, 1)

# Crop box for the chosen region, unpacked as (start_w, start_h, end_w, end_h).
crop = region_crops[region]
start_w, start_h, end_w, end_h = crop
w, h = end_w - start_w, end_h - start_h

# Predictions for this region acquired strictly between start and end.
in_region = preds["region"] == region
in_window = (preds["acquired"] > start) & (preds["acquired"] < end)
gif_df = preds[in_region & in_window]

len(gif_df)

In [None]:
# Render one labelled 256 x 256 frame per row of gif_df; the frames are encoded to video
# in the following cell. The crop bounds (start_h, end_h, start_w, end_w) and `region`
# come from the region/time-window cell above.
save_path = Path(f"/Users/kyledorman/data/estuary/display/gifs/{region}/{start.date()}.mp4")
save_path.parent.mkdir(exist_ok=True, parents=True)

frames = []
for _, row in tqdm.tqdm(gif_df.iterrows(), total=len(gif_df)):
    pth = row.source_tif
    pred_name = "open" if row.pred == 0 else "close"
    pred_color = (44, 160, 44) if row.pred == 0 else (214, 39, 40)  # green/red

    # Date shown in the banner. A missing or NaT value yields no date at all rather than
    # the literal "NaT"; anything pandas cannot parse is shown verbatim.
    # (pd.to_datetime(..., errors="ignore") is deprecated since pandas 2.2, so it is not used.)
    acquired = getattr(row, "acquired", None)
    if acquired is None or pd.isna(acquired):
        date_disp = ""
    else:
        try:
            date_disp = pd.to_datetime(acquired).strftime("%Y-%m-%d")
        except (ValueError, TypeError):
            date_disp = str(acquired)

    with rasterio.open(pth) as src:
        data = src.read(out_dtype=np.float32)[:, start_h:end_h, start_w:end_w]
        nodata = src.read(1, masked=True).mask[start_h:end_h, start_w:end_w]
    # Log-compress, percentile-stretch over the unmasked pixels, then bands 2, 1, 0 -> HWC.
    data = np.log10(data + 1)
    imgd = masked_contrast_stretch(data, ~nodata, p_low=1, p_high=99)
    rgb = imgd[[2, 1, 0]].transpose((1, 2, 0))
    img = Image.fromarray(np.array(np.clip(rgb * 255, 0, 255), dtype=np.uint8)).resize((256, 256))

    # Banner text: "<date> • <prediction>", or just the prediction when no date is available.
    label_text = f"{pred_name}"
    if date_disp:
        label_text = f"{date_disp} • " + label_text

    # draw_label modifies the image in place and returns it.
    img = draw_label(img, label_text, pred_color, add_border=True)

    frames.append(img)

# Convert each PIL frame to a NumPy array (imageio needs ndarray or PIL)
frame_arrays = [np.array(im.convert("RGB")) for im in frames]

In [None]:
# Encode the collected frames as an H.264 MP4 at one frame per second.
video_path = save_path
fps = 1
writer_options = dict(
    fps=fps,
    codec="libx264",  # H.264 for compatibility
    quality=10,  # 0 (lowest) - 10 (highest) for libx264
    macro_block_size=None,  # keeps original frame size
)
imageio.mimsave(video_path, frame_arrays, **writer_options)
print(f"Saved video → {video_path}")

In [None]:
# Inline preview of the rendered MP4; embed=True embeds the video data in the cell output
# instead of linking to the file. `Video` is imported in the notebook's top import cell.
Video(str(video_path), embed=True, width=600)

In [None]:
# For every region, pair each SkySat scene with the nearest-in-time Dove prediction and
# save a side-by-side comparison PNG under save_base/<region>/.
save_base = Path("/Users/kyledorman/data/estuary/display/skysat")

for region in preds.region.unique():
    pdf = preds[preds.region == region]
    hdf = high_res_df[high_res_df.region == region]
    # merge_asof needs both frames sorted on the key and raises on null keys.
    # preds.acquired is parsed with errors="coerce" and may therefore hold NaT,
    # so rows without a timestamp are dropped before matching.
    hdf_s = hdf.dropna(subset=["acquired"]).sort_values("acquired").reset_index(drop=True)
    pdf_s = pdf.dropna(subset=["acquired"]).sort_values("acquired").reset_index(drop=True)
    # Nearest match in either direction, at most 3 days apart
    pairs = pd.merge_asof(
        hdf_s,
        pdf_s,
        on="acquired",
        direction="nearest",
        tolerance=pd.Timedelta("3D"),
        suffixes=("_h", "_p"),
    )
    # Keep only rows that found a match (otherwise columns from pdf will be NaN)
    pairs = pairs.dropna(subset=["path", "source_tif"])

    save = save_base / region
    save.mkdir(exist_ok=True, parents=True)

    # Crop box applied to the Dove image only; the SkySat scene is shown whole.
    crop = region_crops[region]
    start_w, start_h, end_w, end_h = crop

    for state in [0, 1]:
        for _, row in pairs[pairs.pred == state].iterrows():
            # Left panel: SkySat scene, log-compressed and percentile-stretched.
            with rasterio.open(row.path) as src:
                data = src.read(out_dtype=np.float32)
                nodata = src.read(1, masked=True).mask
            data = np.log10(data + 1)
            imgd = masked_contrast_stretch(data, ~nodata, p_low=1, p_high=99)
            rgb = imgd[[2, 1, 0]].transpose((1, 2, 0))
            sky_img = Image.fromarray(np.array(np.clip(rgb * 255, 0, 255), dtype=np.uint8)).resize(
                (512, 512)
            )

            # The title carries the Dove prediction matched to this scene.
            pred_name = "open" if row.pred == 0 else "close"

            fig, axes = plt.subplots(1, 2, figsize=(12, 6))
            axes[0].imshow(sky_img)
            axes[0].set_title(f"{pred_name} - {row.acquired.date()} - {region}")
            axes[0].axis("off")

            # Right panel: the matched Dove image, cropped to the region's box.
            with rasterio.open(row.source_tif) as src:
                data = src.read(out_dtype=np.float32)[:, start_h:end_h, start_w:end_w]
                nodata = src.read(1, masked=True).mask[start_h:end_h, start_w:end_w]
            data = np.log10(data + 1)
            imgd = masked_contrast_stretch(data, ~nodata, p_low=1, p_high=99)
            rgb = imgd[[2, 1, 0]].transpose((1, 2, 0))
            img = Image.fromarray(np.array(np.clip(rgb * 255, 0, 255), dtype=np.uint8)).resize(
                (256, 256)
            )

            axes[1].imshow(img)
            axes[1].axis("off")

            # NOTE(review): `acquired` here is the SkySat date parsed from the file-name prefix,
            # so two scenes from the same day with the same prediction share one output path
            # and the later one overwrites the earlier — confirm this is acceptable.
            fig.tight_layout()
            fig.savefig(save / f"{pred_name}_{row.acquired.date()}.png")
            # Close explicitly so figures do not accumulate across iterations.
            plt.close(fig)