# Tidal Analysis

In [None]:
# Standard library
from datetime import datetime
from pathlib import Path

# Third-party libraries
import folium
import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pvlib
import geopandas as gpd
from pyproj import Transformer
from shapely import Point
from tqdm import tqdm
from branca.colormap import linear
from IPython.display import display

# Project modules
from src.tides import clean_latlon, find_nearest_coordinate, tide_model

In [None]:
# Machine-specific root of the input data; edit before running elsewhere.
BASE = Path("/Users/kyledorman/data/planet_coverage/ca_only/")  # <-- update this

# Grid cell examined throughout the notebook.
GRID_ID = 31093

# Output folder for figures: <parent of BASE>/figs/per_grid_tide/<GRID_ID>.
FIG_DIR = BASE.parent.joinpath("figs", "per_grid_tide", str(GRID_ID))
FIG_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Load the ocean outline, the full grid layer and the tidal coverage table.
ca_ocean = gpd.read_file(BASE / "ca_ocean.geojson")
all_grids_df = gpd.read_file(BASE / "ocean_grids.gpkg")
tide_df = pd.read_csv(BASE / "simulated_tidal_coverage.csv")

# Keep only the grid cells that appear in the coverage table, re-project them
# to the ocean layer's CRS, then attach the coverage columns to each cell.
has_coverage = all_grids_df["cell_id"].isin(tide_df["cell_id"])
grid_df = all_grids_df.loc[has_coverage].to_crs(ca_ocean.crs)
grid_tide_df = grid_df.merge(tide_df, on="cell_id")

In [None]:
# Coverage-table record(s) for the grid cell under study.
row = tide_df.loc[tide_df["cell_id"] == GRID_ID]

# Show the two Planet count columns stored for that cell.
display(row["planet_low_count"])
display(row["planet_high_count"])

In [None]:
def plot_column(df, column_name, title):
    """Draw the cells of *df* on a Folium map, coloured by one column.

    Each row's geometry is added as a GeoJson layer filled with the viridis
    colour of ``df[column_name]``. The cell ``GRID_ID`` (looked up in the
    notebook-level ``grid_df``) provides the map centre and is marked with a
    red circle at its centroid.

    Args:
        df: Table with ``geometry``, ``cell_id`` and ``column_name`` columns.
        column_name: Name of the numeric column that drives the colour.
        title: Caption shown on the colour scale.

    Returns:
        The populated ``folium.Map``.
    """
    values = df[column_name]
    lowest, highest = values.min(), values.max()
    # When every value is identical, start the scale at 0 instead of at that value.
    scale = linear.viridis.scale(0 if highest == lowest else lowest, highest)

    # Rows of the notebook-level grid table that match the cell under study.
    target_cells = grid_df[grid_df.cell_id == GRID_ID]
    centre = target_cells.iloc[0].geometry.centroid

    fmap = folium.Map(
        location=[centre.y, centre.x],
        zoom_start=3,
        tiles="CartoDB positron",
        width=1000,
        height=800,
    )

    def flat_style(colour):
        """Return a style callback that paints fill and outline in *colour*."""
        return lambda _feature: {
            "fillColor": colour,
            "color": colour,  # outline same as fill
            "weight": 1,
            "fillOpacity": 0.7,
        }

    # One GeoJson layer per cell, with the cell id and value as tooltip.
    for _, cell in df.iterrows():
        cell_value = cell[column_name]
        folium.GeoJson(
            data=cell.geometry,
            style_function=flat_style(scale(cell_value)),
            tooltip=f"{cell.cell_id}: {cell_value:0.1f}",
        ).add_to(fmap)

    # Red marker on the centroid of every row matching GRID_ID.
    for _, target in target_cells.iterrows():
        marker_at = target.geometry.centroid
        folium.CircleMarker(
            location=[marker_at.y, marker_at.x],
            radius=5,
            fill=True,
            fill_opacity=1.0,
            color="red",
        ).add_to(fmap)

    scale.caption = title
    scale.add_to(fmap)
    return fmap

In [None]:
# Map the `planet_low_count` column for every grid cell.
plot_column(df=grid_tide_df, column_name="planet_low_count", title="Planet Low Tide Count")

In [None]:
# Map the `planet_high_count` column for every grid cell.
plot_column(df=grid_tide_df, column_name="planet_high_count", title="Planet High Tide Count")

In [None]:
# Express the target cell's centroid in the ocean layer's CRS.
transformer = Transformer.from_crs(all_grids_df.crs, ca_ocean.crs, always_xy=True)
# NOTE: despite its name, `row` holds the geometry of the first matching cell.
row = all_grids_df.loc[all_grids_df.cell_id == GRID_ID].iloc[0].geometry
centroid_x, centroid_y = transformer.transform(row.centroid.x, row.centroid.y)
grid_point = Point(centroid_x, centroid_y)

In [None]:
# Tide model built by the project helper from a machine-specific directory.
tm = tide_model(Path("/Users/kyledorman/data/tides"), "GOT4.10", "GOT")

# Sampling window: start_ts inclusive, end_ts exclusive (np.arange semantics).
start_ts = np.datetime64(datetime(2023, 12, 1))
end_ts = np.datetime64(datetime(2024, 12, 1))

# Time of day associated with each sensor.
# NOTE(review): presumably nominal overpass times in local solar time — confirm.
sentinel_time = pd.Timedelta(hours=10, minutes=30)
landsat_time = pd.Timedelta(hours=10)
planet_time = pd.Timedelta(hours=11)

# Per-sensor strides and binning / sampling parameters.
sentinel_stride = 5
landsat_stride = 8
nbins = 10            # number of tide-height bins
minutes_sample = 1    # sampling interval, in minutes
mid_tide_delta = 0.5  # half-width of the band around the mean tide height

# Evenly spaced timestamps covering the window, stored at nanosecond resolution.
sample_step = np.timedelta64(minutes_sample, "m")
minutes = np.arange(start_ts, end_ts, sample_step).astype("datetime64[ns]")

# Query point as (y, x) of the re-projected centroid.
latlons = np.array([grid_point.y, grid_point.x])

In [None]:
# First result of the project's nearest-coordinate lookup for the cleaned query point.
closest_point = find_nearest_coordinate(clean_latlon(latlons), tm.mz, tm.yi, tm.xi)[0]

# closest_point is treated as (lat, lon) here, whereas Point takes (x, y).
cp = Point(closest_point[1], closest_point[0])

base_map = folium.Map(location=[cp.y, cp.x], zoom_start=5, width=600, height=600)


def _green_outline(feature):
    """Style callback: thin green outline for the grid cell."""
    return {
        "color": "green",
        "weight": 1,
    }


# Outline of the cell under study, with its id as tooltip.
for _, row in grid_df[grid_df.cell_id == GRID_ID].iterrows():
    cell_outline = folium.GeoJson(
        row.geometry,
        tooltip=str(row["cell_id"]),
        style_function=_green_outline,
    )
    cell_outline.add_to(base_map)

# Red dot at the coordinate returned by the lookup.
closest_marker = folium.CircleMarker(
    location=[cp.y, cp.x],
    radius=5,
    fill=True,
    fill_opacity=0.7,
    color=None,
    fill_color="red",
)
closest_marker.add_to(base_map)

base_map

In [None]:
# obs_hr = pd.read_csv(
#     "/Users/kyledorman/data/tides/guages/broome/IDO71013_2023.csv", 
#     parse_dates=[' Date & UTC Time'], 
#     # index_col=' Date & UTC Time'
# ).rename(columns={
#     'Sea Level':'obs_m',
#     ' Date & UTC Time': 'timestamp'
# }).set_index('timestamp')
# obs_hr["obs_delta"] = obs_hr.obs_m - obs_hr.obs_m.mean()

# obs_hr

In [None]:
# pred_tide_elevations = tm.tide_elevations(np.array(latlons), times=[obs_hr.index.to_numpy()])[0]
# pred = pd.Series(pred_tide_elevations, index=obs_hr.index, name="tmd_m")

# wa = obs_hr.tz_localize("UTC").tz_convert("Australia/Perth")
# pred_wa = pred.tz_localize("UTC").tz_convert("Australia/Perth")

# common = pd.concat([obs_hr["obs_delta"], pred], axis=1).dropna()
# rmse = np.sqrt(((common["obs_delta"]-common["tmd_m"])**2).mean())
# corr = common.corr().iloc[0,1]

# fig, ax = plt.subplots(figsize=(10,4))
# common.plot(ax=ax, alpha=0.8)
# ax.set_title(f"Broome {obs_hr.index.min().year} | RMSE={rmse:.02f} m  R={corr:.2f}")
# ax.set_ylabel("Sea-level delta (m)")
# plt.savefig(FIG_DIR / "compare_to_broome.png")
# plt.show()

In [None]:
# Modelled tide height at the query point for each timestamp in `minutes`.
tide_elevations = tm.tide_elevations(latlons, times=[minutes])[0]

# One record per timestamp; the point's coordinates are repeated on every row.
orig_tides_df = pd.DataFrame(
    {"acquired": minutes, "tide_height": tide_elevations}
).assign(lat=latlons[0], lon=latlons[1])

In [None]:
# Bin edges placed at evenly spaced quantiles of the modelled tide heights.
q_edges = np.linspace(0.0, 1.0, nbins + 1)
height_edges = np.quantile(tide_elevations, q_edges)

# Nudge the outermost edges outwards so the extreme samples fall inside a bin.
eps = 1e-6
height_edges[[0, -1]] += [-eps, eps]

# Integer bin label (0 … nbins-1) per sample, stored on a copy of the raw table.
tides_df = orig_tides_df.copy()
tides_df["height_bin"] = pd.cut(
    tides_df["tide_height"],
    bins=height_edges,
    labels=False,
    include_lowest=True,  # type: ignore
)
assert not tides_df["height_bin"].isna().any()

# Flag samples lying strictly within mid_tide_delta of the mean tide height.
mean_tide_height = tides_df["tide_height"].mean()
tide_diff = tides_df["tide_height"] - mean_tide_height
tides_df["is_mid_tide"] = tide_diff.abs() < mid_tide_delta

height_edges.round(1)

In [None]:
# Solar position per sample; only its equation-of-time column is used here.
solpos = pvlib.solarposition.get_solarposition(tides_df.acquired, tides_df.lat, tides_df.lon)

# Solar time = acquired timestamp + equation of time + longitude / 15 hours.
# .to_numpy() sidesteps index alignment with the solpos / tides_df indexes.
tides_df["eot"] = pd.to_timedelta(solpos["equation_of_time"], unit="m").to_numpy()
tides_df["lon_term"] = pd.to_timedelta(tides_df["lon"] / 15.0, unit="h").to_numpy()
tides_df["solar_time"] = tides_df["acquired"] + tides_df["eot"] + tides_df["lon_term"]
# Time elapsed since midnight of the same solar day.
tides_df["solar_time_offset"] = tides_df["solar_time"] - tides_df["solar_time"].dt.normalize()

# Divisor turning nanoseconds into hours.
NS_PER_HOUR = 3.6e12


def _hours_from(target):
    """Absolute gap, in hours, between *target* and each row's solar time of day."""
    gap_ns = np.float32((target - tides_df.solar_time_offset).values)  # type: ignore
    return np.abs(gap_ns) / NS_PER_HOUR


tides_df["sentinel_offset"] = _hours_from(sentinel_time)
tides_df["landsat_offset"] = _hours_from(landsat_time)
tides_df["planet_offset"] = _hours_from(planet_time)

In [None]:
# Absolute gap, in hours, between each sample's solar time of day and 12:00.
noon = pd.Timedelta(hours=12)
noon_gap_ns = np.float32((noon - tides_df.solar_time_offset).values)  # type: ignore
tides_df["solar_noon_offset"] = np.abs(noon_gap_ns) / 3.6e12  # ns → hours

# Calendar date of the solar timestamp, used as the per-day grouping key.
tides_df["solar_date"] = tides_df.solar_time.dt.date

In [None]:
# Samples expected per day at the chosen interval, less 1 to tolerate rounding.
MINUTES_IN_DAY = 24 * 60 / minutes_sample - 1


def _has_full_day(day_rows):
    """True when one solar date holds at least MINUTES_IN_DAY samples."""
    return len(day_rows) >= MINUTES_IN_DAY


# Drop every solar date with too few samples (e.g. a partially covered day).
tides_df = tides_df.groupby("solar_date", group_keys=False).filter(_has_full_day)

In [None]:
# ── colour palette for the bin guides ───────────────────────────
# One colour per height bin; nbins == len(height_edges) - 1.
cm       = plt.get_cmap("viridis", nbins)
palette  = [cm(i) for i in range(nbins)]

# ── number of equal-width bars in the histogram itself (independent of nbins) ──
bins = 30

data = tides_df.tide_height.dropna().values

fig, ax = plt.subplots(figsize=(8, 4.5))

# main histogram of all retained tide-height samples
ax.hist(
    data,
    bins=bins,
    color="lightgrey",
    edgecolor="black",
    alpha=0.9,
)

# Dotted guide at each INTERIOR height edge (indices 1 … nbins-1). The first
# and last edges sit just outside the data range and are not drawn.
for idx, edge in enumerate(height_edges[1:-1], 1):
    ax.axvline(edge, color=palette[idx], linestyle=":", linewidth=2)

# labels & title
ax.set_title("Tide-height distribution with bin guides")
ax.set_xlabel("Tide height (m)")
ax.set_ylabel("Count")

# Legend: one entry per guide actually drawn above. Starting at 1 avoids an
# "edge 0" entry that has no matching line on the plot.
handles = [
    mpl.lines.Line2D([], [], color=palette[i], ls=":", lw=2, label=f"edge {i}")
    for i in range(1, nbins)
]
ax.legend(handles=handles, title="Height-bin edges",
          bbox_to_anchor=(1.04, 1), loc="upper left",
          frameon=False)

plt.tight_layout()
plt.savefig(FIG_DIR / "all_tide_hist.png")
plt.show()

In [None]:
# One viridis colour per height bin.
cm = plt.get_cmap("viridis", nbins)
palette = [cm(k) for k in range(nbins)]

# ── 1. daily extremes and the mid-tide subset ───────────────────────────
daily_heights = tides_df.groupby("solar_date").tide_height
max_daily_height = daily_heights.max()
min_daily_height = daily_heights.min()

# Full rows at which each day's maximum / minimum occurs, plus all mid-tide rows.
daily_max_rows = tides_df.loc[daily_heights.idxmax()].copy()
daily_min_rows = tides_df.loc[daily_heights.idxmin()].copy()
mid_rows = tides_df.loc[tides_df.is_mid_tide].copy()

# Solar time of day expressed in fractional hours.
for frame in (daily_max_rows, daily_min_rows, mid_rows):
    frame["time_hr"] = frame.solar_time_offset.dt.total_seconds() / 3600.0


def offsets_by_bin(df):
    """Return one array of ``time_hr`` values per height bin of *df*."""
    per_bin = []
    for k in range(nbins):
        per_bin.append(df.loc[df.height_bin == k, "time_hr"].values)
    return per_bin


max_by_bin = offsets_by_bin(daily_max_rows)
min_by_bin = offsets_by_bin(daily_min_rows)
mid_by_bin = offsets_by_bin(mid_rows)

# ── 2. figure grid: 3 rows × 2 columns ──────────────────────────────────
fig, axes = plt.subplots(3, 2, figsize=(11, 10), constrained_layout=True)

# Shared x-range and 30 equal-width bins for all tide-height histograms.
height_min = tides_df.tide_height.min()
height_max = tides_df.tide_height.max()
height_bins = np.linspace(height_min, height_max, 31)

# Guides to draw on every left-hand panel: each edge but the last, when it lies
# inside the plotted range, followed by the last edge unconditionally.
edge_guides = [
    (edge, palette[idx])
    for idx, edge in enumerate(height_edges[:-1])
    if height_min <= edge <= height_max
]
edge_guides.append((height_edges[-1], palette[-1]))

# Left column: tide-height histograms.
titles_left = ["Daily max tide height", "Daily min tide height", "Mid-tide height"]
frames_left = [max_daily_height, min_daily_height, mid_rows.tide_height]

for ax, title, series in zip(axes[:, 0], titles_left, frames_left):
    ax.hist(series, bins=height_bins, color="lightgrey", edgecolor="black")
    for edge, edge_colour in edge_guides:
        ax.axvline(edge, color=edge_colour, ls=":", lw=2)
    ax.set_xlim(height_min, height_max)
    ax.set_xlabel("Tide height (m)")
    ax.set_title(title)

# Right column: solar-time histograms, stacked by height_bin.
# Hourly bins, with the outer edges widened slightly beyond 0 and 24.
hour_bins = np.linspace(0, 24, 25)
hour_bins[[0, -1]] += [-1e-2, +1e-2]
bin_labels = [f"bin {i}" for i in range(nbins)]

titles_right = [
    "Time of daily max tide (h)",
    "Time of daily min tide (h)",
    "Time of mid-tide (h)",
]
stacks_right = [max_by_bin, min_by_bin, mid_by_bin]

for ax, title, stacks in zip(axes[:, 1], titles_right, stacks_right):
    ax.hist(stacks, bins=hour_bins, stacked=True, color=palette, label=bin_labels)
    ax.set_xlim(0, 24)
    ax.set_xlabel("Solar-time offset (hours)")
    ax.set_title(title)

# A single legend, attached to the bottom-right panel.
axes[-1, 1].legend(title="Height-bin", bbox_to_anchor=(1.04, 1), loc="upper left")

plt.savefig(FIG_DIR / "hist_tide_height.png")
plt.show()

In [None]:
# (display name, offset column, stride): after picking one sample per solar
# date, every `stride`-th day is kept. Strides come from the notebook-level
# parameters instead of repeating their values as literals.
groups = [
    ("planet",   "planet_offset",   1),
    ("sentinel", "sentinel_offset", sentinel_stride),
    ("landsat",  "landsat_offset",  landsat_stride),
    # NOTE(review): no named constant exists for this stride — confirm that 16 is intended.
    ("landsat8", "landsat_offset",  16),
    # ("solar noon",  "solar_noon_offset"),
]

# 0 … nbins-1 → distinct viridis colours
cm       = plt.get_cmap("viridis", nbins)
norm     = mpl.colors.Normalize(vmin=0, vmax=nbins - 1)   # for ScalarMappable
palette  = [cm(i) for i in range(nbins)]

# Common y-range so the panels are directly comparable.
ymin, ymax = tides_df.tide_height.min(), tides_df.tide_height.max()

fig, axes = plt.subplots(
    nrows=len(groups),
    ncols=1,
    figsize=(11, 3 * len(groups)),
    sharex=True,
    constrained_layout=True,
)

for ax, (sat_name, offset_name, stride) in zip(axes, groups):
    # one row per solar date — the row whose <offset> is smallest — then
    # keep every `stride`-th of those days
    df = tides_df.loc[
        tides_df.groupby("solar_date")[offset_name].idxmin(),
        ["acquired", "tide_height", "height_bin"],
    ].reset_index(drop=True).iloc[::stride]

    # draw the points, coloured by height bin
    sc = ax.scatter(
        df.acquired,
        df.tide_height,
        c=df.height_bin,
        cmap=cm,
        norm=norm,
        s=18,
        edgecolor="none",
    )

    # guide lines at the upper edge of the lowest bin and the lower edge of the highest bin
    ax.axhline(height_edges[1],  color=palette[0], linestyle="-", linewidth=2)
    ax.axhline(height_edges[-2], color=palette[-1], linestyle="-", linewidth=2)

    # mid-tide band: mean ± mid_tide_delta
    ax.axhline(mean_tide_height - mid_tide_delta, color="red", linestyle=":", linewidth=2)
    ax.axhline(mean_tide_height + mid_tide_delta, color="red", linestyle=":", linewidth=2)

    ax.set_ylim(ymin, ymax)
    ax.set_ylabel("Tide-height (m)")
    ax.set_title(f"{sat_name.title()} – daily pass tide height")

# add ONE colour-bar for height_bin; the top label follows nbins rather than
# assuming ten bins
cbar = fig.colorbar(
    mpl.cm.ScalarMappable(norm=norm, cmap=cm),
    ax=axes,
    orientation="vertical",
    label=f"Height-bin (0 = lowest, {nbins - 1} = highest)",
    shrink=0.9,
    pad=0.02,
)

axes[-1].set_xlabel("Acquired datetime (UTC)")
plt.savefig(FIG_DIR / "temporal_tide_height.png")
plt.show()

In [None]:
# ────────────────────────────────────────────────────────────────
# Per-group histogram of tide heights
#   • every panel is binned on the quantile edges in height_edges
#   • bars are stacked by height_bin and coloured with the viridis palette
#   • red vertical lines mark the mid-tide band (mean ± mid_tide_delta)
# ────────────────────────────────────────────────────────────────

# (display name, offset column, stride): after picking one sample per solar
# date, every `stride`-th day is kept. Strides come from the notebook-level
# parameters instead of repeating their values as literals.
groups = [
    ("planet",   "planet_offset",   1),
    ("sentinel", "sentinel_offset", sentinel_stride),
    ("landsat",  "landsat_offset",  landsat_stride),
    # ("solar noon",  "solar_noon_offset"),
]

# palette for height_bin 0 … nbins-1
cm       = plt.get_cmap("viridis", nbins)
# Defined here so the colour-bar below does not depend on another cell
# having created `norm` first.
norm     = mpl.colors.Normalize(vmin=0, vmax=nbins - 1)
palette  = [cm(i) for i in range(nbins)]

# x-limits shared by all panels
height_min, height_max = tides_df.tide_height.min(), tides_df.tide_height.max()

fig, axes = plt.subplots(
    nrows=len(groups),
    ncols=1,
    figsize=(11, 3 * len(groups)),
    sharex=True,
    constrained_layout=True,
)

for ax, (sat_name, offset_name, stride) in zip(axes, groups):
    # take the one row per solar date whose <offset> is smallest (closest
    # pass), then keep every `stride`-th of those days
    df = tides_df.loc[
        tides_df.groupby("solar_date")[offset_name].idxmin(),
        ["tide_height", "height_bin"],
    ].iloc[::stride]

    # build a list [bin0_vals, bin1_vals, …] for the stacked histogram
    stacks = [df[df.height_bin == k].tide_height.values for k in range(nbins)]

    ax.hist(
        stacks,
        bins=height_edges,
        stacked=True,
        color=palette,
        edgecolor="black",
        label=[f"bin {k}" for k in range(nbins)],
    )

    # vertical guides at mean_tide ± mid_tide_delta
    ax.axvline(mean_tide_height - mid_tide_delta, color="red", ls="--", lw=2)
    ax.axvline(mean_tide_height + mid_tide_delta, color="red", ls="--", lw=2)

    ax.set_xlim(height_min, height_max)
    ax.set_ylabel("Count")
    ax.set_title(f"{sat_name.title()} – Tide Sample Height")

axes[-1].set_xlabel("Tide height (m)")
# add ONE colour-bar for height_bin; the top label follows nbins rather than
# assuming ten bins
cbar = fig.colorbar(
    mpl.cm.ScalarMappable(norm=norm, cmap=cm),
    ax=axes,
    orientation="vertical",
    label=f"Height-bin (0 = lowest, {nbins - 1} = highest)",
    shrink=0.9,
    pad=0.02,
)

plt.savefig(FIG_DIR / "hist_per_sat_tide_height.png")
plt.show()

In [None]:
# ─────────────── adjust here ─────────────── #
month_start = "2023-12-01"
month_end   = "2023-12-07"
N = 10                       # keep every N-th sample of the window
# ──────────────────────────────────────────── #

# One colour per height bin, plus the matching normalisation for the colour-bar.
cm = plt.get_cmap("viridis", nbins)
norm = mpl.colors.Normalize(vmin=0, vmax=nbins - 1)
palette = [cm(k) for k in range(nbins)]


def _in_window(stamps):
    """Boolean mask: timestamps within [month_start, month_end], both inclusive."""
    return (stamps >= month_start) & (stamps <= month_end)


# 1. Samples inside the chosen window, thinned to every N-th row.
mask = _in_window(tides_df.acquired)
month_df = tides_df.loc[mask].iloc[::N]

# 2. Per solar date, the sample with the smallest Planet offset, limited to the window.
closest_planet_idx = tides_df.groupby("solar_date")["planet_offset"].idxmin()
planet_daily = tides_df.loc[closest_planet_idx, ["acquired", "tide_height", "height_bin"]]
planet_daily = planet_daily[_in_window(planet_daily.acquired)]

# 3. Plot the tide curve as points coloured by height bin.
fig, ax = plt.subplots(figsize=(11, 4))

scatter_style = dict(cmap=cm, norm=norm, s=6, alpha=0.7, edgecolor="none")
sc = ax.scatter(
    month_df.acquired,
    month_df.tide_height,
    c=month_df.height_bin,
    **scatter_style,
)

# Dashed blue line at each selected Planet sample time.
for ts in planet_daily.acquired:
    ax.axvline(ts, color="blue", linestyle="--", linewidth=1)

# Guides at the upper edge of the lowest bin and the lower edge of the highest bin.
for guide_height, guide_colour in (
    (height_edges[1], palette[0]),
    (height_edges[-2], palette[-1]),
):
    ax.axhline(guide_height, color=guide_colour, linestyle="-", linewidth=2)
# ax.axhline(mean_tide_height - mid_tide_delta,  color='red', linestyle=":", linewidth=2)
# ax.axhline(mean_tide_height + mid_tide_delta, color='red', linestyle=":", linewidth=2)

ax.set_ylabel("Tide height (m)")
ax.set_xlabel("Datetime (UTC)")
ax.set_title(f"Tide heights {month_start} – {month_end} (subsampled {N}-min)")

# Colour-bar keyed to height_bin.
cbar = fig.colorbar(
    mpl.cm.ScalarMappable(norm=norm, cmap=cm),
    ax=ax,
    pad=0.02,
    shrink=0.8,
    label="Height-bin (0 = lowest, 9 = highest)",
)

plt.tight_layout()
plt.show()

In [None]:
# ─────────────── adjust here ─────────────── #
month_start = "2023-12-01"
month_end   = "2024-12-01"
N = 60                       # keep every N-th sample of the window
# ──────────────────────────────────────────── #

# ── select the window, keep every N-th sample, order by time ────────────
mask  = (tides_df.acquired >= month_start) & (tides_df.acquired <= month_end)
month = tides_df.loc[mask].iloc[::N].sort_values("acquired")

# convert datetime → Matplotlib date numbers (float days since Matplotlib's epoch)
x_dt   = pd.to_datetime(month.acquired)
x_num  = mdates.date2num(x_dt)
y      = month.tide_height.values
c      = month.height_bin.values               # integer bin index, 0 … nbins-1

# One colormap / normalisation, reused for the segments, guides and colour-bar.
cm      = plt.get_cmap("viridis", nbins)
norm    = mpl.colors.Normalize(vmin=0, vmax=nbins - 1)
palette = [cm(i) for i in range(nbins)]

# Build one line segment per consecutive pair of samples; each segment takes
# the colour of its starting sample's bin (integer input indexes the colormap
# lookup table directly).
points   = np.column_stack([x_num, y])
segments = np.stack([points[:-1], points[1:]], axis=1)
colours  = cm(c[:-1])

fig, ax = plt.subplots(figsize=(11,4))

lc = mpl.collections.LineCollection(segments,
                                    colors=colours,
                                    linewidth=1.3)
ax.add_collection(lc)

# tell Matplotlib that the x-axis is dates
ax.set_xlim(x_num.min(), x_num.max())
ax.xaxis_date()                                       # << key line
ax.xaxis.set_major_locator(mdates.AutoDateLocator())
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(ax.xaxis.get_major_locator()))

# 2.  Planet pass per day (closest offset)
# planet_daily = tides_df.loc[
#     tides_df.groupby("solar_date")["planet_offset"].idxmin(),
#     ["acquired", "tide_height", "height_bin"]
# ]
# planet_daily = planet_daily[
#     (planet_daily.acquired >= month_start) & (planet_daily.acquired <= month_end)
# ]

# # Planet pass markers
# for ts in planet_daily.acquired:
#     ax.axvline(mdates.date2num(ts), color="red", ls="--", lw=0.8, alpha=0.6)

# guide lines at the upper edge of the lowest bin and the lower edge of the
# highest bin; palette[-1] keeps working when nbins is not 10
ax.axhline(height_edges[1],  color=palette[0], lw=2)
ax.axhline(height_edges[-2], color=palette[-1], lw=2)

ax.set_ylabel("Tide height (m)")
ax.set_title(f"Tide heights {month_start} – {month_end}  ({N}-min subsample)")

# colour-bar for height_bin; the top label follows nbins
cbar = fig.colorbar(
    mpl.cm.ScalarMappable(norm=norm, cmap=cm),
    ax=ax, pad=0.02, shrink=0.8,
    label=f"Height-bin (0 = lowest, {nbins - 1} = highest)"
)

plt.tight_layout()
plt.savefig(FIG_DIR / "tidal_elevation.png")
plt.show()