# Tidal Analysis

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import folium
from pathlib import Path
from matplotlib.cm import ScalarMappable
import matplotlib as mpl
from branca.colormap import linear
from shapely.geometry import Polygon, MultiPolygon
from random import shuffle
from matplotlib import colors, ticker
import cartopy.crs as ccrs

from src.gen_points_map import compute_step, make_equal_area_hex_grid
from src.geo_util import assign_intersection_id
from src.plotting.util import plot_gdf_column

import warnings
# Silence all warnings for the whole session. This is a global filter, so
# deprecation and pandas chained-assignment warnings raised by later cells are
# hidden as well.
warnings.filterwarnings("ignore")  # hide every warning

In [None]:
# Folder holding the input products read by this notebook.
BASE = Path("/Users/kyledorman/data/planet_coverage/points_30km/")

# Figures are written to "figs/simulated_tidal" next to BASE; the folder
# (and any missing parents) is created if it does not exist yet.
FIG_DIR = BASE.parent.joinpath("figs", "simulated_tidal")
FIG_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Coordinate reference systems used in this notebook.
display_crs = "EPSG:4326"    # lon/lat; every layer is converted to this for plotting
robinson_crs = "ESRI:54030"  # World Robinson; passed to the hex-grid builder
sinus_crs = "ESRI:54008"     # World Sinusoidal; passed to assign_intersection_id

# Input layers. The coastal grid's "cell_id" is renamed to "grid_id" because a
# separate "cell_id" (taken from the ocean grid) is attached to it below.
ca_ocean = gpd.read_file(BASE / "ca_ocean.geojson")
query_df = gpd.read_file(BASE / "ocean_grids.gpkg")
grids_df = gpd.read_file(BASE / "coastal_grids.gpkg").rename(columns={"cell_id": "grid_id"})
heuristics_df = pd.read_csv(BASE / "simulated_tidal_coverage_heuristics.csv").set_index("cell_id")

# Hex grid used for aggregated maps, keyed by "hex_id".
cell_size_m = compute_step(1.5)
_, hex_grid = make_equal_area_hex_grid(cell_size_m, robinson_crs)
hex_grid = hex_grid.rename(columns={"cell_id": "hex_id"}).to_crs(display_crs)

# Assign hex_id to query_df and grids_df
grids_df = assign_intersection_id(grids_df, hex_grid, "grid_id", "hex_id", sinus_crs)
query_df = assign_intersection_id(query_df, hex_grid, "cell_id", "hex_id", sinus_crs)

# Assign cell_id to grids_df
grids_df = assign_intersection_id(grids_df, query_df, "grid_id", "cell_id", sinus_crs)

# Add tidal heuristics to grids_df and query_df (left join keeps rows without heuristics)
grids_df = grids_df.set_index("cell_id").join(heuristics_df, how='left').reset_index()
query_df = query_df.set_index("cell_id").join(heuristics_df, how='left').reset_index()

# Set plot crs
query_df = query_df.to_crs(display_crs)
grids_df = grids_df.to_crs(display_crs)

# Set indexes
query_df = query_df.set_index("cell_id")
grids_df = grids_df.set_index("grid_id")
hex_grid = hex_grid.set_index("hex_id")

# Filter grids to CA region. The union of the CA ocean cells is computed once
# and reused for both the coastal-grid and the hex-grid filter.
query_ca = query_df[query_df.geometry.intersects(ca_ocean.union_all())]
query_ca_union = query_ca.union_all()
grids_ca = grids_df[grids_df.geometry.intersects(query_ca_union)]
hex_grid_ca = hex_grid[hex_grid.geometry.intersects(query_ca_union)]

# Load tidal data. heuristics_df was read above and has not been modified
# since (the joins return new frames), so it is reused instead of re-read.
tide_df = pd.read_csv(BASE / "simulated_tidal_coverage.csv").set_index("cell_id")

# Mark null values as full year.
# NOTE(review): the 365.0 fill is applied to every column of tide_df, not only
# to "days between" style columns — confirm that is intended.
for col in tide_df.columns:
    tide_df.loc[tide_df[col].isna(), col] = 365.0
    assert not tide_df[col].isna().any()

# Merge all dataframes; the inner join keeps only cells present in query_df.
tide_heuristics_grid_df = tide_df.join(heuristics_df).join(query_df[["geometry", "hex_id"]], how='inner')
tide_heuristics_grid_df = gpd.GeoDataFrame(tide_heuristics_grid_df, geometry="geometry", crs=display_crs)

In [None]:
# Preview the first rows of the merged tide / heuristics / geometry table.
tide_heuristics_grid_df.head(3)

In [None]:
# Preview the first rows of the heuristics table (indexed by cell_id).
heuristics_df.head(5)

In [None]:
# Overview map of the tide grid cells; only the geometry column is needed.
gdf = tide_heuristics_grid_df[["geometry"]].copy()

# Hand out the integers 0..len-1 in random order as a per-cell colour id.
ids = list(range(len(gdf)))
shuffle(ids)
gdf["id"] = ids

# Discrete colour scale with one bin per id, built from the tab20 palette.
n_ids = gdf["id"].nunique()
base_cmap = plt.get_cmap("tab20", n_ids)
cmap = colors.ListedColormap(base_cmap(range(n_ids)))
norm = colors.BoundaryNorm(range(n_ids + 1), n_ids)

# Global map on a Robinson projection.
proj = ccrs.Robinson()
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(projection=proj)
ax.set_global()

# The data are in lon/lat, so declare them as PlateCarree and let Cartopy
# re-project them onto the map.
plot_kwargs = dict(
    column="id",
    ax=ax,
    cmap=cmap,
    norm=norm,
    linewidth=0.15,
    edgecolor="black",
    transform=ccrs.PlateCarree(),
)
gdf.plot(**plot_kwargs)

ax.set_title("Coastal Tide Grids", pad=12)
fig.tight_layout()
fig.savefig(FIG_DIR / "tidal_grids.png")
plt.show()

In [None]:
# Hex cells that contain at least one tide grid cell. Take an explicit copy so
# that adding the "id" column below never writes into (or warns about) a
# selection of hex_grid.
gdf = hex_grid.loc[tide_heuristics_grid_df.hex_id.unique()].copy()

# Hand out the integers 0..len-1 in random order as a per-hex colour id.
ids = list(range(len(gdf)))
shuffle(ids)
gdf["id"] = ids

# Discrete colour scale with one bin per id, built from the tab20 palette.
n_ids  = gdf.id.nunique()
base_cmap = plt.get_cmap("tab20", n_ids)
cmap      = colors.ListedColormap(base_cmap(range(n_ids)))
norm      = colors.BoundaryNorm(range(n_ids + 1), n_ids)

# Global map on a Robinson projection.
proj = ccrs.Robinson()

fig = plt.figure(figsize=(12, 6))
ax  = plt.axes(projection=proj)
ax.set_global()

# hex_grid was converted to lon/lat, so declare the data as PlateCarree and let
# Cartopy re-project it onto the map.
gdf.plot(
    column="id",
    ax=ax,
    cmap=cmap,
    norm=norm,
    linewidth=0.15,
    edgecolor="black",
    transform=ccrs.PlateCarree(),   # <- incoming lon/lat coords
)

plt.title("Hex Coastal Area", pad=12)
plt.tight_layout()
plt.savefig(FIG_DIR / "hex_coastal_area.png")
plt.show()

In [None]:
# Coastal strip polygons, coloured by a randomly assigned id.
gdf = gpd.read_file(BASE / "coastal_strips.gpkg")

# Hand out the integers 0..len-1 in random order as a per-strip colour id.
ids = list(range(len(gdf)))
shuffle(ids)
gdf["id"] = ids

# Discrete colour scale with one bin per id, built from the tab20 palette.
n_ids = gdf.id.nunique()
base_cmap = plt.get_cmap("tab20", n_ids)
cmap = colors.ListedColormap(base_cmap(range(n_ids)))
norm = colors.BoundaryNorm(range(n_ids + 1), n_ids)

# Global map on a Robinson projection.
proj = ccrs.Robinson()
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(projection=proj)
ax.set_global()

# The data are declared as Sinusoidal here (not lon/lat).
# NOTE(review): this assumes coastal_strips.gpkg is stored in a sinusoidal
# CRS — confirm against the file's CRS.
plot_kwargs = dict(
    column="id",
    ax=ax,
    cmap=cmap,
    norm=norm,
    linewidth=0.15,
    edgecolor="black",
    transform=ccrs.Sinusoidal(),
)
gdf.plot(**plot_kwargs)

ax.set_title("Coastal Area", pad=12)
fig.tight_layout()
fig.savefig(FIG_DIR / "coastal_strips.png")
plt.show()

In [None]:
# List the columns whose name starts with "planet".
list(filter(lambda name: name.startswith("planet"), tide_heuristics_grid_df.columns))

In [None]:
# Per-hex median maps for three Planet columns.
# Each entry: (column to aggregate, figure title, output file name).
hex_median_maps = [
    (
        "planet_observed_high_tide_offset",
        "Planet Simulated High Tide Offset",
        "planet_observed_high_tide_offset.png",
    ),
    (
        "planet_observed_low_tide_offset",
        "Planet Simulated Low Tide Offset",
        "planet_observed_low_tide_offset.png",
    ),
    (
        "planet_observed_spread",
        "Planet Phase Coverage",
        "planet_phase_coverage.png",
    ),
]

for column, title, filename in hex_median_maps:
    # Median of the column within each hex cell.
    df = tide_heuristics_grid_df.groupby("hex_id")[column].median().to_frame()
    # Keep only non-negative hex ids.
    # NOTE(review): negative ids presumably mark cells without a hex match — confirm.
    df = df[df.index >= 0]
    # Attach the hex geometry so the medians can be mapped.
    df = df.join(hex_grid[["geometry"]])
    gdf = gpd.GeoDataFrame(df, geometry="geometry")

    plot_gdf_column(
        gdf,
        column,
        title=title,
        show_coastlines=True,
        save_path=FIG_DIR / filename,
    )

In [None]:
# Per-hex median maps (log colour scale) of the Planet "days between p95"
# columns for low and high tide.
# Each entry: (column to aggregate, figure title, output file name).
days_between_maps = [
    (
        "planet_low_days_between_p95",
        "Planet Simulated Low Days Between p95",
        "planet_low_days_between_p95.png",
    ),
    (
        "planet_high_days_between_p95",
        "Planet Simulated High Days Between p95",
        "planet_high_days_between_p95.png",
    ),
]

for column, title, filename in days_between_maps:
    # Median of the column within each hex cell.
    df = tide_heuristics_grid_df.groupby("hex_id")[column].median().to_frame()
    # Keep only non-negative hex ids.
    # NOTE(review): negative ids presumably mark cells without a hex match — confirm.
    df = df[df.index >= 0]
    # Attach the hex geometry so the medians can be mapped.
    df = df.join(hex_grid[["geometry"]])
    gdf = gpd.GeoDataFrame(df, geometry="geometry")

    plot_gdf_column(
        gdf,
        column,
        title=title,
        show_coastlines=True,
        scale='log',
        save_path=FIG_DIR / filename,
    )

In [None]:
# Small-multiples figure of the "<sensor>_<level>_count" columns:
# one row per sensor, one column per tide level.
sensors       = ["planet", "sentinel", "landsat"]
base_metrics  = ["count"]
metrics       = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high")]

nrows, ncols  = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 4, nrows * 2),
    constrained_layout=True,
    squeeze=False,  # keep `axes` 2-D even if only one row or column is requested
)

cmap = "viridis"

# NOTE(review): `tide_heuristics_pts_df` is not defined in the visible cells of
# this notebook. It is assumed to be a point-geometry GeoDataFrame holding the
# "<sensor>_<metric>" columns — confirm it exists before this cell runs.
for c, metric in enumerate(metrics):
    # Colour scale shared by every sensor in this column.
    col_values = [f"{sat}_{metric}" for sat in sensors]
    vmin = tide_heuristics_pts_df[col_values].min().min()
    vmax = tide_heuristics_pts_df[col_values].max().max()
    norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)

    for r, sensor in enumerate(sensors):
        ax      = axes[r, c]
        colname = f"{sensor}_{metric}"

        tide_heuristics_pts_df.plot(
            column     = colname,
            ax         = ax,
            cmap       = cmap,
            norm       = norm,
            marker     = "o",
            markersize = 1,
            linewidth  = 0,
            legend     = False,
        )

        # Every panel is titled with its sensor and metric.
        ax.set_title(f"{sensor.title()} {metric.replace('_', ' ').title()}")
        ax.axis("off")

    # Each column is normalised independently, so each column gets its own
    # colour bar. A single bar drawn after the loop would only describe the
    # last column's scale.
    sm = ScalarMappable(norm=norm, cmap=cmap)
    sm.set_array([])
    cbar = fig.colorbar(sm, ax=axes[:, c], shrink=0.6, pad=0.02, location="right")
    cbar.ax.set_ylabel(base_metrics[0].replace('_', ' ').title())

plt.savefig(FIG_DIR / "tide_count.png", dpi=300)
plt.show()

In [None]:
# Small-multiples figure of the "<sensor>_<level>_count" columns with the
# colour scale capped at 1, so any count of one or more saturates the scale:
# one row per sensor, one column per tide level.
sensors       = ["planet", "sentinel", "landsat"]
base_metrics  = ["count"]
metrics       = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high")]

nrows, ncols  = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 4, nrows * 2),
    constrained_layout=True,
    squeeze=False,  # keep `axes` 2-D even if only one row or column is requested
)

cmap = "viridis"

# NOTE(review): `tide_heuristics_pts_df` is not defined in the visible cells of
# this notebook. It is assumed to be a point-geometry GeoDataFrame holding the
# "<sensor>_<metric>" columns — confirm it exists before this cell runs.
for c, metric in enumerate(metrics):
    # Colour scale shared by every sensor in this column; the upper bound is
    # fixed at 1 rather than taken from the data.
    col_values = [f"{sat}_{metric}" for sat in sensors]
    vmin = tide_heuristics_pts_df[col_values].min().min()
    vmax = 1
    norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)

    for r, sensor in enumerate(sensors):
        ax      = axes[r, c]
        colname = f"{sensor}_{metric}"
        assert not tide_heuristics_pts_df[colname].isna().any()

        tide_heuristics_pts_df.plot(
            column     = colname,
            ax         = ax,
            cmap       = cmap,
            norm       = norm,
            marker     = "o",
            markersize = 1,
            linewidth  = 0,
            legend     = False,
        )

        # Every panel is titled with its sensor and metric.
        ax.set_title(f"{sensor.title()} {metric.replace('_', ' ').title()}")
        ax.axis("off")

    # Each column is normalised independently (vmin can differ), so each
    # column gets its own colour bar. A single bar drawn after the loop would
    # only describe the last column's scale.
    sm = ScalarMappable(norm=norm, cmap=cmap)
    sm.set_array([])
    cbar = fig.colorbar(sm, ax=axes[:, c], shrink=0.6, pad=0.02, location="right")
    cbar.ax.set_ylabel(base_metrics[0].replace('_', ' ').title())

plt.savefig(FIG_DIR / "tide_count_binary.png", dpi=300)
plt.show()

In [None]:
# Small-multiples figure of the "<sensor>_<level>_days_between_p95" columns:
# one row per sensor, one column per tide level (low, high, mid).
sensors       = ["planet", "sentinel", "landsat"]
base_metrics  = ["days_between_p95"]
metrics       = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high", "mid")]

nrows, ncols  = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 4, nrows * 2),
    constrained_layout=True,
    squeeze=False,  # keep `axes` 2-D even if only one row or column is requested
)

cmap = "viridis"

# NOTE(review): `tide_heuristics_pts_df` is not defined in the visible cells of
# this notebook. It is assumed to be a point-geometry GeoDataFrame holding the
# "<sensor>_<metric>" columns — confirm it exists before this cell runs.
for c, metric in enumerate(metrics):
    # Colour scale shared by every sensor in this column. The lower bound is
    # pinned to 0 instead of the data minimum; the upper bound is the largest
    # value across the three sensors.
    col_values = [f"{sat}_{metric}" for sat in sensors]
    vmin = 0
    vmax = tide_heuristics_pts_df[col_values].max().max()
    norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)

    for r, sensor in enumerate(sensors):
        ax      = axes[r, c]
        colname = f"{sensor}_{metric}"
        assert not tide_heuristics_pts_df[colname].isna().any(), colname

        tide_heuristics_pts_df.plot(
            column     = colname,
            ax         = ax,
            cmap       = cmap,
            norm       = norm,
            marker     = "o",
            markersize = 1,
            linewidth  = 0,
            legend     = False,
        )

        # Every panel is titled with its sensor and tide level (the first
        # token of the metric name).
        low_high = metric.split("_")[0]
        ax.set_title(f"{sensor.title()} {low_high.title()} Days Between p95")
        ax.axis("off")

    # Each column has its own vmax, so each column gets its own colour bar.
    # A single bar drawn after the loop would only describe the last column's
    # scale.
    sm = ScalarMappable(norm=norm, cmap=cmap)
    sm.set_array([])
    cbar = fig.colorbar(sm, ax=axes[:, c], shrink=0.6, pad=0.02, location="right")
    cbar.ax.set_ylabel(base_metrics[0].replace('_', ' ').title())

plt.savefig(FIG_DIR / "tide_days_between_p95.png", dpi=300)
plt.show()

In [None]:
# Share of cells (in percent) whose count is exactly zero, per tide level and sensor.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["count"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high", "mid")]

total_cells = len(tide_df)

for metric in metrics:
    print(metric, "% No observations")
    for sensor in sensors:
        n_without_obs = (tide_df[f'{sensor}_{metric}'] == 0).sum()
        print(sensor, round(100 * n_without_obs / total_cells, 1))

In [None]:
# Share of large-tide-range cells (tide_range > 2.0) whose count is exactly
# zero, per tide level and sensor.
df = tide_heuristics_grid_df[tide_heuristics_grid_df.tide_range > 2.0]
sensors       = ["planet", "sentinel", "landsat"]
base_metrics  = ["count"]
metrics       = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high", "mid")]

# The percentage is taken over the filtered subset itself. Dividing by the
# size of the full tide_df would understate the share, because the numerator
# only counts rows inside the subset.
n_large_range = len(df)

for metric in metrics:
    print(metric, "% No observations - Large Range")
    for sensor in sensors:
        n_without_obs = (df[f'{sensor}_{metric}'] == 0).sum()
        # Report NaN rather than dividing by zero when no cell passes the filter.
        pct = round(100 * n_without_obs / n_large_range, 1) if n_large_range else float("nan")
        print(sensor, pct)

In [None]:
# Share of cells (in percent) whose days-between-p95 value exceeds 100,
# per tide level and sensor.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["days_between_p95"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high", "mid")]

total_cells = len(tide_df)

for metric in metrics:
    print(metric, "% > 100 days")
    for sensor in sensors:
        n_over_threshold = (tide_df[f'{sensor}_{metric}'] > 100).sum()
        print(sensor, round(100 * n_over_threshold / total_cells, 1))

In [None]:
# Median days-between-p95 over all cells, per tide level and sensor.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["days_between_p95"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high", "mid")]

for metric in metrics:
    print(metric, "median")
    for sensor in sensors:
        values = tide_df[f'{sensor}_{metric}']
        print(sensor, round(values.median(), 1))

In [None]:
# Median days-between-p95 restricted to cells with tide_range > 2.0,
# per tide level and sensor.
large_range_mask = tide_heuristics_grid_df.tide_range > 2.0
df = tide_heuristics_grid_df[large_range_mask]
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["days_between_p95"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high", "mid")]

for metric in metrics:
    print(metric, "median - Large Range")
    for sensor in sensors:
        values = df[f'{sensor}_{metric}']
        print(sensor, round(values.median(), 1))

In [None]:
# Median observed tide offset over all cells, for low and high tide, per sensor.
sensors = ["planet", "sentinel", "landsat"]
levels = ["low", "high"]

for lvl in levels:
    metric = f"observed_{lvl}_tide_offset"
    print(metric, "median")
    for sensor in sensors:
        offsets = tide_df[f'{sensor}_{metric}']
        print(sensor, round(offsets.median(), 1))
    # Blank line between the two tide levels.
    print("")

In [None]:
# Median of the "<sensor>_observed_spread" column over all cells, per sensor.
sensors = ["planet", "sentinel", "landsat"]

metric = "observed_spread"
print(metric, "median")
for sensor in sensors:
    spread = tide_df[f'{sensor}_{metric}']
    print(sensor, round(spread.median(), 1))

In [None]:
# Median of the "<sensor>_observed_<level>_tide_offset_rel" columns over all
# cells, for low and high tide, per sensor.
sensors = ["planet", "sentinel", "landsat"]
levels = ["low", "high"]

for lvl in levels:
    metric = f"observed_{lvl}_tide_offset_rel"
    print(metric, "median")
    for sensor in sensors:
        rel_offsets = tide_df[f'{sensor}_{metric}']
        print(sensor, round(rel_offsets.median(), 1))
    # Blank line between the two tide levels.
    print("")

In [None]:
# Histogram of the tide_range column across all merged cells.
tide_heuristics_grid_df.tide_range.hist()

In [None]:
# Load the grid tide table (presumably one row per coastal grid — confirm).
# NOTE(review): this path is relative to the notebook's working directory,
# unlike the other inputs, which are read from BASE.
obs_df = gpd.read_file("../grid_tide.gpkg")

# Preview the first rows.
obs_df.head(3)

In [None]:
# Show which columns the observation table provides.
obs_df.columns

In [None]:
# Preview the id columns next to the tide_* and obs_* tide columns and the sample count.
preview_columns = [
    "grid_id",
    "hex_id",
    "cell_id",
    "tide_min",
    "tide_max",
    "tide_range",
    "obs_min_tide_height",
    "obs_max_tide_height",
    "obs_tide_range",
    "sample_count",
]
obs_df[preview_columns].head(5)

In [None]:
# One histogram per series, each in its own figure: three raw columns, then
# the per-cell minimum and per-cell median of phase_coverage.
phase_by_cell = obs_df.groupby("cell_id").phase_coverage
histogram_inputs = [
    obs_df.sample_count,
    obs_df.obs_tide_range,
    obs_df.phase_coverage,
    phase_by_cell.min(),
    phase_by_cell.median(),
]

for series in histogram_inputs:
    plt.figure()
    series.hist()

In [None]:
# 75th percentile of phase_coverage within each hex cell, mapped on the hex grid.
phase_coverage_q75 = obs_df.groupby("hex_id").phase_coverage.quantile(0.75)
# Inner join: only hex cells that have observations are kept.
cell_df = hex_grid[["geometry"]].join(phase_coverage_q75, how='inner')

plot_gdf_column(cell_df, "phase_coverage")

In [None]:
# Median (0.5 quantile) of obs_high_tide_offset within each hex cell, mapped on the hex grid.
high_tide_offset_median = obs_df.groupby("hex_id").obs_high_tide_offset.quantile(0.5)
# Inner join: only hex cells that have observations are kept.
cell_df = hex_grid[["geometry"]].join(high_tide_offset_median, how='inner')

plot_gdf_column(cell_df, "obs_high_tide_offset")