# Tidal Analysis

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from pathlib import Path
from matplotlib.cm import ScalarMappable
import matplotlib as mpl
from matplotlib import colors, ticker
import cartopy.crs as ccrs

from src.plotting.util import load_grids, plot_gdf_column

import warnings
# NOTE(review): this silences every warning category for the whole session,
# including deprecation and runtime warnings raised by the libraries imported
# above. Narrow the filter if a specific noisy warning is the real target.
warnings.filterwarnings("ignore")  # hide every warning

In [None]:
# Root folder holding the inputs read by this notebook.
BASE = Path("/Users/kyledorman/data/planet_coverage/points_30km/")

# Figures are written next to the data folder, under figs/simulated_tidal;
# the directory (and any missing parents) is created on first run.
FIG_DIR = BASE.parent.joinpath("figs", "simulated_tidal")
FIG_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# CRS assigned to the merged GeoDataFrame built at the end of this cell.
display_crs = "EPSG:4326"

query_df, grids_df, hex_grid = load_grids(BASE)

# Simulated tidal coverage table, indexed by cell_id.
tide_df = pd.read_csv(BASE / "simulated_tidal_coverage.csv")
tide_df = tide_df.set_index("cell_id")

# Replace missing values with 365.0 (a full year).
# NOTE(review): the fill is applied to every column of the CSV, not only to
# the day-gap columns -- confirm that 365 is a sensible default for all of them.
for col in tide_df.columns:
    missing = tide_df[col].isna()
    tide_df.loc[missing, col] = 365.0
    assert not tide_df[col].isna().any()

# Keep only cells present in both tables and attach geometry + hex_id.
tide_query_df = gpd.GeoDataFrame(
    tide_df.join(query_df[["geometry", "hex_id"]], how="inner"),
    geometry="geometry",
    crs=display_crs,
)

In [None]:
# Preview the first three rows of the merged table.
tide_query_df.head(3)

In [None]:
# List the columns of the merged table whose name starts with "planet".
[name for name in tide_query_df.columns if name.startswith("planet")]

In [None]:
# Per-hexagon medians of three Planet metrics, each drawn as its own map.
# Entries are (column, figure title, output file name).
planet_maps = [
    (
        "planet_observed_high_tide_offset",
        "Planet Simulated High Tide Offset",
        "planet_simulated_high_tide_offset.png",
    ),
    (
        "planet_observed_low_tide_offset",
        "Planet Simulated Low Tide Offset",
        "planet_simulated_low_tide_offset.png",
    ),
    (
        "planet_observed_spread",
        "Planet Phase Coverage",
        "planet_phase_coverage.png",
    ),
]

for column, title, filename in planet_maps:
    # Median per hexagon; rows whose hex_id is negative are dropped before
    # the hexagon geometry is attached.
    df = tide_query_df.groupby("hex_id")[column].median().to_frame()
    df = df[df.index >= 0].join(hex_grid[["geometry"]])
    gdf = gpd.GeoDataFrame(df, geometry="geometry")

    plot_gdf_column(
        gdf,
        column,
        title=title,
        show_land_ocean=True,
        show_grid=True,
        save_path=FIG_DIR / filename,
    )

In [None]:
# Per-hexagon medians of the Planet days-between-p95 columns, drawn on a log
# colour scale. Entries are (column, figure title, output file name).
planet_gap_maps = [
    (
        "planet_low_days_between_p95",
        "Planet Simulated Low Days Between p95",
        "planet_low_days_between_p95.png",
    ),
    (
        "planet_high_days_between_p95",
        "Planet Simulated High Days Between p95",
        "planet_high_days_between_p95.png",
    ),
]

for column, title, filename in planet_gap_maps:
    # Median per hexagon; rows whose hex_id is negative are dropped before
    # the hexagon geometry is attached.
    df = tide_query_df.groupby("hex_id")[column].median().to_frame()
    df = df[df.index >= 0].join(hex_grid[["geometry"]])
    gdf = gpd.GeoDataFrame(df, geometry="geometry")

    plot_gdf_column(
        gdf,
        column,
        title=title,
        show_land_ocean=True,
        show_grid=True,
        scale="log",
        save_path=FIG_DIR / filename,
    )

In [None]:
# High-tide day counts: one Robinson-projection map per sensor, all sharing a
# single log colour scale so the sensors can be compared directly.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["count"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("high",)]

nrows, ncols = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 12, nrows * 6),
    constrained_layout=True,
    subplot_kw={"projection": ccrs.Robinson()},
    squeeze=False,  # keep a 2-D (nrows, ncols) array even when ncols == 1
)

k = [f"{sat}_{metric}" for sat in sensors for metric in metrics]

# Median of every metric per hexagon; rows whose hex_id is negative are
# dropped before the hexagon geometry is attached.
df = pd.DataFrame(tide_query_df.groupby("hex_id")[k].median())
df = df[df.index >= 0].join(hex_grid[["geometry"]])
gdf = gpd.GeoDataFrame(df, geometry="geometry")

# Shift by one so a median of zero can still be drawn on a log scale. The
# shifted copies live in "_"-prefixed columns next to the raw values.
shifted = [f"_{name}" for name in k]
for name, shifted_name in zip(k, shifted):
    gdf[shifted_name] = gdf[name] + 1

# Shared colour limits across sensors. They are taken from the shifted
# columns -- the values that are actually drawn -- so the limits and the data
# agree; the colour bar is therefore in shifted units (value + 1).
vmin = max(1, gdf[shifted].min().min())
vmax = gdf[shifted].max().max()

for c, metric in enumerate(metrics):
    for r, sensor in enumerate(sensors):
        ax = axes[r, c]
        ax.axis("off")

        plot_gdf_column(
            gdf,
            f"_{sensor}_{metric}",
            vmin=vmin,
            vmax=vmax,
            scale="log",
            show_land_ocean=True,
            show_grid=True,
            title=f"{sensor.title()} High Tide Days Count",
            ax=ax,
        )

# One horizontal colour bar under the bottom row, built from a stand-alone
# mappable that uses the same limits as the maps.
# NOTE(review): assumes plot_gdf_column draws with viridis -- confirm.
cmap = plt.get_cmap("viridis")
norm = colors.LogNorm(vmin=vmin, vmax=vmax)
formatter = ticker.FuncFormatter(lambda y, _: f"{y:g}")
locator = ticker.LogLocator(base=10, numticks=10)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(
    sm,
    ax=axes[-1, :],
    orientation="horizontal",
    shrink=0.65,
    pad=0.02,
    format=formatter,
)
cbar.locator = locator
cbar.update_ticks()
cbar.set_label("High Tide Days Count")

# Save through the figure handle so the right figure is written even if the
# current pyplot figure has changed.
fig.savefig(FIG_DIR / "high_tide_count.png", dpi=300)
plt.show()

In [None]:
# Low-tide day counts: one Robinson-projection map per sensor, all sharing a
# single log colour scale so the sensors can be compared directly.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["count"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low",)]

nrows, ncols = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 12, nrows * 6),
    constrained_layout=True,
    subplot_kw={"projection": ccrs.Robinson()},
    squeeze=False,  # keep a 2-D (nrows, ncols) array even when ncols == 1
)

k = [f"{sat}_{metric}" for sat in sensors for metric in metrics]

# Median of every metric per hexagon; rows whose hex_id is negative are
# dropped before the hexagon geometry is attached.
df = pd.DataFrame(tide_query_df.groupby("hex_id")[k].median())
df = df[df.index >= 0].join(hex_grid[["geometry"]])
gdf = gpd.GeoDataFrame(df, geometry="geometry")

# Shift by one so a median of zero can still be drawn on a log scale. The
# shifted copies live in "_"-prefixed columns next to the raw values.
shifted = [f"_{name}" for name in k]
for name, shifted_name in zip(k, shifted):
    gdf[shifted_name] = gdf[name] + 1

# Shared colour limits across sensors. They are taken from the shifted
# columns -- the values that are actually drawn -- so the limits and the data
# agree; the colour bar is therefore in shifted units (value + 1).
vmin = max(1, gdf[shifted].min().min())
vmax = gdf[shifted].max().max()

for c, metric in enumerate(metrics):
    for r, sensor in enumerate(sensors):
        ax = axes[r, c]
        ax.axis("off")

        plot_gdf_column(
            gdf,
            f"_{sensor}_{metric}",
            vmin=vmin,
            vmax=vmax,
            scale="log",
            show_land_ocean=True,
            show_grid=True,
            title=f"{sensor.title()} Low Tide Days Count",
            ax=ax,
        )

# One horizontal colour bar under the bottom row, built from a stand-alone
# mappable that uses the same limits as the maps.
# NOTE(review): assumes plot_gdf_column draws with viridis -- confirm.
cmap = plt.get_cmap("viridis")
norm = colors.LogNorm(vmin=vmin, vmax=vmax)
formatter = ticker.FuncFormatter(lambda y, _: f"{y:g}")
locator = ticker.LogLocator(base=10, numticks=10)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(
    sm,
    ax=axes[-1, :],
    orientation="horizontal",
    shrink=0.65,
    pad=0.02,
    format=formatter,
)
cbar.locator = locator
cbar.update_ticks()
cbar.set_label("Low Tide Days Count")

# Save through the figure handle so the right figure is written even if the
# current pyplot figure has changed.
fig.savefig(FIG_DIR / "low_tide_count.png", dpi=300)
plt.show()

In [None]:
# High tide seen / not seen: one Robinson-projection map per sensor showing
# whether any cell inside a hexagon has a non-zero high-tide count.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["count"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("high",)]

k = [f"{sat}_{metric}" for sat in sensors for metric in metrics]

nrows, ncols = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 12, nrows * 6),
    constrained_layout=True,
    subplot_kw={"projection": ccrs.Robinson()},
    squeeze=False,  # keep a 2-D (nrows, ncols) array even when ncols == 1
)

# Largest count per hexagon; rows whose hex_id is negative are dropped.
df = pd.DataFrame(tide_query_df.groupby("hex_id")[k].max())
df = df[df.index >= 0]
# Threshold all metric columns in one step: True where the hexagon maximum is
# above zero. (The previous per-column loop never used its loop variable and
# re-thresholded the whole frame on every pass.)
df[k] = df[k] > 0
df = df.join(hex_grid[["geometry"]])
gdf = gpd.GeoDataFrame(df, geometry="geometry")

# Fixed limits for the boolean values.
vmin = 0
vmax = 1

for c, metric in enumerate(metrics):
    for r, sensor in enumerate(sensors):
        ax = axes[r, c]
        ax.axis("off")

        plot_gdf_column(
            gdf,
            f"{sensor}_{metric}",
            vmin=vmin,
            vmax=vmax,
            show_land_ocean=True,
            show_grid=True,
            title=f"{sensor.title()} High Tide Seen",
            ax=ax,
        )

# One horizontal colour bar under the bottom row, built from a stand-alone
# mappable that uses the same limits as the maps.
# NOTE(review): assumes plot_gdf_column draws with viridis -- confirm.
cmap = plt.get_cmap("viridis")
norm = colors.Normalize(vmin=vmin, vmax=vmax)
formatter = ticker.ScalarFormatter()
locator = ticker.MaxNLocator(nbins=6)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(
    sm,
    ax=axes[-1, :],
    orientation="horizontal",
    shrink=0.65,
    pad=0.02,
    format=formatter,
)
cbar.locator = locator
cbar.update_ticks()
cbar.set_label("High Tide Seen")

# Save through the figure handle so the right figure is written even if the
# current pyplot figure has changed.
fig.savefig(FIG_DIR / "high_tide_binary.png", dpi=300)
plt.show()

In [None]:
# Low tide seen / not seen: one Robinson-projection map per sensor showing
# whether any cell inside a hexagon has a non-zero low-tide count.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["count"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low",)]

k = [f"{sat}_{metric}" for sat in sensors for metric in metrics]

nrows, ncols = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 12, nrows * 6),
    constrained_layout=True,
    subplot_kw={"projection": ccrs.Robinson()},
    squeeze=False,  # keep a 2-D (nrows, ncols) array even when ncols == 1
)

# Largest count per hexagon; rows whose hex_id is negative are dropped.
df = pd.DataFrame(tide_query_df.groupby("hex_id")[k].max())
df = df[df.index >= 0]
# Threshold all metric columns in one step: True where the hexagon maximum is
# above zero. (The previous per-column loop never used its loop variable and
# re-thresholded the whole frame on every pass.)
df[k] = df[k] > 0
df = df.join(hex_grid[["geometry"]])
gdf = gpd.GeoDataFrame(df, geometry="geometry")

# Fixed limits for the boolean values.
vmin = 0
vmax = 1

for c, metric in enumerate(metrics):
    for r, sensor in enumerate(sensors):
        ax = axes[r, c]
        ax.axis("off")

        plot_gdf_column(
            gdf,
            f"{sensor}_{metric}",
            vmin=vmin,
            vmax=vmax,
            show_land_ocean=True,
            show_grid=True,
            title=f"{sensor.title()} Low Tide Seen",
            ax=ax,
        )

# One horizontal colour bar under the bottom row, built from a stand-alone
# mappable that uses the same limits as the maps.
# NOTE(review): assumes plot_gdf_column draws with viridis -- confirm.
cmap = plt.get_cmap("viridis")
norm = colors.Normalize(vmin=vmin, vmax=vmax)
formatter = ticker.ScalarFormatter()
locator = ticker.MaxNLocator(nbins=6)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(
    sm,
    ax=axes[-1, :],
    orientation="horizontal",
    shrink=0.65,
    pad=0.02,
    format=formatter,
)
cbar.locator = locator
cbar.update_ticks()
cbar.set_label("Low Tide Seen")

# Save through the figure handle so the right figure is written even if the
# current pyplot figure has changed.
fig.savefig(FIG_DIR / "low_tide_binary.png", dpi=300)
plt.show()

In [None]:
# High-tide days-between-p95: one Robinson-projection map per sensor, all
# sharing a single log colour scale so the sensors can be compared directly.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["days_between_p95"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("high",)]

nrows, ncols = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 12, nrows * 6),
    constrained_layout=True,
    subplot_kw={"projection": ccrs.Robinson()},
    squeeze=False,  # keep a 2-D (nrows, ncols) array even when ncols == 1
)

k = [f"{sat}_{metric}" for sat in sensors for metric in metrics]

# Median of every metric per hexagon; rows whose hex_id is negative are
# dropped before the hexagon geometry is attached.
df = pd.DataFrame(tide_query_df.groupby("hex_id")[k].median())
df = df[df.index >= 0].join(hex_grid[["geometry"]])
gdf = gpd.GeoDataFrame(df, geometry="geometry")

# Shift by one so a median of zero can still be drawn on a log scale. The
# shifted copies live in "_"-prefixed columns next to the raw values.
# NOTE(review): if these gaps can never be zero the shift is unnecessary and
# only offsets the displayed values by one day -- confirm.
shifted = [f"_{name}" for name in k]
for name, shifted_name in zip(k, shifted):
    gdf[shifted_name] = gdf[name] + 1

# Shared colour limits across sensors. They are taken from the shifted
# columns -- the values that are actually drawn -- so the limits and the data
# agree; the colour bar is therefore in shifted units (value + 1).
vmin = max(1, gdf[shifted].min().min())
vmax = gdf[shifted].max().max()

for c, metric in enumerate(metrics):
    for r, sensor in enumerate(sensors):
        ax = axes[r, c]
        ax.axis("off")

        plot_gdf_column(
            gdf,
            f"_{sensor}_{metric}",
            vmin=vmin,
            vmax=vmax,
            scale="log",
            show_land_ocean=True,
            show_grid=True,
            title=f"{sensor.title()} High Tide Days Between Samples",
            ax=ax,
        )

# One horizontal colour bar under the bottom row, built from a stand-alone
# mappable that uses the same limits as the maps.
# NOTE(review): assumes plot_gdf_column draws with viridis -- confirm.
cmap = plt.get_cmap("viridis")
norm = colors.LogNorm(vmin=vmin, vmax=vmax)
formatter = ticker.FuncFormatter(lambda y, _: f"{y:g}")
locator = ticker.LogLocator(base=10, numticks=10)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(
    sm,
    ax=axes[-1, :],
    orientation="horizontal",
    shrink=0.65,
    pad=0.02,
    format=formatter,
)
cbar.locator = locator
cbar.update_ticks()
cbar.set_label("High Tide Days Between Samples")

# Save through the figure handle so the right figure is written even if the
# current pyplot figure has changed.
fig.savefig(FIG_DIR / "high_tide_days_between.png", dpi=300)
plt.show()

In [None]:
# Low-tide days-between-p95: one Robinson-projection map per sensor, all
# sharing a single log colour scale so the sensors can be compared directly.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["days_between_p95"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low",)]

nrows, ncols = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 12, nrows * 6),
    constrained_layout=True,
    subplot_kw={"projection": ccrs.Robinson()},
    squeeze=False,  # keep a 2-D (nrows, ncols) array even when ncols == 1
)

k = [f"{sat}_{metric}" for sat in sensors for metric in metrics]

# Median of every metric per hexagon; rows whose hex_id is negative are
# dropped before the hexagon geometry is attached.
df = pd.DataFrame(tide_query_df.groupby("hex_id")[k].median())
df = df[df.index >= 0].join(hex_grid[["geometry"]])
gdf = gpd.GeoDataFrame(df, geometry="geometry")

# Shift by one so a median of zero can still be drawn on a log scale. The
# shifted copies live in "_"-prefixed columns next to the raw values.
# NOTE(review): if these gaps can never be zero the shift is unnecessary and
# only offsets the displayed values by one day -- confirm.
shifted = [f"_{name}" for name in k]
for name, shifted_name in zip(k, shifted):
    gdf[shifted_name] = gdf[name] + 1

# Shared colour limits across sensors. They are taken from the shifted
# columns -- the values that are actually drawn -- so the limits and the data
# agree; the colour bar is therefore in shifted units (value + 1).
vmin = max(1, gdf[shifted].min().min())
vmax = gdf[shifted].max().max()

for c, metric in enumerate(metrics):
    for r, sensor in enumerate(sensors):
        ax = axes[r, c]
        ax.axis("off")

        plot_gdf_column(
            gdf,
            f"_{sensor}_{metric}",
            vmin=vmin,
            vmax=vmax,
            scale="log",
            show_land_ocean=True,
            show_grid=True,
            title=f"{sensor.title()} Low Tide Days Between Samples",
            ax=ax,
        )

# One horizontal colour bar under the bottom row, built from a stand-alone
# mappable that uses the same limits as the maps.
# NOTE(review): assumes plot_gdf_column draws with viridis -- confirm.
cmap = plt.get_cmap("viridis")
norm = colors.LogNorm(vmin=vmin, vmax=vmax)
formatter = ticker.FuncFormatter(lambda y, _: f"{y:g}")
locator = ticker.LogLocator(base=10, numticks=10)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(
    sm,
    ax=axes[-1, :],
    orientation="horizontal",
    shrink=0.65,
    pad=0.02,
    format=formatter,
)
cbar.locator = locator
cbar.update_ticks()
cbar.set_label("Low Tide Days Between Samples")

# Save through the figure handle so the right figure is written even if the
# current pyplot figure has changed.
fig.savefig(FIG_DIR / "low_tide_days_between.png", dpi=300)
plt.show()

In [None]:
# Mid-tide days-between-p95: one Robinson-projection map per sensor, all
# sharing a single log colour scale so the sensors can be compared directly.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["days_between_p95"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("mid",)]

nrows, ncols = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 12, nrows * 6),
    constrained_layout=True,
    subplot_kw={"projection": ccrs.Robinson()},
    squeeze=False,  # keep a 2-D (nrows, ncols) array even when ncols == 1
)

k = [f"{sat}_{metric}" for sat in sensors for metric in metrics]

# Median of every metric per hexagon; rows whose hex_id is negative are
# dropped before the hexagon geometry is attached.
df = pd.DataFrame(tide_query_df.groupby("hex_id")[k].median())
df = df[df.index >= 0].join(hex_grid[["geometry"]])
gdf = gpd.GeoDataFrame(df, geometry="geometry")

# Shift by one so a median of zero can still be drawn on a log scale. The
# shifted copies live in "_"-prefixed columns next to the raw values.
# NOTE(review): if these gaps can never be zero the shift is unnecessary and
# only offsets the displayed values by one day -- confirm.
shifted = [f"_{name}" for name in k]
for name, shifted_name in zip(k, shifted):
    gdf[shifted_name] = gdf[name] + 1

# Shared colour limits across sensors. They are taken from the shifted
# columns -- the values that are actually drawn -- so the limits and the data
# agree; the colour bar is therefore in shifted units (value + 1).
vmin = max(1, gdf[shifted].min().min())
vmax = gdf[shifted].max().max()

for c, metric in enumerate(metrics):
    for r, sensor in enumerate(sensors):
        ax = axes[r, c]
        ax.axis("off")

        plot_gdf_column(
            gdf,
            f"_{sensor}_{metric}",
            vmin=vmin,
            vmax=vmax,
            scale="log",
            show_land_ocean=True,
            show_grid=True,
            title=f"{sensor.title()} Mid Tide Days Between Samples",
            ax=ax,
        )

# One horizontal colour bar under the bottom row, built from a stand-alone
# mappable that uses the same limits as the maps.
# NOTE(review): assumes plot_gdf_column draws with viridis -- confirm.
cmap = plt.get_cmap("viridis")
norm = colors.LogNorm(vmin=vmin, vmax=vmax)
formatter = ticker.FuncFormatter(lambda y, _: f"{y:g}")
locator = ticker.LogLocator(base=10, numticks=10)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(
    sm,
    ax=axes[-1, :],
    orientation="horizontal",
    shrink=0.65,
    pad=0.02,
    format=formatter,
)
cbar.locator = locator
cbar.update_ticks()
cbar.set_label("Mid Tide Days Between Samples")

# Save through the figure handle so the right figure is written even if the
# current pyplot figure has changed.
fig.savefig(FIG_DIR / "mid_tide_days_between.png", dpi=300)
plt.show()

In [None]:
# Percentage of rows with a zero count, per tide level and sensor, rounded to
# one decimal place.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["count"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high", "mid")]

# Numerator and denominator now come from the same table. The numerator used
# to count rows of the inner-joined tide_query_df while the denominator was
# len(tide_df), which skews the percentage whenever the join drops rows.
# NOTE(review): missing values were filled with 365 when the table was loaded,
# so a cell whose count was missing is not reported as "no observations" here
# -- confirm that is intended.
total_rows = len(tide_df)

for metric in metrics:
    print(metric, "% No observations")
    for sensor in sensors:
        zero_rows = (tide_df[f"{sensor}_{metric}"] == 0).sum()
        print(sensor, round(100 * zero_rows / total_rows, 1))

In [None]:
# Percentage of rows in tide_df whose <sensor>_<level>_days_between_p95 value
# is above 100, per tide level and sensor, rounded to one decimal place.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["days_between_p95"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high", "mid")]

total_rows = len(tide_df)

for metric in metrics:
    print(metric, "% > 100 days")
    for sensor in sensors:
        rows_over = (tide_df[f"{sensor}_{metric}"] > 100).sum()
        print(sensor, round(100 * rows_over / total_rows, 1))

In [None]:
# Median of each <sensor>_<level>_days_between_p95 column of tide_df, rounded
# to one decimal place.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["days_between_p95"]
metrics = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high", "mid")]

for metric in metrics:
    print(metric, "median")
    for sensor in sensors:
        values = tide_df[f"{sensor}_{metric}"]
        print(sensor, round(values.median(), 1))

In [None]:
# Median observed low/high tide offset per sensor, rounded to one decimal
# place, with a blank line after each tide level.
sensors = ["planet", "sentinel", "landsat"]
levels = ["low", "high"]

for lvl in levels:
    metric = f"observed_{lvl}_tide_offset"
    print(metric, "median")
    for sensor in sensors:
        values = tide_df[f"{sensor}_{metric}"]
        print(sensor, round(values.median(), 1))
    print("")

In [None]:
# Median of each sensor's observed_spread column, rounded to one decimal place.
sensors = ["planet", "sentinel", "landsat"]

metric = "observed_spread"
print(metric, "median")
for sensor in sensors:
    values = tide_df[f"{sensor}_{metric}"]
    print(sensor, round(values.median(), 1))

In [None]:
# Median of the observed_<level>_tide_offset_rel columns per sensor, rounded
# to one decimal place, with a blank line after each tide level.
sensors = ["planet", "sentinel", "landsat"]
levels = ["low", "high"]

for lvl in levels:
    metric = f"observed_{lvl}_tide_offset_rel"
    print(metric, "median")
    for sensor in sensors:
        values = tide_df[f"{sensor}_{metric}"]
        print(sensor, round(values.median(), 1))
    print("")