# Tidal Analysis

In [1]:
import duckdb
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import folium
from pathlib import Path
from IPython.display import display

from src.util import create_config

In [2]:
# Root of the local data tree and the run configuration built from it.
# NOTE(review): both paths are machine-specific and must be edited before running.
BASE = Path("/Users/kyledorman/data/planet_coverage/ca_only/")  # <-- update this
config_file = BASE / "dove" / "config.yaml"  # <-- update this
config = create_config(config_file)

# Results are sharded on disk by the grid id written as six hex digits and
# split into three two-character directory levels, e.g. 31565 -> 00/7b/4d.
GRID_ID = 31565
# Slicing below only reads six characters, so a larger (or negative) id would
# silently resolve to the wrong directory; fail loudly instead.
assert 0 <= GRID_ID <= 0xFFFFFF, f"GRID_ID {GRID_ID} does not fit in six hex digits"
hex_id = f"{GRID_ID:06x}"
d1, d2, d3 = hex_id[:2], hex_id[2:4], hex_id[4:6]
GRID_PATH = BASE / "dove" / "results" / "2023" / d1 / d2 / d3
FILE_PATH = GRID_PATH / "data.parquet"

assert FILE_PATH.exists(), f"No samples parquet found at {FILE_PATH}"

In [3]:
# Load the ocean outline (ca_ocean.geojson), the full ocean grid, and the
# simulated tidal-coverage table. grid_df keeps only the grid cells whose
# cell_id appears in the tidal table, reprojected to the outline's CRS.
ca_ocean = gpd.read_file(BASE / "ca_ocean.geojson")
all_grids_df = gpd.read_file(BASE / "ocean_grids.gpkg")
tide_df = pd.read_csv(BASE / "simulated_tidal_coverage.csv")
grid_df = all_grids_df[all_grids_df.cell_id.isin(tide_df.cell_id)].to_crs(ca_ocean.crs)

In [4]:
# --- Open a DuckDB connection (no database file is given) ---
con = duckdb.connect()
# Expose the single parquet file as a view so later queries can simply
# refer to `samples_one`.
view_sql = f"""
    CREATE OR REPLACE VIEW samples_one AS
    SELECT * FROM '{FILE_PATH}'
"""
con.execute(view_sql)

<duckdb.duckdb.DuckDBPyConnection at 0x15693be70>

In [None]:
# Pull every row of the view into pandas, report the row count, preview it.
all_rows_sql = "SELECT * FROM samples_one"
sample_df = con.execute(all_rows_sql).fetchdf()

print(sample_df.shape[0])

sample_df.head()

In [None]:

import math
import ephem
from ephem import Sun
from datetime import datetime, time, timedelta, timezone

def solartime(observer, sun):
    """Return (sidereal time - sun RA + 12h) at `observer`, normalised to one turn.

    `sun` is recomputed for `observer` first. The difference between the
    observer's sidereal time and the sun's right ascension is zero when the
    sun is at its highest point, so adding 12 hours makes that instant read
    as 12:00. The result is an `ephem.hours` angle.
    """
    sun.compute(observer)
    offset_from_noon = observer.sidereal_time() - sun.ra
    noon = ephem.hours('12:00')
    # .norm wraps the angle into a single 24h turn
    return ephem.hours(offset_from_noon + noon).norm

sun = Sun()
o = ephem.Observer()
# Observation instant: the first acquisition timestamp in the sample table.
ts_utc = sample_df.acquired.iloc[0].to_pydatetime()
o.date = ts_utc
# NOTE(review): this uses the first cell of grid_df, which is not necessarily
# the GRID_ID cell the samples were loaded for — confirm.
first_centroid = grid_df.geometry.iloc[0].centroid
lon, lat = first_centroid.x, first_centroid.y
# Passed as strings so ephem parses them as degrees (bare floats are radians).
o.lon = str(lon)
o.lat = str(lat)

print(o.next_transit(sun), ts_utc)

ha = solartime(o, sun)
print("ha", ha)
# ephem angles are floats measured in radians, so a full day is 2*pi
# (dividing by 360 mixed radians with degrees).
days = float(ha) / (2 * math.pi)
print(days)
# NOTE(review): folds the second half of the day onto the first half; the
# intent is unclear, behaviour kept as in the original.
if days > 0.5:
    days -= 0.5
td = timedelta(days=days)
td.total_seconds() / 60


In [None]:
# Rows that pass every "high quality" criterion
quality_mask = (
    (sample_df['clear_percent'] > 80)
    & (sample_df.publishing_stage == "finalized")
    & (sample_df.quality_category == "standard")
    & (sample_df.has_sr_asset)
    & (sample_df.ground_control)
)
clear_df = sample_df[quality_mask]

# Calendar month (1-12) of each acquisition, for all rows and for the subset
all_month = sample_df['acquired'].dt.month
clear_month = clear_df['acquired'].dt.month

# Per-month tallies; reindexing guarantees an entry for each of the 12 months
months = np.arange(1, 13)
counts_all, counts_clear = (
    month_series.value_counts().reindex(months, fill_value=0).values
    for month_series in (all_month, clear_month)
)

# Common y-limit with 10% headroom above the tallest bar
ymax = max(counts_all.max(), counts_clear.max()) * 1.1

# Side-by-side bar charts
fig, axes = plt.subplots(1, 2, figsize=(12,4), sharey=True)
panel_specs = (
    (axes[0], counts_all, 'Samples per Month (All)'),
    (axes[1], counts_clear, 'Samples per Month (High Quality Samples)'),
)
for panel_ax, panel_counts, panel_title in panel_specs:
    panel_ax.bar(months, panel_counts, width=0.8, edgecolor='black')
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Month')
    panel_ax.set_xticks(months)
    panel_ax.set_ylim(0, ymax)
# Only the left panel carries the y-label
axes[0].set_ylabel('Count')

plt.tight_layout()
plt.show()

In [None]:
def compute_minmax(column: str) -> pd.DataFrame:
    """Return a one-row frame with the minimum (`minv`) and maximum (`maxv`)
    of `column` over the `samples_one` view.

    `column` is interpolated directly into the SQL text, so it must be a
    trusted column name.
    """
    query = f"""
        SELECT
        MIN({column}) AS minv,
        MAX({column}) AS maxv
        FROM samples_one
    """
    result = con.execute(query)
    return result.fetchdf()

def compute_histogram(column: str, nbins: int = 30) -> pd.DataFrame:
    """
    Build an equal-width histogram of `column` over the `samples_one` view.

    The counting happens inside DuckDB: `histogram()` is evaluated over the
    boundaries produced by `equi_width_bins()` between the column's MIN and
    MAX. `column` is interpolated into the SQL text, so it must be a trusted
    column name.

    Parameters
    ----------
    column : name of a numeric column in `samples_one`.
    nbins : requested number of bins passed to `equi_width_bins()`.

    Returns
    -------
    DataFrame sorted by `bin_upper` with columns
    `bin_upper`, `count`, `bin_lower`, `centers`, `widths`.
    An empty frame with those columns is returned when DuckDB yields no bins.
    """
    sql = f"""
        WITH bounds AS (
          SELECT
            MIN({column}) AS mn,
            MAX({column}) AS mx
          FROM samples_one
        )
        SELECT
          -- histogram() returns a MAP<upper_boundary, count>
          histogram(
            {column},
            equi_width_bins(bounds.mn::DOUBLE, bounds.mx::DOUBLE, {nbins}::BIGINT, True)
          ) AS hist_map
        FROM samples_one
        CROSS JOIN bounds;
    """
    hist_map = con.execute(sql).fetchdf().iloc[0]["hist_map"]

    out_columns = ['bin_upper', 'count', 'bin_lower', 'centers', 'widths']
    # A NULL / empty map (e.g. no rows in the view) has nothing to unpack.
    if not hasattr(hist_map, "keys") or len(hist_map) == 0:
        return pd.DataFrame(columns=out_columns)

    # Unpack the map into a two-column frame ordered by upper boundary
    df = pd.DataFrame({
        'bin_upper': list(hist_map.keys()),
        'count':     list(hist_map.values())
    })
    df = df.sort_values('bin_upper').reset_index(drop=True)
    uppers = df['bin_upper'].tolist()
    # Bin width is inferred from the first two boundaries. With a single bin
    # there is no second boundary, so fall back to a nominal width of 1.0
    # instead of raising IndexError.
    bin_size = uppers[1] - uppers[0] if len(uppers) > 1 else 1.0
    # Each lower edge is the previous upper edge; the first one is inferred
    lowers = [uppers[0] - bin_size] + uppers[:-1]
    df["bin_lower"] = lowers
    df["centers"] = (df["bin_lower"] + df['bin_upper']) / 2
    df["widths"]  = df['bin_upper'] - df["bin_lower"]
    return df

# One histogram per angle column, laid out on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
axes = axes.ravel()

angle_columns = ["satellite_azimuth", "sun_azimuth", "sun_elevation", "view_angle"]
for ax, col in zip(axes, angle_columns):
    df_hist = compute_histogram(col, nbins=30)
    pretty_name = col.replace('_',' ').title()

    # Bars are drawn slightly narrower than the bins to leave a visible gap
    ax.bar(df_hist["centers"], df_hist['count'], width=df_hist["widths"] * 0.9)
    ax.set(
        title=f"Histogram of {pretty_name}",
        xlabel=pretty_name,
        ylabel="Frequency",
    )

fig.tight_layout()
plt.show()

In [None]:
# --- Histogram of clear_percent ---
fig, ax = plt.subplots(1, 1, figsize=(10, 4))

df_hist = compute_histogram("clear_percent", nbins=30)

# Same label formatting as the angle histograms: underscores become spaces,
# so the axis reads "Clear Percent" rather than "Clear_Percent".
pretty_name = "clear_percent".replace('_', ' ').title()

ax.bar(df_hist["centers"], df_hist['count'], width=df_hist["widths"] * 0.9)
ax.set_title(f"Histogram of {pretty_name}")
ax.set_xlabel(pretty_name)
ax.set_ylabel("Frequency")

fig.tight_layout()
plt.show()

In [5]:
from src.tides import find_nearest_coordinate, tide_model as create_tidal_model, datetimes_to_delta
from shapely.ops import transform
from pyproj import Transformer

# Take the centroid of the first grid cell in an estimated UTM CRS, then map
# that point back into grid_df's own CRS.
local_crs = grid_df.estimate_utm_crs()
grid_local = grid_df.to_crs(local_crs)
grid_point_local = grid_local.geometry.iloc[0].centroid
transformer = Transformer.from_crs(local_crs, grid_df.crs, always_xy=True)


def project(x, y, z=None):
    """Coordinate callback for `shapely.ops.transform`.

    Forwards 2D or 3D coordinates to the module-level `transformer` and
    returns the transformed tuple with the same number of components.
    """
    coords = (x, y) if z is None else (x, y, z)
    return transformer.transform(*coords)


# Apply the projection to the centroid
grid_point = transform(project, grid_point_local)

print(grid_point)

POINT (-47.71420246434783 -60.5792815383332)


In [None]:
# Build the tide model object from the local model directory.
# NOTE(review): absolute, machine-specific path; the meaning of the
# "GOT4.10" / "GOT" arguments is defined by src.tides.tide_model — confirm there.
tide_model = create_tidal_model(Path("/Users/kyledorman/data/tides"), "GOT4.10", "GOT")

In [None]:
# Tide height at the grid point for every acquisition time.
# The point is passed as a single [y, x] pair; presumably (lat, lon) is the
# order tide_elevations expects — TODO confirm against src.tides.
latlons = np.array([[grid_point.y, grid_point.x]])
# [0] selects the result for the single time series passed in `times`.
dove_tides = tide_model.tide_elevations(latlons, times=[sample_df.acquired])[0]
# Adds the column to sample_df in place.
sample_df["tide_height"] = dove_tides
# Precomputed tide series loaded from disk.
# NOTE(review): relative path depends on the notebook's working directory, and
# nothing here shows which location this series was computed for — confirm it
# matches grid_point.
tides_year = np.load("../extracted/tides_2023.npy")

In [None]:
from scipy.signal import hilbert
# 1. build the analytic signal of the tide series
analytic = hilbert(tides_year)

# 2. instantaneous phase: np.angle yields radians in (–π, π]; the modulo
#    then wraps them into [0, 2π)
phase_rad = np.angle(analytic)
phase_rad = np.mod(phase_rad, 2*np.pi)

# 3. one timestamp per minute over the window the series is assumed to cover.
# NOTE(review): the file is named tides_2023 but the window is
# 2023-12-01 .. 2024-12-01 — confirm. pd.Series below raises if the number
# of minutes differs from len(phase_rad).
start = np.datetime64("2023-12-01T00:00")
end = np.datetime64("2024-12-01T00:00")
minutes = np.arange(start, end, np.timedelta64(1, "m"))

phase_series = pd.Series(phase_rad, index=pd.to_datetime(minutes))
phase_series = phase_series.sort_index()

# 4. give each sample the phase of the nearest minute.
# NOTE(review): `minutes` is timezone-naive; this assumes `acquired` is
# comparable with it — confirm.
sample_df['phase'] = (
    phase_series
    .reindex(sample_df['acquired'], method='nearest')
    .values
)


In [None]:
# Inspect the per-sample phase column (radians) assigned in the previous cell.
sample_df['phase']

In [None]:
# ── Inputs (all defined in earlier cells) ─────────────────────────────────────
# tides_year : 1D np.array of tide heights (floats)
# phase_rad  : 1D np.array of tide phases, radians wrapped to [0, 2π)
# sample_df  : pandas.DataFrame with columns
#                'tide_height' (float)
#                'phase'       (float, radians)
# ─────────────────────────────────────────────────────────────────────────────

# 1) Bin edges shared by the full series and the samples
phase_edges = np.linspace(0, 2 * np.pi, 37)                # 36 equal bins over [0, 2π]
height_edges = np.histogram_bin_edges(tides_year, bins=30)

# 2) Histogram counts, needed only to choose the y-limits
counts_phase_min = np.histogram(phase_rad, bins=phase_edges)[0]
counts_phase_samp = np.histogram(sample_df['phase'], bins=phase_edges)[0]
counts_height_min = np.histogram(tides_year, bins=height_edges)[0]
counts_height_samp = np.histogram(sample_df['tide_height'], bins=height_edges)[0]

# The series column and the samples column each get their own limit (10% headroom)
y_max_minutes = max(counts_phase_min.max(), counts_height_min.max()) * 1.1
y_max_samp = max(counts_phase_samp.max(), counts_height_samp.max()) * 1.1

# 3) 2×2 grid: rows = phase / height, columns = full series / samples
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
phase_xlim = (0, 2 * np.pi)
height_xlim = (height_edges[0], height_edges[-1])
panel_specs = [
    # (axis, values, bin edges, x-limits, y-limit, title)
    (axes[0,0], phase_rad, phase_edges, phase_xlim, y_max_minutes, 'Phase Histogram (Minute Series)'),
    (axes[0,1], sample_df['phase'], phase_edges, phase_xlim, y_max_samp, 'Phase Histogram (Samples)'),
    (axes[1,0], tides_year, height_edges, height_xlim, y_max_minutes, 'Height Histogram (Minute Series)'),
    (axes[1,1], sample_df['tide_height'], height_edges, height_xlim, y_max_samp, 'Height Histogram (Samples)'),
]
for panel_ax, panel_values, panel_edges, panel_xlim, panel_ymax, panel_title in panel_specs:
    panel_ax.hist(panel_values, bins=panel_edges, edgecolor='black')
    panel_ax.set_title(panel_title)
    panel_ax.set_xlim(*panel_xlim)
    panel_ax.set_ylim(0, panel_ymax)

# y-labels only on the left column, x-labels only on the bottom (height) row
for left_ax in axes[:, 0]:
    left_ax.set_ylabel('Count')
for bottom_ax in axes[1, :]:
    bottom_ax.set_xlabel('Tide Height')

plt.tight_layout()
plt.show()

In [None]:
# Quick look: first 10,000 phase values plotted against their position
phase_head = phase_rad[:10000]
plt.figure(figsize=(30, 2))
plt.scatter(np.arange(len(phase_head)), phase_head, s=0.1, marker=".")

In [None]:
# Quick look: first 10,000 tide heights plotted against their position.
# The x positions are sized from the tide slice itself, so this cell no
# longer depends on phase_rad having been computed (or having equal length).
tide_head = tides_year[:10000]
plt.figure(figsize=(30, 2))
plt.scatter(np.arange(len(tide_head)), tide_head, s=0.1, marker=".")

In [None]:
# Rose (polar bar) plot of the phase distribution over the full series
num_bins = 36               # 10° per bin
full_circle = (0, 2*np.pi)
counts, bin_edges = np.histogram(phase_rad, bins=num_bins, range=full_circle)
angles = bin_edges[:-1]     # each bar starts at its bin's lower edge
width = bin_edges[1] - bin_edges[0]

fig = plt.figure()
ax = fig.add_subplot(projection='polar')
ax.bar(angles, counts, width=width, align='edge')

# Orientation: 0° at the top, angles increasing clockwise
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)
quarter_turns = np.deg2rad([0, 90, 180, 270])
ax.set_xticks(quarter_turns)
ax.set_xticklabels(['0° (High)', '90°', '180° (Low)', '270°'])

plt.show()

In [None]:
# Rose (polar bar) plot of the phase distribution of the acquired samples
num_bins = 36               # 10° per bin
full_circle = (0, 2*np.pi)
counts, bin_edges = np.histogram(sample_df.phase, bins=num_bins, range=full_circle)
angles = bin_edges[:-1]     # each bar starts at its bin's lower edge
width = bin_edges[1] - bin_edges[0]

fig = plt.figure()
ax = fig.add_subplot(projection='polar')
ax.bar(angles, counts, width=width, align='edge')

# Orientation: 0° at the top, angles increasing clockwise
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)
quarter_turns = np.deg2rad([0, 90, 180, 270])
ax.set_xticks(quarter_turns)
ax.set_xticklabels(['0° (High)', '90°', '180° (Low)', '270°'])

plt.show()

In [None]:
# Order the samples chronologically; this rebinds sample_df to the sorted
# copy (the original index labels are kept).
sample_df = sample_df.sort_values(by="acquired")

In [None]:
# Number of bins used for both the tide-height and the tide-phase binning
# in the cells that follow.
NBINS = 5

In [None]:
# NBINS equal-width bins spanning the full range of the yearly tide series
height_edges = np.histogram_bin_edges(tides_year, bins=NBINS)
# Nudge the top edge up slightly; presumably so the series maximum still
# falls inside the last bin when the bins are cut right-open — confirm.
height_edges[-1] += 1e-3

In [None]:
# Integer height-bin index for every sample, using the edges derived from the
# yearly series. Bins are cut left-closed / right-open (right=False), exactly
# like the per-minute series and the phase bins, so a value sitting on an
# interior edge lands in the same bin in every comparison.
sample_df['height_bin'] = pd.cut(
    sample_df['tide_height'],
    bins=height_edges,
    labels=False,
    include_lowest=True,
    right=False,
)
# A NaN bin means a sample height fell outside the yearly range
assert not sample_df['height_bin'].isna().any(), "sample tide height outside height_edges"
sample_df['height_bin'].unique()

In [None]:
# NBINS equal bins around the circle, each delta = 2π/NBINS wide.
# The phase is shifted forward by half a bin before cutting, so bin 0 of the
# shifted phase corresponds to original phases in [-delta/2, delta/2),
# i.e. bin 0 is centred on phase 0.
delta = 2*np.pi/NBINS
sample_df['phase_shift'] = (sample_df['phase'] + delta/2) % (2*np.pi)
phase_edges = np.linspace(0, 2*np.pi, NBINS+1)
phase_edges

In [None]:
# Integer phase-bin index for every sample, cut on the shifted phase with
# left-closed / right-open bins (right=False).
sample_df['phase_bin'] = pd.cut(
    sample_df['phase_shift'],
    bins=phase_edges,
    labels=False,
    include_lowest=True,
    right=False
)
# A NaN bin would mean a shifted phase fell outside phase_edges
assert not sample_df['phase_bin'].isna().any()
sample_df['phase_bin'].unique()

In [None]:

# Bin the full yearly series with the same edges and the same right-open
# convention that is used for the sample phase bins.
cut_options = dict(labels=False, include_lowest=True, right=False)

phase_shift = (phase_rad + delta/2) % (2*np.pi)
minute_phase_bin = pd.cut(phase_shift, bins=phase_edges, **cut_options)
minute_height_bin = pd.cut(tides_year, bins=height_edges, **cut_options)

# One timestamp per minute over the window
start = np.datetime64("2023-12-01T00:00")
end = np.datetime64("2024-12-01T00:00")
one_minute = np.timedelta64(1, "m")
minutes = np.arange(start, end, one_minute)

In [None]:
def thin_by_sliding_window(df, time_col='acquired', N=120):
    """
    Greedily thin `df` so consecutive kept rows are at least `N` minutes apart.

    Rows are visited in ascending `time_col` order. The first row is always
    kept; each later row is kept only if it is at least `N` minutes after the
    most recently kept row.

    Parameters
    ----------
    df : DataFrame containing `time_col`. It is not modified.
    time_col : name of the timestamp column (converted with `pd.to_datetime`).
    N : minimum spacing between kept rows, in minutes.

    Returns
    -------
    A new DataFrame sorted by `time_col` with a fresh 0..k-1 index.
    """
    df2 = df.copy()
    df2[time_col] = pd.to_datetime(df2[time_col])
    df2 = df2.sort_values(time_col)

    min_gap = pd.Timedelta(minutes=N)
    keep_pos = []
    last_time = None

    # Walk the timestamps only (no per-row Series construction) and remember
    # row positions rather than index labels, so repeated index labels cannot
    # pull extra rows into the result.
    for pos, t in enumerate(df2[time_col]):
        if last_time is None or (t - last_time) >= min_gap:
            keep_pos.append(pos)
            last_time = t

    return df2.iloc[keep_pos].reset_index(drop=True)


In [None]:
# Thin to samples at least 120 minutes apart, then look at the gap (in hours)
# between consecutive acquisitions, for all samples and the high-quality subset.
df_sorted = thin_by_sliding_window(sample_df.sort_values('acquired'), N=120)

quality_mask = (
    (df_sorted['clear_percent'] > 80)
    & (df_sorted.publishing_stage == "finalized")
    & (df_sorted.quality_category == "standard")
    & (df_sorted.has_sr_asset)
    & (df_sorted.ground_control)
)
clear_sorted = df_sorted[quality_mask]

# The first difference of each series is NaN and is dropped straight away
full_diffs = (df_sorted.acquired.diff().dt.total_seconds() / 3600.0).dropna()
clear_diffs = (clear_sorted.acquired.diff().dt.total_seconds() / 3600.0).dropna()

# One set of bin edges, computed over both series, shared by both panels
xbins = np.histogram_bin_edges(
    np.concatenate([full_diffs.to_numpy(), clear_diffs.to_numpy()]),
    bins=30,
)

fig, ax = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
panel_specs = (
    (ax[0], full_diffs, 'Acquisition Time Δ (All Samples)'),
    (ax[1], clear_diffs, 'Acquisition Time Δ (High Quality Samples)'),
)
for panel_ax, panel_values, panel_title in panel_specs:
    panel_ax.hist(panel_values, bins=xbins, edgecolor='black')
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Δ Time (hours)')
ax[0].set_ylabel('Count')

plt.tight_layout()
plt.show()

In [None]:
# Thin to samples at least 120 minutes apart, then look at the change in tide
# height between consecutive acquisitions, for all samples and the
# high-quality subset.
df_sorted = thin_by_sliding_window(sample_df.sort_values('acquired'), N=120)

quality_mask = (
    (df_sorted['clear_percent'] > 80)
    & (df_sorted.publishing_stage == "finalized")
    & (df_sorted.quality_category == "standard")
    & (df_sorted.has_sr_asset)
    & (df_sorted.ground_control)
)
clear_sorted = df_sorted[quality_mask]

# The first difference of each series is NaN and is dropped straight away
full_diffs = df_sorted['tide_height'].diff().dropna()
clear_diffs = clear_sorted['tide_height'].diff().dropna()

# One set of bin edges, computed over both series, shared by both panels
xbins = np.histogram_bin_edges(
    np.concatenate([full_diffs.to_numpy(), clear_diffs.to_numpy()]),
    bins=30,
)

fig, ax = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
panel_specs = (
    (ax[0], full_diffs, 'Height Change Histogram (All Samples)'),
    (ax[1], clear_diffs, 'Height Change Histogram (High Quality Samples)'),
)
for panel_ax, panel_values, panel_title in panel_specs:
    panel_ax.hist(panel_values, bins=xbins, edgecolor='black')
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Δ Height (meters)')
ax[0].set_ylabel('Count')

plt.tight_layout()
plt.show()

In [None]:
# Successive differences on a circle, wrapped into [–π, π)
def circular_diff(phases):
    """Return consecutive differences of `phases` (radians) wrapped to [-π, π).

    The first element is NaN because it has no predecessor.
    """
    step = phases.diff()
    # Shift by π, wrap into [0, 2π), then shift back
    return step.add(np.pi).mod(2 * np.pi).sub(np.pi)

# Thin to samples at least 120 minutes apart, then look at the wrapped change
# in tidal phase between consecutive acquisitions, for all samples and the
# high-quality subset.
df_sorted = thin_by_sliding_window(sample_df.sort_values('acquired'), N=120)

quality_mask = (
    (df_sorted['clear_percent'] > 80)
    & (df_sorted.publishing_stage == "finalized")
    & (df_sorted.quality_category == "standard")
    & (df_sorted.has_sr_asset)
    & (df_sorted.ground_control)
)
clear_sorted = df_sorted[quality_mask]

# The first difference of each series is NaN and is dropped straight away
full_diffs = circular_diff(df_sorted['phase']).dropna()
clear_diffs = circular_diff(clear_sorted['phase']).dropna()

# One set of bin edges, computed over both series, shared by both panels
xbins = np.histogram_bin_edges(
    np.concatenate([full_diffs.to_numpy(), clear_diffs.to_numpy()]),
    bins=30,
)

fig, ax = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
panel_specs = (
    (ax[0], full_diffs, 'Phase Change Histogram (All Samples)'),
    (ax[1], clear_diffs, 'Phase Change Histogram (High Quality Samples)'),
)
for panel_ax, panel_values, panel_title in panel_specs:
    panel_ax.hist(panel_values, bins=xbins, edgecolor='black')
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Δ Phase (radians)')
ax[0].set_ylabel('Count')

plt.tight_layout()
plt.show()

In [None]:
# Parameters: which bins stand for "high" and "low" tide.
# Phase bins are cut on the half-bin-shifted phase, so bin 0 is centred on
# phase 0, which the rose plots label as high water.
# NOTE(review): with an odd NBINS no bin is centred on π (labelled low water
# in the rose plots); for NBINS = 5, bin NBINS // 2 = 2 covers original
# phases 108°–180° — confirm this is the intended "low" bin.
high_phase_bin = 0
low_phase_bin = NBINS // 2
# Height bins run from the lowest (0) to the highest (NBINS - 1) tide range.
low_height_bin = 0
high_height_bin = NBINS - 1

# Helper: hours between consecutive events
def compute_hourly_diffs(times):
    """Return the gaps, in hours, between consecutive timestamps in `times`.

    `times` is a numpy datetime64 array in any order; it is truncated to
    whole seconds and sorted before differencing, so the result has
    `len(times) - 1` non-negative entries.
    """
    epoch_seconds = times.astype('datetime64[s]').astype(np.int64)
    ordered = np.sort(epoch_seconds)
    gaps_seconds = np.diff(ordered)
    return gaps_seconds / 3600.0

# Mark the first element of every run of identical consecutive phase bins in
# the per-minute series (the very first element always starts a run).
run_starts = np.concatenate((
    [True],
    minute_phase_bin[1:] != minute_phase_bin[:-1]
))
change_idx = np.nonzero(run_starts)[0]
# Timestamp and bin id at the start of each run
change_times = minutes[change_idx]
change_phase = minute_phase_bin[change_idx]

# Hours between successive entries into the high / low phase bin over the
# full series. Insertion order of `diffs` matters: it fixes the subplot
# layout further down.
diffs = {
    'year_phase_high':  compute_hourly_diffs(change_times[change_phase == high_phase_bin]),
    'year_phase_low':   compute_hourly_diffs(change_times[change_phase == low_phase_bin]),
}

# Same run-start detection on the height bins (the variable names are reused)
run_starts = np.concatenate((
    [True],
    minute_height_bin[1:] != minute_height_bin[:-1]
))
change_idx = np.nonzero(run_starts)[0]
# Timestamp and bin id at the start of each run
change_times = minutes[change_idx]
change_height = minute_height_bin[change_idx]

# Samples thinned to at least 120 minutes apart
thinned = thin_by_sliding_window(sample_df, time_col='acquired', N=120)

# Add the remaining year-series intervals, then the sample ("dove") intervals.
# NOTE(review): some sample intervals are capped with .clip (250 / 1000 hours)
# and 'dove_height_low' is not; presumably to keep outliers from stretching
# the histograms — confirm the caps are intentional.
diffs.update({
    'year_height_low':  compute_hourly_diffs(change_times[change_height == low_height_bin]),
    'year_height_high': compute_hourly_diffs(change_times[change_height == high_height_bin]),
    'dove_phase_high':   compute_hourly_diffs(thinned.loc[thinned['phase_bin']==high_phase_bin, 'acquired'].values).clip(0, 250),
    'dove_phase_low':    compute_hourly_diffs(thinned.loc[thinned['phase_bin']==low_phase_bin,  'acquired'].values).clip(0, 1000),
    'dove_height_low':   compute_hourly_diffs(thinned.loc[thinned['height_bin']==low_height_bin, 'acquired'].values),
    'dove_height_high':  compute_hourly_diffs(thinned.loc[thinned['height_bin']==high_height_bin,'acquired'].values).clip(0, 1000),
})

fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(4 * 4, 2 * 4))

# First four keys (the year_* entries) fill the top row, the last four
# (the dove_* entries) fill the bottom row.
keys_list = [list(diffs.keys())[:4], list(diffs.keys())[4:]]

for i, keys in enumerate(keys_list):
    for j, k in enumerate(keys):
        vals = diffs[k]
        axes[i, j].hist(vals, bins=20)
        axes[i, j].set_xlabel('Interval (hours)')
        axes[i, j].set_ylabel('Count')
        axes[i, j].set_title(k.replace('_',' ').title() + ' Tide Intervals')

fig.tight_layout()   
plt.show()

In [None]:
months = np.arange(1, 13)

# helper to get monthly coverage series
def monthly_coverage(df, col, n_bins=None):
    """
    Percentage of bins observed in each calendar month.

    For every month 1..12 this counts the distinct non-null values of `col`
    among the rows of `df` acquired in that month and expresses the count as
    a percentage of `n_bins`.

    Parameters
    ----------
    df : DataFrame with a datetime 'acquired' column and the bin column `col`.
    col : name of the bin column.
    n_bins : total number of possible bins; defaults to the notebook-level
        `NBINS` when omitted.

    Returns
    -------
    np.ndarray of 12 percentages (January first), 0 for months without rows.
    """
    if n_bins is None:
        n_bins = NBINS
    # Built locally so the helper does not depend on whichever cell last
    # assigned the global `months`.
    month_index = np.arange(1, 13)
    # distinct bins seen per month
    s = df.groupby(df['acquired'].dt.month)[col].nunique()
    # % of total bins
    return (s.reindex(month_index, fill_value=0) / n_bins * 100).values

# Coverage over every sample
phase_cov_all  = monthly_coverage(sample_df, 'phase_bin')
height_cov_all = monthly_coverage(sample_df, 'height_bin')

# Coverage over the high-quality subset only
quality_mask = (
    (sample_df['clear_percent'] > 80)
    & (sample_df.publishing_stage == "finalized")
    & (sample_df.quality_category == "standard")
    & (sample_df.has_sr_asset)
    & (sample_df.ground_control)
)
df_clear = sample_df[quality_mask]
phase_cov_clr  = monthly_coverage(df_clear, 'phase_bin')
height_cov_clr = monthly_coverage(df_clear, 'height_bin')

# Grouped bars: "All" to the left of each month tick, high quality to the right
width = 0.35
x = months

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 5), sharey=True)
panel_specs = (
    (axes[0], phase_cov_all, phase_cov_clr, 'Monthly Tidal Phase Coverage'),
    (axes[1], height_cov_all, height_cov_clr, 'Monthly Tidal Height Coverage'),
)
for panel_ax, cov_all, cov_clr, panel_title in panel_specs:
    panel_ax.bar(x - width/2, cov_all, width, label='All')
    panel_ax.bar(x + width/2, cov_clr, width, label='High Quality Samples')
    panel_ax.set_xticks(months)
    panel_ax.set_xlabel('Month')
    panel_ax.set_ylabel('Coverage (%)')
    panel_ax.set_title(panel_title)
    panel_ax.set_ylim(0, 100)
    panel_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
months = np.arange(1, 13)

# Per-minute series in the same column layout as the sample table, so the
# monthly helpers can be reused on it.
year_df = pd.DataFrame({
    'phase_bin': minute_phase_bin,
    'height_bin': minute_height_bin,
    'acquired': minutes
})

# Coverage of the full series
phase_cov_all  = monthly_coverage(year_df, 'phase_bin')
height_cov_all = monthly_coverage(year_df, 'height_bin')

width = 0.35
x = months

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))
panel_specs = (
    (axes[0], phase_cov_all, 'Monthly Tidal Phase Coverage'),
    (axes[1], height_cov_all, 'Monthly Tidal Height Coverage'),
)
for panel_ax, cov_all, panel_title in panel_specs:
    panel_ax.bar(x - width/2, cov_all, width, label='All')
    panel_ax.set_xticks(months)
    panel_ax.set_xlabel('Month')
    panel_ax.set_ylabel('Coverage (%)')
    panel_ax.set_title(panel_title)
    panel_ax.set_ylim(0, 100)
    panel_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# parameters: months to report and the bins standing for high / low tide.
# Despite the plural names, high_phase_bins / low_phase_bins each hold a
# single bin index here.
months = np.arange(1,13)
high_phase_bins  = 0
low_phase_bins   = NBINS//2
low_height_bin  = 0
high_height_bin = NBINS-1

# helper to get monthly counts
def monthly_counts(df, col, bin_idx, month_index=None):
    """
    Count, per calendar month, the rows of `df` whose `col` matches `bin_idx`.

    Parameters
    ----------
    df : DataFrame with a datetime 'acquired' column and the bin column `col`.
    col : name of the bin column to test.
    bin_idx : a single bin value, or any list-like (list, tuple, set, array)
        of bin values; a list-like matches rows equal to any of its members.
    month_index : month numbers to report, in output order; defaults to 1..12.

    Returns
    -------
    np.ndarray of counts aligned with `month_index`, 0 where nothing matches.
    """
    if month_index is None:
        # Built locally so the helper does not depend on a global `months`.
        month_index = np.arange(1, 13)
    if pd.api.types.is_list_like(bin_idx):
        mask = df[col].isin(bin_idx)
    else:
        mask = df[col] == bin_idx
    # Filter first, then group the filtered rows by their own month values.
    # Plain arrays are used so no index alignment is involved (safe even when
    # index labels repeat).
    subset = df.loc[mask.to_numpy()]
    s = subset.groupby(subset['acquired'].dt.month.to_numpy()).size()
    return s.reindex(month_index, fill_value=0).values

# Monthly counts for each (tide measure, level) pair, for all samples ("_all")
# and for the high-quality subset ("_clr")
count_specs = [
    ('phase_high',  'phase_bin',  high_phase_bins),
    ('phase_low',   'phase_bin',  low_phase_bins),
    ('height_high', 'height_bin', high_height_bin),
    ('height_low',  'height_bin', low_height_bin),
]
counts = {}
for count_prefix, bin_col, bin_value in count_specs:
    counts[f'{count_prefix}_all'] = monthly_counts(sample_df, bin_col, bin_value)
    counts[f'{count_prefix}_clr'] = monthly_counts(df_clear, bin_col, bin_value)

# 2x2 grid: rows = phase / height, columns = high / low
fig, axs = plt.subplots(2, 2, figsize=(11,6), sharex=True, sharey=True)
width = 0.35
x = months

panel_specs = [
    (axs[0,0], 'phase_high',  'Phase High Tide Count'),
    (axs[0,1], 'phase_low',   'Phase Low Tide Count'),
    (axs[1,0], 'height_high', 'Height High Tide Count'),
    (axs[1,1], 'height_low',  'Height Low Tide Count'),
]
for panel_ax, count_prefix, panel_title in panel_specs:
    panel_ax.bar(x-width/2, counts[f'{count_prefix}_all'], width, label='All')
    panel_ax.bar(x+width/2, counts[f'{count_prefix}_clr'], width, label='High Quality Samples')
    panel_ax.set_title(panel_title)

# formatting shared by every panel
for ax in axs.flat:
    ax.set_xticks(months)
    ax.set_xlabel('Month')
    ax.set_ylabel('Count')
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# parameters: months to report and the bins standing for high / low tide.
# These repeat the values assigned in the previous cell; despite the plural
# names, high_phase_bins / low_phase_bins each hold a single bin index.
months = np.arange(1,13)
high_phase_bins  = 0
low_phase_bins   = NBINS//2
low_height_bin  = 0
high_height_bin = NBINS-1

# helper to get monthly counts
# NOTE(review): this repeats the definition in the previous cell; once that
# cell has run, this re-definition is redundant.
def monthly_counts(df, col, bin_idx, month_index=None):
    """
    Count, per calendar month, the rows of `df` whose `col` matches `bin_idx`.

    Parameters
    ----------
    df : DataFrame with a datetime 'acquired' column and the bin column `col`.
    col : name of the bin column to test.
    bin_idx : a single bin value, or any list-like (list, tuple, set, array)
        of bin values; a list-like matches rows equal to any of its members.
    month_index : month numbers to report, in output order; defaults to 1..12.

    Returns
    -------
    np.ndarray of counts aligned with `month_index`, 0 where nothing matches.
    """
    if month_index is None:
        # Built locally so the helper does not depend on a global `months`.
        month_index = np.arange(1, 13)
    if pd.api.types.is_list_like(bin_idx):
        mask = df[col].isin(bin_idx)
    else:
        mask = df[col] == bin_idx
    # Filter first, then group the filtered rows by their own month values.
    # Plain arrays are used so no index alignment is involved (safe even when
    # index labels repeat).
    subset = df.loc[mask.to_numpy()]
    s = subset.groupby(subset['acquired'].dt.month.to_numpy()).size()
    return s.reindex(month_index, fill_value=0).values

# Monthly counts over the full per-minute series for each (measure, level) pair
count_specs = [
    ('phase_high',  'phase_bin',  high_phase_bins),
    ('phase_low',   'phase_bin',  low_phase_bins),
    ('height_high', 'height_bin', high_height_bin),
    ('height_low',  'height_bin', low_height_bin),
]
counts = {
    f'{count_prefix}_all': monthly_counts(year_df, bin_col, bin_value)
    for count_prefix, bin_col, bin_value in count_specs
}

# 2x2 grid: rows = phase / height, columns = high / low. The y-axes are
# independent here (only x is shared).
fig, axs = plt.subplots(2, 2, figsize=(11,6), sharex=True)
width = 0.35
x = months

panel_specs = [
    (axs[0,0], 'phase_high_all',  'Phase High Tide Count'),
    (axs[0,1], 'phase_low_all',   'Phase Low Tide Count'),
    (axs[1,0], 'height_high_all', 'Height High Tide Count'),
    (axs[1,1], 'height_low_all',  'Height Low Tide Count'),
]
for panel_ax, count_key, panel_title in panel_specs:
    panel_ax.bar(x-width/2, counts[count_key], width, label='All')
    panel_ax.set_title(panel_title)

# formatting shared by every panel
for ax in axs.flat:
    ax.set_xticks(months)
    ax.set_xlabel('Month')
    ax.set_ylabel('Count')
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 1) Work on a time-ordered copy of the samples (no thinning is applied here)
df_thin = sample_df.copy() # thin_by_sliding_window(sample_df, time_col='acquired', N=60)
df_thin = df_thin.sort_values('acquired')

# 2) Prepare
bin_count = NBINS
bins = np.arange(bin_count)
matrix = np.full((bin_count, bin_count), np.nan)
# Percentile of the waiting time reported in each cell; every label below is
# derived from this value so the figure cannot disagree with the computation.
percentile = 50
percentile_label = f"{percentile}th"

# Sorted acquisition times of each phase bin, computed once per bin
bin_times = {
    b: pd.DataFrame({
        't': df_thin.loc[df_thin['phase_bin'] == b, 'acquired'].sort_values().values
    })
    for b in bins
}

# 3) For each start-bin i and target-bin j: the chosen percentile of the wait
#    from a sample in bin i to the next sample in bin j
for i in bins:
    df_i = bin_times[i].rename(columns={'t': 'time_i'})

    for j in bins:
        df_j = bin_times[j].rename(columns={'t': 'time_j'})

        # for each time in i, find the first time in j at or after it
        # (for i == j a sample matches itself, giving a wait of 0)
        merged = pd.merge_asof(
            df_i,
            df_j,
            left_on='time_i',
            right_on='time_j',
            direction='forward'
        )

        # compute Δt in days (seconds -> hours -> days)
        deltas = (merged['time_j'] - merged['time_i']).dt.total_seconds() / 3600.0 / 24.0
        # times in i with no later sample in j have no wait; drop them
        deltas = deltas.dropna()
        if len(deltas):
            matrix[i, j] = np.percentile(deltas, percentile)

# 4) Plot heatmap of the chosen percentile of Δt
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(matrix, origin='lower', aspect='auto', cmap='viridis')

# axis labels
ax.set_xticks(bins)
ax.set_yticks(bins)
ax.set_xlabel('Target phase bin (j)')
ax.set_ylabel('Start phase bin (i)')
ax.set_title(f'{percentile_label} Percentile Time (days) Between Phase Bins')

# annotate each cell that has a value
for i in bins:
    for j in bins:
        val = matrix[i, j]
        if not np.isnan(val):
            ax.text(j, i, f"{val:.1f}", ha='center', va='center', color='white', fontsize=8)

# colorbar
cbar = fig.colorbar(im, ax=ax, label=f'Days ({percentile_label} percentile)')

plt.tight_layout()
plt.show()

In [None]:
# Interactive map centred on the first grid cell, with every cell of grid_df
# drawn on top
centroid = grid_df.geometry.iloc[0].centroid
map_center = [centroid.y, centroid.x]
base_map = folium.Map(location=map_center, zoom_start=2, width=1000, height=800)

for geo in grid_df.geometry:
    cell_layer = folium.GeoJson(geo)
    cell_layer.add_to(base_map)
base_map

In [None]:
# 1) Work on a time-ordered copy of the samples (no thinning is applied here)
df_thin = sample_df.copy() # thin_by_sliding_window(sample_df, time_col='acquired', N=60)
df_thin = df_thin.sort_values('acquired')

# 2) Prepare
bin_count = NBINS
bins = np.arange(bin_count)
matrix = np.full((bin_count, bin_count), np.nan)
# Percentile of the waiting time reported in each cell; every label below is
# derived from this value so the figure cannot disagree with the computation.
percentile = 90
percentile_label = f"{percentile}th"

# Sorted acquisition times of each height bin, computed once per bin
bin_times = {
    b: pd.DataFrame({
        't': df_thin.loc[df_thin['height_bin'] == b, 'acquired'].sort_values().values
    })
    for b in bins
}

# 3) For each start-bin i and target-bin j: the chosen percentile of the wait
#    from a sample in height bin i to the next sample in height bin j
for i in bins:
    df_i = bin_times[i].rename(columns={'t': 'time_i'})

    for j in bins:
        df_j = bin_times[j].rename(columns={'t': 'time_j'})

        # for each time in i, find the first time in j at or after it
        # (for i == j a sample matches itself, giving a wait of 0)
        merged = pd.merge_asof(
            df_i,
            df_j,
            left_on='time_i',
            right_on='time_j',
            direction='forward'
        )

        # compute Δt in days (seconds -> hours -> days)
        deltas = (merged['time_j'] - merged['time_i']).dt.total_seconds() / 3600.0 / 24.0
        # times in i with no later sample in j have no wait; drop them
        deltas = deltas.dropna()
        if len(deltas):
            matrix[i, j] = np.percentile(deltas, percentile)

# 4) Plot heatmap of the chosen percentile of Δt
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(matrix, origin='lower', aspect='auto', cmap='viridis')

# axis labels
ax.set_xticks(bins)
ax.set_yticks(bins)
ax.set_xlabel('Target height bin (j)')
ax.set_ylabel('Start height bin (i)')
ax.set_title(f'{percentile_label} Percentile Time (days) Between Height Bins')

# annotate each cell that has a value
for i in bins:
    for j in bins:
        val = matrix[i, j]
        if not np.isnan(val):
            ax.text(j, i, f"{val:.1f}", ha='center', va='center', color='white', fontsize=8)

# colorbar
cbar = fig.colorbar(im, ax=ax, label=f'Days ({percentile_label} percentile)')

plt.tight_layout()
plt.show()

In [None]:
# Replace every missing value in tide_df with 365.0, column by column.
# This modifies tide_df in place, so re-running earlier cells that read
# tide_df will see the filled values.
# NOTE(review): the fill is applied to ALL columns, including any id or count
# columns; 365.0 presumably stands for "a full year" in day-valued metrics —
# confirm it is meaningful for every column that can contain NaN.
for col in tide_df.columns:
    tide_df.loc[tide_df[col].isna(), col] = 365.0
tide_df

In [None]:
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
import matplotlib as mpl

# Map grid of per-cell observation counts: one row per sensor, one column per
# tide level (low / high), with a colour scale shared down each column.

# ───────────────────────────────────────────────────────────────
# 1. polygons → centroids and join metrics
# ───────────────────────────────────────────────────────────────
gdf_pts = (
    grid_df.to_crs(all_grids_df.crs)        # CRS match
           .assign(geometry=lambda d: d.geometry.centroid)
           .merge(tide_df, on="cell_id")
)

# ───────────────────────────────────────────────────────────────
# 2. axes layout: rows = sensors, cols = metrics
# ───────────────────────────────────────────────────────────────
sensors       = ["planet", "sentinel", "landsat"]
base_metrics  = ["count"]
metrics       = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high")]

nrows, ncols  = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 5, nrows * 2),
    constrained_layout=True,
    squeeze=False,   # always a 2-D array, so axes[r, c] works for 1 row/col too
)

# ───────────────────────────────────────────────────────────────
# 3.  loop over columns (metrics) to set a shared scale per column
# ───────────────────────────────────────────────────────────────
cmap = "viridis"

for c, metric in enumerate(metrics):
    # shared vmin/vmax across sensors for this metric
    col_values = [f"{sat}_{metric}" for sat in sensors]
    vmin = gdf_pts[col_values].min().min()
    vmax = gdf_pts[col_values].max().max()
    norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)

    for r, sensor in enumerate(sensors):
        ax      = axes[r, c]
        colname = f"{sensor}_{metric}"

        gdf_pts.plot(
            column     = colname,
            ax         = ax,
            cmap       = cmap,
            norm       = norm,
            marker     = "o",
            markersize = 1,
            linewidth  = 0,
            legend     = False,   # the colour-bar is added once per column below
        )

        # every panel is titled "<Sensor> <Metric>"; axes frames are hidden
        ax.set_title(f"{sensor.title()} {metric.replace('_', ' ').title()}")
        ax.axis("off")

    # add ONE colour‑bar for the whole column: attach it to every axes in the
    # column so the layout takes space from all rows equally, not just the top.
    sm = ScalarMappable(norm=norm, cmap=cmap);  sm.set_array([])
    cbar = fig.colorbar(sm, ax=axes[:, c], shrink=0.6, pad=0.02, location="right")
    cbar.ax.set_ylabel(metric.replace('_', ' ').title())

plt.savefig("/Users/kyledorman/Desktop/tide_count.png", dpi=300)
plt.show()

In [None]:
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
import matplotlib as mpl

# Map grid of the per-cell `days_between_p95` metric: one row per sensor, one
# column per tide level (low / high), with a colour scale shared down each
# column.

# ───────────────────────────────────────────────────────────────
# 1. polygons → centroids and join metrics
# ───────────────────────────────────────────────────────────────
gdf_pts = (
    grid_df.to_crs(all_grids_df.crs)        # CRS match
           .assign(geometry=lambda d: d.geometry.centroid)
           .merge(tide_df, on="cell_id")
)

# ───────────────────────────────────────────────────────────────
# 2. axes layout: rows = sensors, cols = metrics
# ───────────────────────────────────────────────────────────────
sensors       = ["planet", "sentinel", "landsat"]
base_metrics  = ["days_between_p95"]
metrics       = [f"{lvl}_{m}" for m in base_metrics for lvl in ("low", "high")]

nrows, ncols  = len(sensors), len(metrics)
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(ncols * 5, nrows * 2),
    constrained_layout=True,
    squeeze=False,   # always a 2-D array, so axes[r, c] works for 1 row/col too
)

# ───────────────────────────────────────────────────────────────
# 3.  loop over columns (metrics) to set a shared scale per column
# ───────────────────────────────────────────────────────────────
cmap = "viridis"

for c, metric in enumerate(metrics):
    # shared colour scale across sensors for this metric; the lower bound is
    # pinned at 0 rather than taken from the data minimum
    col_values = [f"{sat}_{metric}" for sat in sensors]
    vmin = 0
    vmax = gdf_pts[col_values].max().max()
    norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)

    for r, sensor in enumerate(sensors):
        ax      = axes[r, c]
        colname = f"{sensor}_{metric}"

        gdf_pts.plot(
            column     = colname,
            ax         = ax,
            cmap       = cmap,
            norm       = norm,
            marker     = "o",
            markersize = 1,
            linewidth  = 0,
            legend     = False,   # the colour-bar is added once per column below
        )

        # every panel is titled "<Sensor> <Metric>"; axes frames are hidden
        ax.set_title(f"{sensor.title()} {metric.replace('_', ' ').title()}")
        ax.axis("off")

    # add ONE colour‑bar for the whole column: attach it to every axes in the
    # column so the layout takes space from all rows equally, not just the top.
    sm = ScalarMappable(norm=norm, cmap=cmap);  sm.set_array([])
    cbar = fig.colorbar(sm, ax=axes[:, c], shrink=0.6, pad=0.02, location="right")
    cbar.ax.set_ylabel(metric.replace('_', ' ').title())

plt.savefig("/Users/kyledorman/Desktop/tide_days_between_p95.png", dpi=300)
plt.show()

In [None]:
# Per tide level and sensor, print the percentage of grid cells in tide_df
# whose observation count is exactly zero.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["count"]
metrics = [
    f"{level}_{base}"
    for base in base_metrics
    for level in ("low", "high")
]

n_cells_total = len(tide_df)
for metric in metrics:
    print(metric, "% No observations")
    for sensor in sensors:
        n_zero = tide_df[f'{sensor}_{metric}'].eq(0).sum()
        pct_zero = round(100 * n_zero / n_cells_total, 1)
        print(sensor, pct_zero)

In [None]:
# Per tide level and sensor, print the percentage of grid cells in tide_df
# whose days_between_p95 value is strictly greater than 100.
sensors = ["planet", "sentinel", "landsat"]
base_metrics = ["days_between_p95"]
metrics = [
    f"{level}_{base}"
    for base in base_metrics
    for level in ("low", "high")
]

n_cells_total = len(tide_df)
for metric in metrics:
    print(metric, "% > 100 days")
    for sensor in sensors:
        n_over = tide_df[f'{sensor}_{metric}'].gt(100).sum()
        pct_over = round(100 * n_over / n_cells_total, 1)
        print(sensor, pct_over)