In [None]:
import os
import warnings
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

from src.plotting.util import load_grids, plot_gdf_column

warnings.filterwarnings("ignore")  # hide every warning

In [None]:
# Root of the planet_coverage data tree. Defaults to the original author's
# local directory; set the PLANET_COVERAGE_DATA environment variable to point
# the notebook at a copy of the data stored elsewhere. An unset or empty
# variable falls back to the default.
DATA_ROOT = Path(
    os.environ.get("PLANET_COVERAGE_DATA") or "/Users/kyledorman/data/planet_coverage"
)

# Directory for the 30 km points data, and its sibling directory that is
# handed to load_grids() further down.
BASE = DATA_ROOT / "points_30km"
SHORELINES = BASE.parent / "shorelines"

In [None]:
# load_grids returns three objects; only all_grids_df is filtered in this cell.
query_df, all_grids_df, hex_grid = load_grids(SHORELINES)

# NOTE: despite its name, MIN_DIST is applied as an exclusive *upper* bound on dist_km.
MIN_DIST = 20.0
lats = all_grids_df.centroid.y

# Assemble the row mask from named pieces; a row must satisfy all of them.
not_land = ~all_grids_df.is_land
has_dist = all_grids_df.dist_km.notna()
within_dist = all_grids_df.dist_km < MIN_DIST
in_lat_band = (lats > -81.5) & (lats < 81.5)  # centroid latitude strictly inside +/-81.5

valid = not_land & has_dist & within_dist & in_lat_band

# Copy so later edits to grids_df cannot touch all_grids_df.
grids_df = all_grids_df.loc[valid].copy()

In [None]:
# Number of grids_df rows per hex_id. Rows with a negative hex_id are left out
# (presumably a "no hex assigned" sentinel -- TODO confirm).
counts = grids_df[grids_df.hex_id >= 0].hex_id.value_counts()

print(f"hex_ids with fewer than 15 rows: {(counts < 15).sum()}")

size = 10  # histogram bin width, in rows per hex_id

# The "+ 1" pushes the final edge strictly above counts.max(). The last
# histogram bin is closed on the right, so without it a maximum that is an
# exact multiple of `size` would be merged into the bin below instead of
# getting its own bin.
bin_edges = range(0, counts.max() + size + 1, size)

# Write the figure next to the input data (under BASE.parent) instead of
# repeating an absolute path, and make sure the target directory exists.
fig_path = BASE.parent / "figs" / "displays" / "hex_grid_counts.png"
fig_path.parent.mkdir(parents=True, exist_ok=True)

# Histogram of row counts per hex_id
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(counts, bins=bin_edges)
ax.set_xlabel('Number of rows per hex_id')
ax.set_ylabel('Number of hex_ids')
ax.set_title('Distribution of Row Counts per hex_id')
fig.tight_layout()
fig.savefig(fig_path)
plt.show()

In [None]:
# Attach the per-hex row counts to the hex grid. The join is on the index, so
# hex_grid is assumed to be indexed by hex_id -- TODO confirm. The inner join
# keeps only hex cells that appear in `counts`.
#
# The Series is named explicitly instead of relying on value_counts() calling
# its result "count": that default name only exists in pandas >= 2.0. On older
# versions the previous rename(columns={"count": ...}) matched nothing, so no
# `grid_count` column was produced for the cells below.
hex_counts = hex_grid.join(counts.rename("grid_count"), how="inner")
hex_counts

In [None]:
# Plot grid_count for every hex in hex_counts.
# vmax=30 is forwarded to plot_gdf_column (presumably the upper limit of the
# colour scale -- confirm against that helper).
# The output path is derived from BASE rather than repeating the absolute
# directory; it is passed as a str, exactly as before.
plot_gdf_column(
    hex_counts,
    'grid_count',
    show=True,
    save_path=str(BASE.parent / "figs" / "displays" / "hex_counts.png"),
    vmax=30,
)

In [None]:
# Plot only the hexes with fewer than 30 rows. No vmax is passed here, so the
# plotting helper falls back to its own default for that argument.
# The output path is derived from BASE rather than repeating the absolute
# directory; it is passed as a str, exactly as before.
plot_gdf_column(
    hex_counts[hex_counts.grid_count < 30],
    'grid_count',
    show=True,
    save_path=str(BASE.parent / "figs" / "displays" / "hex_counts_filtered.png"),
)

In [None]:
# Plot only the hexes with at least 20 rows.
# NOTE(review): the "hex_counts_filtered" figure keeps grid_count < 30, so
# hexes with 20-29 rows appear in both figures. If this "_rem" figure is meant
# to be the remainder of that one, the threshold here should be 30 -- confirm.
# The output path is derived from BASE rather than repeating the absolute
# directory; it is passed as a str, exactly as before.
plot_gdf_column(
    hex_counts[hex_counts.grid_count >= 20],
    'grid_count',
    show=True,
    save_path=str(BASE.parent / "figs" / "displays" / "hex_counts_filtered_rem.png"),
)