In [1313]:
# ----------------------------------------- #
#                  MODULES                  #

# Standard Modules
import os

# Third-Party Modules
import h3
import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.express as px
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
from scipy.spatial.distance import cdist

#                                           #
# ----------------------------------------- #

# ----------------------------------------- #
#                 FUNCTIONS                 #


# Compute pairwise distances (Haversine) in km
def haversine(latlon1, latlon2):
    lat1, lon1 = np.radians(latlon1[:, 0]), np.radians(latlon1[:, 1])
    lat2, lon2 = np.radians(latlon2[:, 0]), np.radians(latlon2[:, 1])
    dlat = lat2[:, None] - lat1[None, :]
    dlon = lon2[:, None] - lon1[None, :]
    a = (
        np.sin(dlat / 2) ** 2
        + np.cos(lat1[None, :]) * np.cos(lat2[:, None]) * np.sin(dlon / 2) ** 2
    )
    c = 2 * np.arcsin(np.sqrt(a))
    return 6371 * c  # Earth radius in km


# Data Opener for Sightings Data
def open_sightings(path):
    if os.path.exists(path):
        if ".csv" in path:
            sightings = pd.read_csv(path)
            return sightings
        elif ".parquet" in path:
            sightings = pd.read_parquet(path)
            return sightings
        else:
            print("WARNING: Path is not a supported file type.")
    else:
        print("WARNING: Path does not exist.")


# Quick Preprocess on Read of Sightings Data
def quick_preprocess(data, H3_RESOLUTION):
    # Preprocess Sightings - Remove Spurious Data
    data["LONGITUDE"] = np.where(
        data["LONGITUDE"] > 0, data["LONGITUDE"] * -1, data["LONGITUDE"]
    )
    data = data[(data["LONGITUDE"] < -115) & (data["LONGITUDE"] > -160)]

    # Expand by COUNT
    data = data.loc[data.index.repeat(data["COUNT"])].reset_index(drop=True)

    # Assign H3 cell (parallelized with swifter)
    data["H3_CELL"] = data.apply(
        lambda x: h3.latlng_to_cell(x["LATITUDE"], x["LONGITUDE"], H3_RESOLUTION),
        axis=1,
    )

    return data


# Threshold Metrics
def threshold_metrics_with_cm(df, prob_col, target_col, thresholds=None):
    """
    Compute accuracy, precision, recall, F1, and CM percentages for multiple thresholds.

    Returns a DataFrame with:
    threshold, accuracy, precision, recall, f1, TN_pct, FP_pct, FN_pct, TP_pct
    """
    if thresholds is None:
        thresholds = [i / 20 for i in range(21)]  # 0.0, 0.05, ..., 1.0

    metrics_list = []

    for t in thresholds:
        preds = df[prob_col] >= t
        tn, fp, fn, tp = confusion_matrix(df[target_col], preds).ravel()
        total = tn + fp + fn + tp

        metrics_list.append(
            {
                "threshold": t,
                "accuracy": accuracy_score(df[target_col], preds),
                "precision": precision_score(df[target_col], preds, zero_division=0),
                "recall": recall_score(df[target_col], preds, zero_division=0),
                "f1": f1_score(df[target_col], preds, zero_division=0),
                "TN_pct": tn / total,
                "FP_pct": fp / total,
                "FN_pct": fn / total,
                "TP_pct": tp / total,
            }
        )

    return pd.DataFrame(metrics_list)


#                                           #
# ----------------------------------------- #

In [1314]:
# Parameters
H3_RESOLUTION = 6  # Target Resolution
SIGHTINGS_PATH = (
    "../../data/processed/ORCA_SIGHTINGS/ORCA_SIGHTINGS.parquet"  # Data Paths
)
WATER_POLYGONS_PATH = (
    f"../../data/processed/GIS/H3_{H3_RESOLUTION}_Polygons_With_Water_Coverage.parquet"
)

In [1315]:
# Open sightings and preprocess
sightings_data = open_sightings(SIGHTINGS_PATH)
sightings_data = quick_preprocess(sightings_data, H3_RESOLUTION)
sightings_data.DATE = pd.to_datetime(sightings_data.DATE)
sightings_data["POD_TYPE"] = np.where(
    sightings_data["POD_TAG"] == "T", "TRANSIENT", sightings_data["POD_TYPE"]
)
sightings_data["POD_TYPE"] = np.where(
    sightings_data["POD_TAG"] == "O", "OTHER", sightings_data["POD_TYPE"]
)

# Date range
start_date = sightings_data["DATE"].min()
end_date = sightings_data["DATE"].max()

In [1316]:
# Open Water Polygons
water_polygons = gpd.read_parquet(WATER_POLYGONS_PATH).rename(
    columns={"h3_index": "H3_CELL"}
)

In [1317]:
sightings_data[["POD_TAG", "POD_TYPE"]].drop_duplicates()

Unnamed: 0,POD_TAG,POD_TYPE
0,O,OTHER
11,K,SRKW
12,L,SRKW
52,J,SRKW
132,T,TRANSIENT
160171,A,NRKW
160172,G,NRKW
160173,R,NRKW


In [1318]:
# pod_type = "SRKW"

pod_tag = ["K", "L", "J"]  # , "T", "O"]

metric = "DOY"
metric_end_date_test = 27  # 205

# split_dates = ("2020-01-01", "2022-01-01")
split_dates = ("2024-01-01", "2025-08-01")
split_dates = (pd.to_datetime(split_dates[0]), pd.to_datetime(split_dates[1]))

In [1319]:
# 1. Make sightings data scaled such that it can be compared over time.
# --- Filter for the pod type ---
sightings_data_pod = sightings_data[
    (sightings_data.POD_TAG.isin(pod_tag))
    & ((sightings_data.DATE < split_dates[0]) | (sightings_data.DATE > split_dates[1]))
]

# Clip by Water Poly Area
sightings_data_pod = pd.merge(sightings_data_pod, water_polygons, how="left")
sightings_data_pod["water_covers"] = sightings_data_pod["water_covers"].fillna(1)

sightings_data_pod["COUNT_OVER_AREA"] = (
    sightings_data_pod["COUNT"] / sightings_data_pod["water_covers"]
)

In [1320]:
# --- Calculate per-cell counts ---
sightings_data_pod_cell_totals = sightings_data_pod.groupby(
    ["DATE", metric, "H3_CELL"], as_index=False
).agg(CELL_COUNT=("COUNT_OVER_AREA", "sum"))

# --- Calculate daily totals ---
sightings_data_pod_date_totals = sightings_data_pod.groupby(
    ["DATE", metric], as_index=False
).agg(TOTAL_COUNT=("COUNT_OVER_AREA", "sum"))

# --- Merge and compute ratio ---
sightings_data_pod_ratios = pd.merge(
    sightings_data_pod_cell_totals,
    sightings_data_pod_date_totals,
    on=["DATE", metric],
    how="left",
)
sightings_data_pod_ratios["CELL_RATIO"] = (
    sightings_data_pod_ratios["CELL_COUNT"] / sightings_data_pod_ratios["TOTAL_COUNT"]
)

In [1321]:
# Use continuous date range to ensure zero-activity days are included
all_dates = pd.date_range(
    sightings_data_pod["DATE"].min(), sightings_data_pod["DATE"].max(), freq="D"
)
all_cells = sightings_data_pod["H3_CELL"].unique()

full_index = pd.MultiIndex.from_product(
    [all_dates, all_cells], names=["DATE", "H3_CELL"]
)
full_df = pd.DataFrame(index=full_index).reset_index()

# Ensure DATE columns are datetime
sightings_data_pod_ratios["DATE"] = pd.to_datetime(sightings_data_pod_ratios["DATE"])
full_df["DATE"] = pd.to_datetime(full_df["DATE"])

# --- Merge with ratios and fill missing ---
full_ratios = full_df.merge(
    sightings_data_pod_ratios[["DATE", "H3_CELL", "CELL_RATIO"]],
    on=["DATE", "H3_CELL"],
    how="left",
)

In [1322]:
# --- Build full date × cell grid ---
# Fill Method
# fill_method = "zero"
fill_method = "mean"

if fill_method == "zero":
    full_ratios["CELL_RATIO"] = full_ratios["CELL_RATIO"].fillna(0)

    if metric == "DOY":
        full_ratios[metric] = full_ratios["DATE"].dt.day_of_year

    elif metric == "WOY":
        full_ratios[metric] = full_ratios["DATE"].dt.isocalendar().week

    # --- Final output ---
    sightings_data_pod_ratios = full_ratios[["DATE", metric, "H3_CELL", "CELL_RATIO"]]

elif fill_method == "mean":
    if metric == "DOY":
        full_ratios[metric] = full_ratios["DATE"].dt.day_of_year

    elif metric == "WOY":
        full_ratios[metric] = full_ratios["DATE"].dt.isocalendar().week

    # --- Final output ---
    sightings_data_pod_ratios = full_ratios[["DATE", metric, "H3_CELL", "CELL_RATIO"]]

    sightings_data_pod_ratios_means = sightings_data_pod_ratios.groupby(
        ["H3_CELL", metric], as_index=False
    ).agg(MEAN_CELL_RATIO=("CELL_RATIO", "mean"))

    sightings_data_pod_ratios = pd.merge(
        sightings_data_pod_ratios, sightings_data_pod_ratios_means, how="outer"
    )
    sightings_data_pod_ratios["MEAN_CELL_RATIO"] = sightings_data_pod_ratios[
        "MEAN_CELL_RATIO"
    ].fillna(0)
    sightings_data_pod_ratios["CELL_RATIO"] = sightings_data_pod_ratios[
        "CELL_RATIO"
    ].fillna(sightings_data_pod_ratios["MEAN_CELL_RATIO"])

In [1323]:
# Average By WOY to Get General Sense
sightings_data_pod_ratios = sightings_data_pod_ratios.groupby(
    ["H3_CELL", metric], as_index=False
)["CELL_RATIO"].mean()

In [1324]:
## Divide by Area of Water in Cell to Weight by Water Coverage
# (i.e., if cell is 50% covered, then sighting density will be higher than one that is 100% covered if count remains constant)
doy_list = list(sightings_data_pod_ratios[metric].unique())

water_polygons_nosightings = water_polygons[
    ~water_polygons.H3_CELL.isin(sightings_data_pod_ratios.H3_CELL.unique())
]
water_polygons_nosightings = water_polygons_nosightings[["H3_CELL"]]
water_polygons_nosightings[metric] = water_polygons_nosightings.apply(
    lambda x: doy_list, axis=1
)
water_polygons_nosightings = water_polygons_nosightings.explode(metric)

sightings_data_pod_ratios = pd.concat(
    [sightings_data_pod_ratios, water_polygons_nosightings]
)

sightings_data_pod_ratios["CELL_RATIO"] = sightings_data_pod_ratios[
    "CELL_RATIO"
].fillna(0)

In [1325]:
# Unique cells and their coordinates
cells = sightings_data_pod_ratios["H3_CELL"].unique()
cell_coords = np.array([h3.cell_to_latlng(cell) for cell in cells])

# Distance matrix in km
dist_matrix = haversine(cell_coords, cell_coords)

# Gaussian weights
sigma_km = 5
weights = np.exp(-(dist_matrix**2) / (2 * sigma_km**2))

# Container for smoothed results
smoothed_list = []

# Loop over each DOY
for doy, subdf in sightings_data_pod_ratios.groupby(metric):
    # Get mean probability per cell for this DOY
    proba_vector = (
        subdf.groupby("H3_CELL")["CELL_RATIO"].mean().reindex(cells).fillna(0).values
    )

    # Spatial smoothing
    smoothed_proba = weights @ proba_vector / weights.sum(axis=1)

    # Map smoothed values back to the rows in this DOY
    subdf["CELL_RATIO_smooth"] = subdf["H3_CELL"].map(dict(zip(cells, smoothed_proba)))

    smoothed_list.append(subdf)

# Combine all DOYs
sightings_data_pod_ratios = pd.concat(smoothed_list, ignore_index=True)

In [1326]:
sightings_data_pod_ratios = pd.merge(
    sightings_data_pod_ratios, water_polygons[["H3_CELL", "geometry"]], how="left"
)
sightings_data_pod_ratios = gpd.GeoDataFrame(
    sightings_data_pod_ratios, geometry="geometry", crs="EPSG:4326"
)

In [1327]:
sightings_data_pod_ratios_minmax = sightings_data_pod_ratios.groupby(
    [metric], as_index=False
).agg(
    MAX_SMOOTHED_RATIO=("CELL_RATIO_smooth", "max"),
    MIN_SMOOTHED_RATIO=("CELL_RATIO_smooth", "min"),
)

In [1328]:
sightings_data_pod_ratios = pd.merge(
    sightings_data_pod_ratios, sightings_data_pod_ratios_minmax
)
sightings_data_pod_ratios["CELL_RATIO_smooth_Scaled"] = (
    sightings_data_pod_ratios["CELL_RATIO_smooth"]
    - sightings_data_pod_ratios["MIN_SMOOTHED_RATIO"]
) / (
    sightings_data_pod_ratios["MAX_SMOOTHED_RATIO"]
    - sightings_data_pod_ratios["MIN_SMOOTHED_RATIO"]
)

In [1329]:
sightings_data_pod_ratios[sightings_data_pod_ratios[metric] == 244].explore(
    "CELL_RATIO_smooth_Scaled", cmap="turbo"
).save("smooth_200.html")

In [1330]:
# For each H3_CELL, find the index of the max CELL_RATIO_smooth_Scaled
idx = sightings_data_pod_ratios.groupby("H3_CELL")["CELL_RATIO_smooth_Scaled"].idxmax()

# Select the rows corresponding to that max
max_doy_per_cell = sightings_data_pod_ratios.loc[
    idx, ["H3_CELL", metric, "CELL_RATIO_smooth_Scaled"]
].reset_index(drop=True)

max_doy_per_cell = pd.merge(
    water_polygons[["H3_CELL", "geometry"]], max_doy_per_cell, on="H3_CELL"
)
max_doy_per_cell = max_doy_per_cell.sort_values(metric).reset_index(drop=True)

In [1331]:
# Make sure DOY is an integer
max_doy_per_cell[metric] = max_doy_per_cell[metric].astype(int)

# Set min and max for the color scale
if metric == "DOY":
    vmin = 1
    vmax = 365

elif metric == "WOY":
    vmin = 1
    vmax = 52

# Explore with continuous colormap, explicitly setting vmin and vmax
max_doy_per_cell.explore(
    column=metric, cmap="turbo", vmin=vmin, vmax=vmax, legend=True
).save("doy_max.html")

In [1332]:
# Aggregate to WOY

In [1333]:
woy_sightings_data_pod_ratios = sightings_data_pod_ratios.copy()
woy_sightings_data_pod_ratios["WOY"] = (woy_sightings_data_pod_ratios["DOY"] // 7) + 1
woy_sightings_data_pod_ratios["WOY"] = np.where(
    woy_sightings_data_pod_ratios["WOY"] > 52, 1, woy_sightings_data_pod_ratios["WOY"]
)
woy_sightings_data_pod_ratios = woy_sightings_data_pod_ratios.groupby(
    ["H3_CELL", "WOY"], as_index=False
)["CELL_RATIO_smooth_Scaled"].mean()

woy_sightings_data_pod_ratiosmnmx = woy_sightings_data_pod_ratios.groupby(
    ["WOY"], as_index=False
).agg(
    MX_VAL=("CELL_RATIO_smooth_Scaled", "max"),
    MN_VAL=("CELL_RATIO_smooth_Scaled", "min"),
)
woy_sightings_data_pod_ratios = pd.merge(
    woy_sightings_data_pod_ratios, woy_sightings_data_pod_ratiosmnmx
)
woy_sightings_data_pod_ratios["CELL_RATIO_smooth_Scaled"] = (
    woy_sightings_data_pod_ratios["CELL_RATIO_smooth_Scaled"]
    - woy_sightings_data_pod_ratios["MN_VAL"]
) / (woy_sightings_data_pod_ratios["MX_VAL"] - woy_sightings_data_pod_ratios["MN_VAL"])

In [1334]:
woy_sightings_data_pod_ratios = pd.merge(
    woy_sightings_data_pod_ratios, water_polygons[["H3_CELL", "geometry"]], how="left"
)
woy_sightings_data_pod_ratios = gpd.GeoDataFrame(
    woy_sightings_data_pod_ratios, geometry="geometry", crs="EPSG:4326"
)

***

### Validation

In [1335]:
eval_metric = "WOY"

In [1336]:
max_date = sightings_data[(sightings_data.POD_TAG.isin(pod_tag))].DATE.max()
max_ = (
    sightings_data[
        (sightings_data.POD_TAG.isin(pod_tag)) & (sightings_data.DATE == max_date)
    ][["YEAR", eval_metric]]
    .drop_duplicates()
    .iloc[0]
)
max_year = max_["YEAR"]
max_metric = max_[eval_metric]

sightings_data.DATE = pd.to_datetime(sightings_data.DATE)

sightings_data_pod_test = sightings_data[
    (sightings_data.POD_TAG.isin(pod_tag))
    & ((sightings_data.DATE > split_dates[0]) & (sightings_data.DATE < split_dates[1]))
]

sightings_data_pod_test = sightings_data_pod_test.groupby(
    ["H3_CELL", eval_metric, "YEAR"], as_index=False
).agg(SIGHTING=("COUNT", "count"))
sightings_data_pod_test["SIGHTING"] = 1

In [1337]:
sightings_data_pod_ratios_lookup = woy_sightings_data_pod_ratios[
    ["H3_CELL", eval_metric, "CELL_RATIO_smooth_Scaled"]
]

sightings_data_pod_test_fin = []
for yr in sightings_data_pod_test.YEAR.unique():
    # max_year, max_metric
    sightings_data_pod_test_yr = pd.merge(
        sightings_data_pod_test[sightings_data_pod_test.YEAR == yr],
        sightings_data_pod_ratios_lookup,
        how="outer",
    )

    if yr == max_year:
        sightings_data_pod_test_yr = sightings_data_pod_test_yr[
            sightings_data_pod_test_yr[eval_metric] <= max_metric
        ]
    sightings_data_pod_test_yr["YEAR"] = yr
    sightings_data_pod_test_fin.append(sightings_data_pod_test_yr)

sightings_data_pod_test_fin = pd.concat(sightings_data_pod_test_fin)
sightings_data_pod_test_fin["SIGHTING"] = sightings_data_pod_test_fin[
    "SIGHTING"
].fillna(0)

In [1338]:
# Get Metrics DF
metrics_df = threshold_metrics_with_cm(
    sightings_data_pod_test_fin,
    prob_col="CELL_RATIO_smooth_Scaled",
    target_col="SIGHTING",
)

In [1339]:
fig = px.line()
fig.add_scatter(x=metrics_df["threshold"], y=metrics_df["accuracy"], name="accuracy")
fig.add_scatter(x=metrics_df["threshold"], y=metrics_df["precision"], name="precision")
fig.add_scatter(x=metrics_df["threshold"], y=metrics_df["recall"], name="recall")
fig.add_scatter(x=metrics_df["threshold"], y=metrics_df["f1"], name="f1")
fig.show()

In [1340]:
# Example threshold
threshold = 0.05

# Make predictions based on threshold
sightings_data_pod_test_fin["PREDICTION"] = (
    sightings_data_pod_test_fin["CELL_RATIO_smooth_Scaled"] >= threshold
)

# Compute confusion matrix
cm = confusion_matrix(
    sightings_data_pod_test_fin["SIGHTING"], sightings_data_pod_test_fin["PREDICTION"]
)

# Optional: make it readable
cm_df = pd.DataFrame(
    cm,
    index=["Actual False", "Actual True"],
    columns=["Predicted False", "Predicted True"],
)

display(cm_df)

print()
display(metrics_df[metrics_df.threshold == threshold])

Unnamed: 0,Predicted False,Predicted True
Actual False,297759,16161
Actual True,98,459





Unnamed: 0,threshold,accuracy,precision,recall,f1,TN_pct,FP_pct,FN_pct,TP_pct
1,0.05,0.948298,0.027617,0.824057,0.053444,0.946839,0.05139,0.000312,0.00146


In [1341]:
tt = sightings_data_pod_test_fin[
    (sightings_data_pod_test_fin.SIGHTING == 1)
    & (sightings_data_pod_test_fin.PREDICTION == False)
]
tt_cells = tt.H3_CELL.tolist()

In [1342]:
tt

Unnamed: 0,H3_CELL,WOY,YEAR,SIGHTING,CELL_RATIO_smooth_Scaled,PREDICTION
112079,8628c249fffffff,20,2024,1.0,4.957097e-04,False
134605,8628d0a67ffffff,30,2024,1.0,2.951750e-02,False
134662,8628d0a6fffffff,35,2024,1.0,1.282369e-05,False
134663,8628d0a6fffffff,36,2024,1.0,1.188584e-03,False
134671,8628d0a6fffffff,44,2024,1.0,2.993289e-02,False
...,...,...,...,...,...,...
150455,8628d5c2fffffff,17,2025,1.0,7.980097e-08,False
150663,8628d5c77ffffff,17,2025,1.0,5.240475e-06,False
151175,8628d5d4fffffff,9,2025,1.0,2.038533e-02,False
176237,8628deb6fffffff,7,2025,1.0,5.635701e-11,False


In [1343]:
tmp = tt[eval_metric].value_counts().reset_index()
tmp2 = pd.DataFrame({eval_metric: sightings_data[eval_metric].unique()})

tmp = pd.merge(tmp, tmp2, how="outer")
tmp["count"] = tmp["count"].fillna(0)

tmp = tmp.sort_values(eval_metric).reset_index(drop=True)

In [1344]:
fig = px.bar(tmp, x=eval_metric, y="count")
fig.show()

In [1345]:
sightings_checks = sightings_data[
    (sightings_data.POD_TAG.isin(pod_tag))
    & (sightings_data.H3_CELL.isin(tt_cells))
    & ((sightings_data.DATE > split_dates[0]) & (sightings_data.DATE < split_dates[1]))
]

sightings_checks = pd.merge(sightings_checks, tt)

In [1346]:
import geopandas as gpd
from shapely.geometry import Point

# Suppose your DataFrame is called df and has LATITUDE, LONGITUDE
sightings_checks["geometry"] = sightings_checks.apply(
    lambda row: Point(row["LONGITUDE"], row["LATITUDE"]), axis=1
)

# Convert to GeoDataFrame
sightings_checks = gpd.GeoDataFrame(
    sightings_checks, geometry="geometry", crs="EPSG:4326"
)

In [1347]:
metric_filter = 18

In [1348]:
m = woy_sightings_data_pod_ratios[
    woy_sightings_data_pod_ratios[eval_metric] == metric_filter
].explore("CELL_RATIO_smooth_Scaled", cmap="turbo")

sightings_checks[sightings_checks[eval_metric] == metric_filter].explore(
    m=m, color="#FCFF4B"
).save("points.html")

In [1349]:
temporal_plot = woy_sightings_data_pod_ratios[
    ["WOY", "CELL_RATIO_smooth_Scaled", "geometry"]
].copy()
temporal_plot.columns = ["WOY", "PROBS", "geometry"]
temporal_plot = gpd.GeoDataFrame(temporal_plot, geometry="geometry", crs="EPSG:4326")
temporal_plot["Latitude"] = temporal_plot.geometry.centroid.y
temporal_plot["Longitude"] = temporal_plot.geometry.centroid.x


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.



Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [1350]:
# import plotly.express as px

# # Get fixed scale bounds
# zmin = temporal_plot["PROBS"].min()
# zmax = temporal_plot["PROBS"].max()

# fig = px.density_mapbox(
#     temporal_plot,
#     lat="Latitude",
#     lon="Longitude",
#     z="PROBS",
#     radius=50,
#     center=dict(lat=48, lon=-122),
#     zoom=5,
#     mapbox_style="carto-positron",
#     color_continuous_scale="Reds",
#     range_color=(zmin, zmax),  # <-- fixed color range
#     height=800,
#     animation_frame="WOY"
# )

# # Optional: keep colorbar in the same position
# fig.update_layout(
#     coloraxis_colorbar=dict(
#         x=0.92,  # position to the right
#         y=0.5,   # center vertically
#         title="Probability"
#     )
# )

# fig.show()

In [1351]:
import folium
from folium.plugins import HeatMapWithTime
import pandas as pd
import numpy as np

# Ensure numeric columns
df = temporal_plot.copy()
df["lat"] = df.geometry.centroid.y.astype(float)
df["lon"] = df.geometry.centroid.x.astype(float)
df["PROBS"] = df["PROBS"].astype(float)

df["PROBS"] = np.where(df["PROBS"] < 0.01, 0.01, df["PROBS"])


weeks = sorted(df["WOY"].unique())
heat_data = []

for w in weeks:
    week_df = df[df["WOY"] == w]
    # Drop any rows with missing coords or PROBS
    week_df = week_df.dropna(subset=["lat", "lon", "PROBS"])
    # Convert to list of [lat, lon, weight]
    points = week_df[["lat", "lon", "PROBS"]].values.tolist()
    heat_data.append(points)

# Map
m = folium.Map(location=[48, -122], zoom_start=6, tiles="CartoDB positron")

HeatMapWithTime(
    heat_data,
    index=weeks,
    auto_play=False,
    max_opacity=0.8,
    radius=15,
    gradient={
        0.1: "#001219",
        0.2: "#005f73",
        0.3: "#0a9396",
        0.4: "#94d2bd",
        0.5: "#e9d8a6",
        0.6: "#ee9b00",
        0.7: "#ca6702",
        0.8: "#bb3e03",
        0.9: "#ae2012",
        1.0: "#9b2226",
    },
).add_to(m)

m.save("sightings_heatmap.html")


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.



Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.


