In [1]:
import pandas as pd
import h3
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Import our new modules
from texas_mushrooms.pipeline import spatial, weather
from texas_mushrooms.modeling.bayesian import BayesianMushroomModel

# Set plot style
sns.set_theme(style="whitegrid")
%matplotlib inline

# Create outputs directory
OUTPUT_DIR = Path("../data/outputs")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

## 1. Load Data and Add H3 Indices

We load the processed geospatial data (falling back to the raw photo table if the processed file is missing) and add H3 indices at resolution 7 (~1.2 km edge length).

In [2]:
# Load the processed geospatial table; fall back to the raw photo table when it is absent.
data_path = Path("../data/processed/photo_geospatial.csv")
if not data_path.exists():
    print("Processed data not found. Loading raw photos...")
    data_path = Path("../data/raw/photos.csv")

df = pd.read_csv(data_path)

# H3 indexing needs BOTH coordinates. Fail fast with an actionable message instead of
# printing a warning and leaving `df_h3` undefined, which would only surface later as
# an unrelated NameError in the cells that consume it.
missing_coords = [col for col in ("latitude", "longitude") if col not in df.columns]
if missing_coords:
    raise ValueError(
        f"Latitude/Longitude columns missing from {data_path}: {missing_coords}. "
        "Please run the processing pipeline first."
    )

# Add the H3 cell index of each photo at resolution 7.
# NOTE(review): presumably add_h3_indices returns a frame with an added `h3_index`
# column derived from latitude/longitude -- confirm against pipeline.spatial.
df_h3 = spatial.add_h3_indices(df, resolution=7)
print(f"Added H3 indices. Unique cells: {df_h3['h3_index'].nunique()}")
display(df_h3.head())

Added H3 indices. Unique cells: 87


Unnamed: 0,date,page_url,photo_url,latitude,longitude,label_species,h3_index
0,2018-01-06,https://www.texasmushrooms.org/date-en/2018-01...,https://www.texasmushrooms.org/asergeev/pictur...,30.626354,-96.318291,Unidentified,87446d3adffffff
1,2018-01-06,https://www.texasmushrooms.org/date-en/2018-01...,https://www.texasmushrooms.org/asergeev/pictur...,30.626368,-96.31827,Cortinarius xanthodryophilus,87446d3adffffff
2,2018-01-06,https://www.texasmushrooms.org/date-en/2018-01...,https://www.texasmushrooms.org/asergeev/pictur...,30.626376,-96.318319,Unidentified,87446d3adffffff
3,2018-01-07,https://www.texasmushrooms.org/date-en/2018-01...,https://www.texasmushrooms.org/asergeev/pictur...,30.601548,-96.30666,Fomitiporia punctata,87446d312ffffff
4,2018-01-07,https://www.texasmushrooms.org/date-en/2018-01...,https://www.texasmushrooms.org/asergeev/pictur...,30.601548,-96.30666,Fomitiporia punctata,87446d312ffffff


## 2. Enrich with Elevation

We will fetch elevation data for each unique H3 cell centroid using the Open-Meteo API.

In [3]:
# Unique H3 cells present in the data (rows without an index are ignored).
unique_h3 = df_h3["h3_index"].dropna().unique()

# One centroid per cell, split into parallel latitude / longitude lists.
# NOTE(review): assumes get_h3_centroid returns (lat, lon) in that order -- confirm.
centroids = [spatial.get_h3_centroid(h) for h in unique_h3]
lats = [c[0] for c in centroids]
lons = [c[1] for c in centroids]

# Fetch elevation for all centroids in one batch call.
print(f"Fetching elevation for {len(unique_h3)} unique cells...")
elevations = list(weather.fetch_elevation_batch(lats, lons))

# zip() silently truncates to the shorter input, so a short or partial response would
# leave cells without elevation (or pair them with the wrong value) without any error.
if len(elevations) != len(unique_h3):
    raise ValueError(
        f"Expected {len(unique_h3)} elevation values, got {len(elevations)}; "
        "refusing to build a misaligned cell-to-elevation mapping."
    )

# Cell -> elevation lookup, reused by later cells.
h3_elevation = dict(zip(unique_h3, elevations))

# Attach the cell-level elevation to every photo row.
df_h3["elevation"] = df_h3["h3_index"].map(h3_elevation)

# Persist the enriched table.
df_h3.to_csv(OUTPUT_DIR / "h3_enriched_data.csv", index=False)
print(f"H3 enriched data saved to {OUTPUT_DIR / 'h3_enriched_data.csv'}")

display(df_h3[["h3_index", "latitude", "longitude", "elevation"]].head())

Fetching elevation for 87 unique cells...
H3 enriched data saved to ../data/outputs/h3_enriched_data.csv


Unnamed: 0,h3_index,latitude,longitude,elevation
0,87446d3adffffff,30.626354,-96.318291,98.0
1,87446d3adffffff,30.626368,-96.31827,98.0
2,87446d3adffffff,30.626376,-96.318319,98.0
3,87446d312ffffff,30.601548,-96.30666,80.0
4,87446d312ffffff,30.601548,-96.30666,80.0


In [8]:
# Choropleth map of mean elevation per H3 cell.
# Requires df_h3 with columns: h3_index, elevation.

import folium
from IPython.display import IFrame

# Aggregate: one record per H3 cell with mean elevation (rows missing either value are excluded)
h3_stats = (
    df_h3.dropna(subset=["h3_index", "elevation"])
         .groupby("h3_index", as_index=False)["elevation"]
         .mean()
)

# Build GeoJSON FeatureCollection directly from H3 boundaries
features = []
for h, elev in zip(h3_stats["h3_index"], h3_stats["elevation"]):
    elev = float(elev)
    # Both API generations are asked for (lat, lon) vertices so the swap below is
    # correct either way. The h3 3.x fallback must use geo_json=False: with
    # geo_json=True it already returns (lon, lat) pairs, and swapping those again
    # would write [lat, lon] into the GeoJSON and misplace every polygon.
    try:
        boundary = h3.cell_to_boundary(h)  # h3 >= 4
    except AttributeError:
        boundary = h3.h3_to_geo_boundary(h, geo_json=False)  # h3 3.x
    # GeoJSON wants [lon, lat] and a closed ring (first vertex repeated at the end)
    coords = [[lon, lat] for (lat, lon) in boundary]
    if coords[0] != coords[-1]:
        coords.append(coords[0])
    features.append({
        "type": "Feature",
        "properties": {"h3_index": h, "elevation": elev},
        "geometry": {"type": "Polygon", "coordinates": [coords]},
    })

geojson = {"type": "FeatureCollection", "features": features}

# Compute center from polygons (simple average of vertices)
if features:
    all_pts = [pt for f in features for pt in f["geometry"]["coordinates"][0]]
    avg_lon = sum(p[0] for p in all_pts) / len(all_pts)
    avg_lat = sum(p[1] for p in all_pts) / len(all_pts)
    center = [avg_lat, avg_lon]
else:
    center = [31.9686, -99.9018]  # Texas fallback

m = folium.Map(location=center, zoom_start=6, tiles="CartoDB positron")

# Colored layer: joins h3_stats to the polygons on h3_index
folium.Choropleth(
    geo_data=geojson,
    data=h3_stats,
    columns=["h3_index", "elevation"],
    key_on="feature.properties.h3_index",
    fill_color="YlGnBu",   # ColorBrewer palette (valid)
    fill_opacity=0.75,
    line_opacity=0.25,
    legend_name="Elevation (m)",
).add_to(m)

# Tooltip layer for H3 and elevation
folium.GeoJson(
    geojson,
    name="H3 Elevation",
    tooltip=folium.features.GeoJsonTooltip(
        fields=["h3_index", "elevation"],
        aliases=["H3", "Elevation (m)"],
        localize=True,
    ),
    # Transparent fill so this overlay does not tint the choropleth colors beneath it
    # (same styling as the photo-count map's tooltip layer).
    style_function=lambda _: {"weight": 0.25, "color": "#555", "fillOpacity": 0},
    highlight_function=lambda _: {"weight": 2, "color": "#000"},
).add_to(m)

folium.LayerControl().add_to(m)

# Write the interactive map as a standalone HTML file
map_path = OUTPUT_DIR / "h3_elevation_map.html"
m.save(str(map_path))
print(f"Map saved to {map_path}")

Map saved to ../data/outputs/h3_elevation_map.html


In [9]:
# Choropleth map of the number of photos per H3 cell.

# Aggregate counts per H3 cell
h3_counts = df_h3.groupby("h3_index").size().reset_index(name="photo_count")

# Build GeoJSON FeatureCollection for counts
features_counts = []
for h, count in zip(h3_counts["h3_index"], h3_counts["photo_count"]):
    count = int(count)  # plain int so the value serializes cleanly into the GeoJSON

    # Both API generations are asked for (lat, lon) vertices so the swap below is
    # correct either way. The h3 3.x fallback must use geo_json=False: with
    # geo_json=True it already returns (lon, lat) pairs, and swapping those again
    # would write [lat, lon] into the GeoJSON and misplace every polygon.
    try:
        boundary = h3.cell_to_boundary(h)  # h3 >= 4
    except AttributeError:
        boundary = h3.h3_to_geo_boundary(h, geo_json=False)  # h3 3.x

    # Convert to GeoJSON format [lon, lat] and close the ring
    coords = [[lon, lat] for (lat, lon) in boundary]
    if coords[0] != coords[-1]:
        coords.append(coords[0])

    features_counts.append({
        "type": "Feature",
        "properties": {"h3_index": h, "photo_count": count},
        "geometry": {"type": "Polygon", "coordinates": [coords]},
    })

geojson_counts = {"type": "FeatureCollection", "features": features_counts}

# Initialize map (reuse `center` if an earlier cell defined it, else Texas default)
if 'center' not in locals():
    center = [31.9686, -99.9018]

m_counts = folium.Map(location=center, zoom_start=6, tiles="CartoDB positron")

# Add Choropleth layer for counts
folium.Choropleth(
    geo_data=geojson_counts,
    data=h3_counts,
    columns=["h3_index", "photo_count"],
    key_on="feature.properties.h3_index",
    fill_color="YlOrRd",  # Yellow-Orange-Red is good for counts/density
    fill_opacity=0.75,
    line_opacity=0.25,
    legend_name="Photo Count",
).add_to(m_counts)

# Add Tooltip layer
folium.GeoJson(
    geojson_counts,
    name="H3 Photo Counts",
    tooltip=folium.features.GeoJsonTooltip(
        fields=["h3_index", "photo_count"],
        aliases=["H3 Index", "Total Photos"],
        localize=True,
    ),
    style_function=lambda _: {"weight": 0.25, "color": "#555", "fillOpacity": 0}, # Transparent fill to see Choropleth
    highlight_function=lambda _: {"weight": 2, "color": "#000"},
).add_to(m_counts)

folium.LayerControl().add_to(m_counts)

# Save the interactive map as a standalone HTML file
map_path_counts = OUTPUT_DIR / "h3_counts_map.html"
m_counts.save(str(map_path_counts))
print(f"Count map saved to {map_path_counts}")

Count map saved to ../data/outputs/h3_counts_map.html


## 3. Prepare Data for Modeling

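We aggregate photo counts by H3 cell and date to create the target variable for our Poisson model. Only cell/date pairs with at least one photo appear in the result; days on which a cell has no photos are not represented as zero-count rows.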

In [6]:
# Build the modeling table: one row per (H3 cell, date) with the number of photos.
# A `date` column is required; without it the cell only reports the problem.
if "date" not in df_h3.columns:
    print("Date column missing. Cannot aggregate for temporal modeling.")
else:
    daily_counts = (
        df_h3.groupby(["h3_index", "date"])
        .size()
        .reset_index(name="count")
        # Elevation is static per cell, so it is looked up from the cell -> elevation mapping
        .assign(elevation=lambda counts: counts["h3_index"].map(h3_elevation))
        # Rows whose cell has no elevation are dropped (they are not imputed)
        .dropna(subset=["elevation"])
    )

    # Persist the aggregated table
    daily_counts.to_csv(OUTPUT_DIR / "daily_counts_by_h3.csv", index=False)
    print(f"Daily counts saved to {OUTPUT_DIR / 'daily_counts_by_h3.csv'}")

    print("Modeling Data Prepared:")
    display(daily_counts.head())

Daily counts saved to ../data/outputs/daily_counts_by_h3.csv
Modeling Data Prepared:


Unnamed: 0,h3_index,date,count,elevation
0,874468428ffffff,2022-11-02,1,112.0
1,874468510ffffff,2020-11-14,6,52.0
2,874468510ffffff,2021-11-27,33,52.0
3,874468514ffffff,2019-11-23,9,40.0
4,874468514ffffff,2020-10-17,6,40.0
