# Define Strata
Generate three equal-area elevation strata (Low, Medium, High) for each province.

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import rasterio
from shapely.geometry import Point
from rasterio.session import AWSSession

In [11]:
PROVINCES_SHP = r"../../data/raw/boundaries/study_provinces.shp"
EQUAL_AREA_CRS = "EPSG:5070"
N_SAMPLE_POINTS_PER_PROVINCE = 10000
DEM_COG_URL = "https://elevation.s3.amazonaws.com/s3.xml" # Public COG VRT for the USGS 3DEP 10m DEM

In [12]:
def generate_random_points_in_polygon(polygon, n_points):
    """Generates N spatially random points within a Shapely polygon."""
    min_x, min_y, max_x, max_y = polygon.bounds
    points = []
    while len(points) < n_points:
        random_point = Point(np.random.uniform(min_x, max_x),
                             np.random.uniform(min_y, max_y))
        if polygon.contains(random_point):
            points.append(random_point)
    return points

def get_elevation_from_cog(points_gdf, dem_url):
    """
    Queries elevation for points from a cloud-hosted DEM.

    Args:
        points_gdf (GeoDataFrame): GeoDataFrame of points in the same CRS as the DEM.
        dem_url (str): URL to the DEM (VRT for a mosaic is ideal).

    Returns:
        list: A list of elevation values.
    """
    # Using AWS Public Dataset - no credentials needed for this dataset
    aws_session = AWSSession(aws_access_key_id=None, aws_secret_access_key=None, region_name='us-east-1')

    elevations = []
    with rasterio.Env(rasterio_session=aws_session):
        with rasterio.open(dem_url) as src:
            # Ensure points are in the DEM's CRS
            points_dem_crs = points_gdf.to_crs(src.crs)
            coords = [(p.x, p.y) for p in points_dem_crs.geometry]

            # Use src.sample to efficiently query point values
            print(f"Querying elevations for {len(coords)} points...")
            for val in src.sample(coords):
                # val is a numpy array (e.g., [123.45])
                elevations.append(val[0])

    return elevations

In [13]:
# 1. Load province polygons
provinces = gpd.read_file(PROVINCES_SHP)
assert not provinces.empty, "No provinces loaded."
if provinces.crs is None:
    raise ValueError("Provinces shapefile has no CRS. Assign it before proceeding.")
provinces_eq = provinces.to_crs(EQUAL_AREA_CRS)
print(f"Loaded {len(provinces_eq)} provinces.")

Loaded 6 provinces.


In [14]:
# 2. Iterate through each province to find its elevation strata
strata_definitions = {}
for index, province in provinces_eq.iterrows():
    province_name = province['PROVINCE']
    print(f"\n--- Processing: {province_name} ---")

    # Generate random points for sampling the elevation distribution
    sample_points = generate_random_points_in_polygon(province.geometry, N_SAMPLE_POINTS_PER_PROVINCE)
    points_gdf = gpd.GeoDataFrame(geometry=sample_points, crs=EQUAL_AREA_CRS)

    # Get elevation for each point from the cloud
    elevation_values = get_elevation_from_cog(points_gdf, DEM_COG_URL)

    # Remove any NoData values if they exist (e.g., points in water bodies)
    valid_elevations = [e for e in elevation_values if e > -9999] # Check DEM's NoData value

    if len(valid_elevations) < 100:
        print(f"Warning: Very few valid elevation points found for {province_name}.")
        continue

    # Calculate the 33.3rd and 66.7th percentiles for equal-area strata
    z33 = np.quantile(valid_elevations, 0.333)
    z66 = np.quantile(valid_elevations, 0.667)

    strata_definitions[province_name] = {
        'z33': z33,
        'z66': z66,
        'strata_low': f"elev < {z33:.2f}",
        'strata_medium': f"{z33:.2f} <= elev < {z66:.2f}",
        'strata_high': f"elev >= {z66:.2f}",
        'num_samples': len(valid_elevations)
    }


--- Processing: COASTAL PLAIN ---


AttributeError: 'NoneType' object has no attribute 'Session'

In [None]:
# 3. Display the final strata definitions
strata_df = pd.DataFrame.from_dict(strata_definitions, orient='index')
print("\n=== Final Equal-Area Strata Definitions ===")
print(strata_df)