## import libraries

In [1]:
import pandas as pd
import geopandas as gpd
import folium

from shapely.geometry import Polygon, Point
import numpy as np
import random

import rasterio
from rasterio.transform import from_origin

In [2]:
# Select pyogrio as GeoPandas' file I/O engine for the rest of the notebook
# (a global option, so it affects every later cell in this session).
gpd.options.io_engine = "pyogrio"

## folium map generator function

In [2]:
# Basemap layers offered by the map generator below. Each entry provides the
# tile source ("tiles"), the attribution text shown on the map ("attr") and
# the label used in the layer control ("name").

# Attribution shared by all USGS National Map basemaps.
_USGS_ATTRIBUTION = 'U.S. Department of the Interior | U.S. Geological Survey'

# USGS tile URLs differ only in the service name between prefix and suffix.
_USGS_URL_PREFIX = "https://basemap.nationalmap.gov/arcgis/rest/services/"
_USGS_URL_SUFFIX = "/MapServer/tile/{z}/{y}/{x}"

# Attribution for the Stadia-hosted Stamen styles built on OpenMapTiles data.
_STADIA_STAMEN_ATTRIBUTION = '&copy; <a href="https://stadiamaps.com/" target="_blank">Stadia Maps</a> <a href="https://stamen.com/" target="_blank">&copy; Stamen Design</a> &copy; <a href="https://openmaptiles.org/" target="_blank">OpenMapTiles</a> &copy; <a href="https://www.openstreetmap.org/copyright" target="_blank">OpenStreetMap</a>'

# Alternative basemaps, currently disabled (add an entry to tiles_list to enable):
# {"tiles": "https://tiles.stadiamaps.com/tiles/stamen_toner/{z}/{x}/{y}{r}.png", "attr": _STADIA_STAMEN_ATTRIBUTION, "name": "Stamen Toner"},
# {"tiles": "https://tiles.stadiamaps.com/tiles/stamen_terrain/{z}/{x}/{y}{r}.png", "attr": _STADIA_STAMEN_ATTRIBUTION, "name": "Stamen Terrain"},
# {"tiles": "https://tiles.stadiamaps.com/tiles/stamen_watercolor/{z}/{x}/{y}.jpg", "attr": '&copy; <a href="https://stadiamaps.com/" target="_blank">Stadia Maps</a> <a href="https://stamen.com/" target="_blank">&copy; Stamen Design</a> &copy; <a href="https://www.openstreetmap.org/copyright" target="_blank">OpenStreetMap</a>', "name": "Stamen Watercolor"},
# {"tiles": "https://tiles.stadiamaps.com/tiles/cartodb_positron/{z}/{x}/{y}{r}.png", "attr": 'Map tiles by CartoDB, under CC BY 3.0. Data by &copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> under ODbL.', "name": "CartoDB Positron"},
# {"tiles": "https://tiles.stadiamaps.com/tiles/cartodb_dark_matter/{z}/{x}/{y}{r}.png", "attr": 'Map tiles by CartoDB, under CC BY 3.0. Data by &copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> under ODbL.', "name": "CartoDB Dark Matter"},

tiles_list = [
    # USGS basemaps first, in the order they should appear in the layer control.
    *(
        {
            "tiles": _USGS_URL_PREFIX + service + _USGS_URL_SUFFIX,
            "attr": _USGS_ATTRIBUTION,
            "name": label,
        }
        for service, label in (
            ("USGSTopo", "USGS Topo"),
            ("USGSImageryOnly", "USGS Imagery"),
            ("USGSImageryTopo", "USGS Imagery Topo"),
        )
    ),
    {
        "tiles": "OpenStreetMap",
        "attr": "© OpenStreetMap contributors",
        "name": "OpenStreetMap",
    },
    {
        "tiles": "https://tiles.stadiamaps.com/tiles/stamen_toner_lite/{z}/{x}/{y}{r}.png",
        "attr": _STADIA_STAMEN_ATTRIBUTION,
        "name": "Stamen Toner Lite",
    },
]

def generate_folium_map_with_csb_centroids_and_colors(gdf, zoom=6, decimal_places=3):
    """Build an interactive folium map of crop polygons and their centroids.

    The map contains every basemap listed in the module-level ``tiles_list``,
    a GeoJSON layer with the geometries of ``gdf`` styled by their ``color``
    column, a feature group with one circle marker per centroid, and a layer
    control to toggle all of them.

    Args:
        gdf: GeoDataFrame with a CRS and the columns ``geometry``,
            ``CDL2023``, ``color``, ``Crop`` and ``Elevation``. It is never
            modified; any ``Longitude``/``Latitude`` columns it already has
            are ignored and recomputed from the geometry centroids.
        zoom: Initial zoom level of the map.
        decimal_places: Number of decimals used when *displaying* latitude and
            longitude in popups. Marker positions always use the unrounded
            centroid coordinates.

    Returns:
        folium.Map: The assembled map.

    Raises:
        ValueError: If ``gdf`` has no CRS.
        KeyError: If one of the required columns is missing.
    """
    if gdf.crs is None:
        raise ValueError(
            "gdf has no CRS; set one so it can be reprojected to EPSG:4326"
        )

    # to_crs() already returns a new frame. Copy in the other branch as well so
    # the Longitude/Latitude columns written below never overwrite the caller's
    # data (previously an EPSG:4326 input had its numeric columns replaced by
    # formatted strings).
    if gdf.crs != "EPSG:4326":
        gdf = gdf.to_crs(epsg=4326)
    else:
        gdf = gdf.copy()

    # Compute centroids in a projected CRS (geographic centroids are distorted),
    # then express them as longitude/latitude again.
    centroids = gdf.geometry.to_crs('EPSG:2163').centroid.to_crs('EPSG:4326')
    centroid_lons = centroids.x
    centroid_lats = centroids.y

    # Display-only strings with the requested precision for the popups.
    gdf['Longitude'] = centroid_lons.map(lambda value: f'{value:.{decimal_places}f}')
    gdf['Latitude'] = centroid_lats.map(lambda value: f'{value:.{decimal_places}f}')

    # Restrict the GeoJSON properties to what the popups and styling need.
    gdf = gdf[['geometry', 'CDL2023', 'Longitude', 'Latitude', 'color', 'Crop', 'Elevation']]

    def style_function(feature):
        # Outline and fill each polygon with its own crop colour.
        return {
            'fillColor': feature['properties']['color'],
            'color': feature['properties']['color'],
            'weight': 1,
            'fillOpacity': 0.6
        }

    geojson_data = gdf.to_json()

    # Centre the map on the middle of the data's bounding box.
    minx, miny, maxx, maxy = gdf.total_bounds
    center_longitude = (minx + maxx) / 2
    center_latitude = (miny + maxy) / 2

    # tiles=None: start without a default basemap, the layers are added below.
    m = folium.Map(location=[center_latitude, center_longitude], zoom_start=zoom, tiles=None)

    for tile_info in tiles_list:
        folium.TileLayer(
            tiles=tile_info["tiles"],
            attr=tile_info["attr"],
            name=tile_info["name"],
        ).add_to(m)

    folium.GeoJson(
        geojson_data,
        name='Crop Sequence Boundaries',
        style_function=style_function,
        popup=folium.GeoJsonPopup(fields=['CDL2023', 'Crop', 'Latitude', 'Longitude','Elevation'], aliases=['Crop ID:', 'Crop Name:', 'Latitude:', 'Longitude:', 'Elevation:'])
    ).add_to(m)

    centroid_layer = folium.FeatureGroup(name='Centroid Markers')

    # The centroid series share gdf's row order, so they can be walked in
    # lockstep with the rows. Using them (instead of parsing the formatted
    # strings back to floats) keeps marker placement independent of
    # decimal_places.
    for (_, row), lat, lon in zip(gdf.iterrows(), centroid_lats, centroid_lons):
        popup_html = f'<b>Crop ID:</b> {row["CDL2023"]}<br><b>Crop Name:</b> {row["Crop"]}<br><b>Latitude:</b> {row["Latitude"]}<br><b>Longitude:</b> {row["Longitude"]}<br><b>Elevation:</b> {row["Elevation"]}'
        folium.CircleMarker(
            location=[float(lat), float(lon)],
            popup=folium.Popup(popup_html, max_width=250),
            radius=4,
            color='grey',
            fill=True,
            fill_color=row['color'],
            fill_opacity=1,
            weight=0.2,
        ).add_to(centroid_layer)

    centroid_layer.add_to(m)

    # Added last so the control lists every layer registered above.
    folium.LayerControl().add_to(m)

    return m


## load and sample data

In [3]:
# Load the combined "four corners" Crop Sequence Boundaries (CSB) table from
# Parquet into a GeoDataFrame.
csb_combined = gpd.read_parquet('../data/agricultural/CSB/siads696/20240715_165400_gdf_four_corners_combined.parquet')

In [4]:
# Drop the vestigial 'index' column.
# NOTE: the drop is in place, so re-running this cell after the column is gone
# raises KeyError.
# csb_combined.columns
csb_combined.drop(columns=['index'], inplace=True)

## include elevations

In [5]:
# Path to the PRISM 4 km digital elevation model (.bil raster) that is sampled
# below to attach an elevation to every record.
digital_elevation_file = '../data/elevation/PRISM_us_dem_4km_bil/PRISM_us_dem_4km_bil.bil'

In [6]:
# Coordinate columns as plain arrays
longitudes = csb_combined['Longitude'].values
latitudes = csb_combined['Latitude'].values

# Point layer for raster sampling: same attribute columns as csb_combined, but
# with one point per row built from its Longitude/Latitude values. The
# coordinates are assumed to be WGS84 longitude/latitude.
gdf = gpd.GeoDataFrame(
    csb_combined,
    geometry=gpd.points_from_xy(
        csb_combined['Longitude'],
        csb_combined['Latitude'],
    ),
    crs='EPSG:4326',
)

In [7]:
# Look up an elevation for every point by sampling the DEM raster.
with rasterio.open(digital_elevation_file) as dem:
    # Report which CRS the raster uses
    dem_crs = dem.crs
    print(f"DEM CRS: {dem_crs}")

    # Reproject the points into the raster's CRS so the coordinates line up
    gdf = gdf.to_crs(dem_crs)

    # One (x, y) pair per row of gdf, in raster coordinates
    coords = list(zip(gdf.geometry.x, gdf.geometry.y))

    # dem.sample() yields one sequence of band values per coordinate;
    # only the first band is kept.
    elevations = [band_values[0] for band_values in dem.sample(coords)]


# Attach the sampled values by position; gdf was built from csb_combined,
# so both are expected to share the same row order.
csb_combined['Elevation'] = elevations

DEM CRS: OGC:CRS83


In [12]:
# Sanity check: smallest and largest sampled elevation.
csb_combined['Elevation'].min(), csb_combined['Elevation'].max()

(26, 3665)

In [17]:
# Column order for the exported table: CSB identifiers and acreage, the yearly
# CDL crop codes, administrative codes, shape metrics and geometry, then the
# derived Longitude/Latitude/Elevation and the display attributes.
# list(csb_combined.columns)
reorder_columns = [
    'CSBID',
    'CSBYEARS',
    'CSBACRES',
    'CDL2016',
    'CDL2017',
    'CDL2018',
    'CDL2019',
    'CDL2020',
    'CDL2021',
    'CDL2022',
    'CDL2023',
    'STATEFIPS',
    'STATEASD',
    'ASD',
    'CNTY',
    'CNTYFIPS',
    'INSIDE_X',
    'INSIDE_Y',
    'Shp_Len',
    'Shp_Area',
    'geometry',
    'Longitude',
    'Latitude',
    'Elevation',
    'color',
    'Crop',
]

In [18]:
# Keep exactly the listed columns, in that order (a name missing from the
# frame raises KeyError; columns not listed are dropped).
csb_combined = csb_combined[reorder_columns]

In [20]:
# list(csb_combined.columns)

In [10]:
# Persist the table, now including the Elevation column, for later use.
csb_combined.to_parquet('../data/agricultural/CSB/siads696/csb_combined_with_elevation.parquet')

In [21]:
# Simple random sample of rows (no spatial stratification); the fixed
# random_state makes the draw reproducible.
sample_size = 1200
csb_samples = csb_combined.sample(n=sample_size, random_state=42)
# Display the sample as the cell output
csb_samples

Unnamed: 0,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,CDL2022,...,INSIDE_X,INSIDE_Y,Shp_Len,Shp_Area,geometry,Longitude,Latitude,Elevation,color,Crop
194047,081623003680567,1623,24.595302,61,24,61,24,42,24,42,...,-1.120167e+06,1.711077e+06,1778.756494,99534.055948,"MULTIPOLYGON (((-1120006.725 1711204.851, -111...",-108.874516,37.752494,2114,#a87000,Winter Wheat
180917,081623001172341,1623,5.813687,1,24,61,1,36,36,1,...,-6.973936e+05,1.704912e+06,1018.698379,23527.251774,"MULTIPOLYGON (((-697143.136 1704945.584, -6971...",-104.039539,38.112161,1328,#bfbf7a,Fallow/Idle Cropland
392157,081623009660832,1623,68.025049,36,36,36,36,36,36,36,...,-1.017694e+06,1.772620e+06,3075.720099,275288.707532,"MULTIPOLYGON (((-1017862.847 1772957.076, -101...",-107.796641,38.415305,1912,#ffa8e3,Alfalfa
120215,351623002569319,1623,6.808717,36,190,36,152,36,36,37,...,-9.885239e+05,1.308874e+06,830.708511,27554.010266,"MULTIPOLYGON (((-988371.522 1308907.044, -9883...",-106.850683,34.334479,1465,#e9ffbe,Grass/Pasture
411765,081623010117052,1623,3.723023,176,24,4,61,24,61,24,...,-6.430940e+05,1.835682e+06,550.657171,15066.598593,"MULTIPOLYGON (((-643066.725 1835749.68, -64303...",-103.535552,39.314620,1699,#bfbf7a,Fallow/Idle Cropland
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110152,351623001801779,1623,5.777508,1,2,61,205,1,205,1,...,-6.489027e+05,1.298843e+06,1085.739826,23380.840552,"MULTIPOLYGON (((-648702.809 1298873.001, -6487...",-103.132301,34.532521,1317,#a87000,Winter Wheat
101320,351623000927136,1623,2.950810,4,4,61,24,152,4,4,...,-7.031136e+05,1.215967e+06,622.167642,11941.553840,"MULTIPOLYGON (((-703135.074 1216033.76, -70308...",-103.652638,33.755502,1358,#ff9e0f,Sorghum
247352,081623006700815,1623,7.830029,36,36,176,36,36,195,36,...,-7.206152e+05,1.849777e+06,897.404967,31687.130011,"MULTIPOLYGON (((-720613.545 1849880.018, -7206...",-104.454050,39.381817,1947,#ffa8e3,Alfalfa
450533,081623011958926,1623,2.755083,1,1,1,176,61,176,61,...,-7.229287e+05,1.934624e+06,474.070288,11149.471426,"MULTIPOLYGON (((-722869.696 1934689.09, -72287...",-104.570576,40.134105,1503,#a87000,Winter Wheat


In [16]:
# Map the simple random sample (polygons plus centroid markers).
sample_map = generate_folium_map_with_csb_centroids_and_colors(csb_samples, zoom=6, decimal_places=3)

In [18]:
# Render the map as the cell output
sample_map

In [29]:
def generate_grid(gdf, cell_size):
    """Cover the extent of ``gdf`` with square cells of side ``cell_size``.

    Cells start at the lower-left corner of ``gdf.total_bounds`` and are laid
    out column by column (x outer, y inner). The last column/row may extend
    beyond the upper bounds because cells are never clipped.

    Args:
        gdf: GeoDataFrame whose total bounds define the area to cover.
        cell_size: Edge length of a cell, in the units of ``gdf``'s CRS.

    Returns:
        geopandas.GeoDataFrame: One square polygon per row in a ``geometry``
        column, carrying the CRS of ``gdf``. Empty when ``gdf`` has no bounds
        (e.g. no rows) or a zero-width/zero-height extent.

    Raises:
        ValueError: If ``cell_size`` is not positive.
    """
    if cell_size <= 0:
        raise ValueError(f"cell_size must be positive, got {cell_size!r}")

    minx, miny, maxx, maxy = gdf.total_bounds

    if np.isnan([minx, miny, maxx, maxy]).any():
        # No usable extent (empty frame or only missing geometries): np.arange
        # cannot handle NaN limits, so return an empty grid instead.
        grid_cells = []
    else:
        x_coords = np.arange(minx, maxx, cell_size)
        y_coords = np.arange(miny, maxy, cell_size)
        grid_cells = [
            Polygon([
                (x, y),
                (x + cell_size, y),
                (x + cell_size, y + cell_size),
                (x, y + cell_size),
            ])
            for x in x_coords
            for y in y_coords
        ]

    # Carry the source CRS so the grid can be plotted or overlaid with gdf.
    return gpd.GeoDataFrame(geometry=grid_cells, crs=gdf.crs)

def spatially_balanced_sample(gdf, num_samples, cell_size, random_state=42):
    """Draw at most one feature per grid cell to spread a sample over space.

    A square grid (see ``generate_grid``) is laid over the extent of ``gdf``.
    From every cell that fully contains at least one feature, one of those
    features is picked at random. Features that straddle a cell boundary are
    not within any single cell and are therefore never selected.

    Args:
        gdf: GeoDataFrame to sample from.
        num_samples: Maximum number of rows to return. If more cells than this
            yield a feature, a random subset of ``num_samples`` is kept; if
            fewer do, the result is simply smaller.
        cell_size: Edge length of the grid cells, in the units of ``gdf``'s CRS.
        random_state: Seed used both for the per-cell pick and for thinning the
            result down to ``num_samples``, so repeated calls give the same
            sample. Pass ``None`` for a non-reproducible draw.

    Returns:
        geopandas.GeoDataFrame: The sampled rows with their original index
        labels; an empty frame with the columns of ``gdf`` if nothing was
        sampled.
    """
    grid = generate_grid(gdf, cell_size)

    # Build the spatial index once; each cell then only inspects nearby
    # features instead of testing every row of gdf.
    spatial_index = gdf.sindex

    sampled_points = []
    for cell in grid.geometry:
        # predicate='contains' evaluates cell.contains(feature), which is the
        # same test as feature.within(cell). Positions are sorted so that the
        # candidates keep gdf's row order and the seeded pick is stable.
        positions = np.sort(spatial_index.query(cell, predicate='contains'))
        if positions.size:
            sampled_points.append(
                gdf.iloc[positions].sample(n=1, random_state=random_state)
            )

    # If the number of samples is more than required, randomly reduce
    # (seeded, so the reduction is reproducible like the per-cell pick).
    if len(sampled_points) > num_samples:
        sampled_points = random.Random(random_state).sample(sampled_points, num_samples)

    if not sampled_points:
        # pd.concat() refuses an empty list; return an empty frame instead.
        return gdf.iloc[0:0].copy()

    # Combine sampled points into a single GeoDataFrame
    return gpd.GeoDataFrame(pd.concat(sampled_points, ignore_index=False))

In [30]:
# Inspect the CRS to learn the coordinate units before choosing a grid cell size.
csb_combined.crs

<Projected CRS: {"$schema": "https://proj.org/schemas/v0.7/projjso ...>
Name: unknown
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- undefined
Coordinate Operation:
- name: unknown
- method: Albers Equal Area
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [31]:
# Set cell size (adjust as needed)
cell_size = 10000  # in the units of your CRS, e.g., meters

# Specify the number of samples
num_samples = 1200

# Get the spatially balanced sample
geo_balanced_samples = spatially_balanced_sample(csb_combined, num_samples, cell_size)

In [32]:
# Map the spatially balanced sample for visual comparison with the random one.
m_geo_balanced = generate_folium_map_with_csb_centroids_and_colors(geo_balanced_samples, zoom=6, decimal_places=3)

In [33]:
# Render the map as the cell output
m_geo_balanced

In [34]:
# Persist the spatially balanced sample next to the other CSB files.
geo_balanced_samples.to_parquet('../data/agricultural/CSB/siads696/geo_balanced_sample.parquet')

In [35]:
# Display the sampled rows as the cell output
geo_balanced_samples

Unnamed: 0,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,CDL2022,...,INSIDE_X,INSIDE_Y,Shp_Len,Shp_Area,geometry,Longitude,Latitude,Elevation,color,Crop
478801,081623012787392,1623,3.791540,4,24,61,61,24,24,61,...,-6.035779e+05,1.704774e+06,532.211689,15343.877410,"MULTIPOLYGON (((-603525.799 1704829.066, -6035...",-102.962080,38.176922,1231,#bfbf7a,Fallow/Idle Cropland
107446,351623001627247,1623,4.325446,225,1,225,152,152,152,36,...,-7.967410e+05,1.149389e+06,723.964572,17504.529797,"MULTIPOLYGON (((-796679.747 1149498.895, -7966...",-104.600667,33.089835,1138,#ffa8e3,Alfalfa
121162,351623002777545,1623,3.222010,61,176,1,176,36,36,1,...,-1.086137e+06,1.590736e+06,549.623304,13039.063523,"MULTIPOLYGON (((-1086091.187 1590803.212, -108...",-108.309751,36.728108,1676,#ffd400,Corn
88813,351623000046581,1623,10.204036,36,36,36,36,152,37,37,...,-9.788489e+05,1.595846e+06,1274.741723,41294.433943,"MULTIPOLYGON (((-978851.432 1595726.11, -97884...",-107.112180,36.891083,2252,#a5f58d,Other Hay/Non Alfalfa
516266,081623013377366,1623,2.918481,24,29,29,24,1,61,61,...,-6.203751e+05,1.992249e+06,556.566951,11810.721459,"MULTIPOLYGON (((-620252.778 1992330.141, -6202...",-103.412893,40.723858,1330,#bfbf7a,Fallow/Idle Cropland
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119071,351623002568175,1623,2.957066,176,176,176,61,24,24,24,...,-9.138551e+05,1.348095e+06,599.379620,11966.871324,"MULTIPOLYGON (((-913829.788 1348147.68, -91380...",-106.082615,34.756107,1877,#e9ffbe,Grass/Pasture
540278,081623014401456,1623,9.374885,61,24,61,1,1,1,1,...,-6.892023e+05,1.934718e+06,988.353218,37938.965442,"MULTIPOLYGON (((-689103.22 1934811.717, -68911...",-104.172849,40.161427,1417,#a5f58d,Other Hay/Non Alfalfa
591949,041623013556134,1623,3.770585,72,72,72,190,190,190,190,...,-1.707418e+06,1.273220e+06,523.387689,15259.076277,"MULTIPOLYGON (((-1707373.732 1273277.961, -170...",-114.508535,33.028054,101,#80b3b3,Woody Wetlands
217842,081623005639708,1623,4.307316,61,24,1,61,24,29,61,...,-6.004245e+05,1.898598e+06,634.862780,17431.160158,"MULTIPOLYGON (((-600373.153 1898692.428, -6003...",-103.092389,39.903658,1396,#a87000,Winter Wheat
