**Buffer Water Areas**

In [None]:
import geopandas as gpd
import sys
from pathlib import Path

notebook_path = Path.cwd()
project_root = notebook_path.parent
sys.path.append(str(project_root))


from scripts import config

Let's inspect the data.

In [2]:
# Load dataset
gdf = gpd.read_file(config.GDB_PATH, layer="Water_Area_CO")

print(gdf.columns.tolist())
gdf.head()


  return ogr_read(


['FID_CO_State_Boundary_Buffer', 'FID_NHDArea', 'permanent_identifier', 'fdate', 'resolution', 'gnis_id', 'gnis_name', 'areasqkm', 'elevation', 'ftype', 'fcode', 'visibilityfilter', 'nhdplusid', 'vpuid', 'onoffnet', 'purpcode', 'burn', 'FID_NHDWaterbody', 'reachcode', 'MERGE_SRC', 'Shape_Length', 'Shape_Area', 'geometry']


Unnamed: 0,FID_CO_State_Boundary_Buffer,FID_NHDArea,permanent_identifier,fdate,resolution,gnis_id,gnis_name,areasqkm,elevation,ftype,...,vpuid,onoffnet,purpcode,burn,FID_NHDWaterbody,reachcode,MERGE_SRC,Shape_Length,Shape_Area,geometry
0,1,93903.0,127819176,2004-10-20 10:45:11+00:00,2,,,0.022,,460,...,1019,1.0,WB,1.0,,,NHDArea_CO,0.032873,2.331754e-06,"MULTIPOLYGON Z (((-105.94407 38.9969 0, -105.9..."
1,1,93982.0,127819425,2004-10-20 10:45:27+00:00,2,,,0.207,,460,...,1019,1.0,WB,1.0,,,NHDArea_CO,0.218455,2.151835e-05,"MULTIPOLYGON Z (((-105.61728 38.97288 0, -105...."
2,1,94601.0,127819171,2015-09-16 00:00:00+00:00,2,,,1.254206,,460,...,1019,1.0,WB,1.0,,,NHDArea_CO,1.86706,0.0001305322,"MULTIPOLYGON Z (((-106.00045 39.22189 0, -106...."
3,1,100977.0,133053032,2005-03-14 20:58:06+00:00,2,,,0.008,,484,...,1102,,,,,,NHDArea_CO,0.004659,7.805267e-07,"MULTIPOLYGON Z (((-105.97029 38.81162 0, -105...."
4,1,101018.0,133053031,2005-03-14 20:58:05+00:00,2,,,0.01,,484,...,1102,,,,,,NHDArea_CO,0.004893,1.055364e-06,"MULTIPOLYGON Z (((-105.96977 38.81598 0, -105...."


First of all, let's drop all unnecessary columns.

In [3]:
gdf_filtered = gdf[["geometry", "permanent_identifier", "fcode", "Shape_Area", "Shape_Length"]]
gdf_filtered.head()

Unnamed: 0,geometry,permanent_identifier,fcode,Shape_Area,Shape_Length
0,"MULTIPOLYGON Z (((-105.94407 38.9969 0, -105.9...",127819176,46006,2.331754e-06,0.032873
1,"MULTIPOLYGON Z (((-105.61728 38.97288 0, -105....",127819425,46006,2.151835e-05,0.218455
2,"MULTIPOLYGON Z (((-106.00045 39.22189 0, -106....",127819171,46006,0.0001305322,1.86706
3,"MULTIPOLYGON Z (((-105.97029 38.81162 0, -105....",133053032,48400,7.805267e-07,0.004659
4,"MULTIPOLYGON Z (((-105.96977 38.81598 0, -105....",133053031,48400,1.055364e-06,0.004893


For our purposes, we don't really need elevation in the geometry (multilinestring Z); if we really need elevation, we can get that from a separate elevation layer. The Z coordinates might be messing with gpkg exporting, so let's drop it.

In [4]:
from shapely.geometry import LineString, MultiLineString

def drop_z(geom):
    if geom is None:
        return None
    if geom.has_z:
        if isinstance(geom, LineString):
            return LineString([(pt[0], pt[1]) for pt in geom.coords])
        elif isinstance(geom, MultiLineString):
            return MultiLineString([
                LineString([(pt[0], pt[1]) for pt in line.coords])
                for line in geom.geoms
            ])
    return geom

gdf_drop_z = gdf_filtered.copy()
gdf_drop_z["geometry"] = gdf_filtered["geometry"].apply(drop_z)
gdf_drop_z.head()

Unnamed: 0,geometry,permanent_identifier,fcode,Shape_Area,Shape_Length
0,"MULTIPOLYGON Z (((-105.94407 38.9969 0, -105.9...",127819176,46006,2.331754e-06,0.032873
1,"MULTIPOLYGON Z (((-105.61728 38.97288 0, -105....",127819425,46006,2.151835e-05,0.218455
2,"MULTIPOLYGON Z (((-106.00045 39.22189 0, -106....",127819171,46006,0.0001305322,1.86706
3,"MULTIPOLYGON Z (((-105.97029 38.81162 0, -105....",133053032,48400,7.805267e-07,0.004659
4,"MULTIPOLYGON Z (((-105.96977 38.81598 0, -105....",133053031,48400,1.055364e-06,0.004893


Let's do some cleaning. First, we need to project to the right CRS, then check for empty/invalid geometries. There's also a weird UUID entry in permanent_Identifier - let's cast that field to string just in case. Finally, let's rebuffer the geometry and simplify with a tolerance of 1 to ensure exporting goes smoothly.

In [5]:
# Remove invalid or empty geometries
gdf_drop_z = gdf_drop_z[~gdf.geometry.is_empty & gdf.geometry.is_valid]

# Cast permanent_identifier as a string
gdf_drop_z["permanent_identifier"] = gdf_drop_z["permanent_identifier"].astype(str)


Project to CRS and buffer.

In [6]:
gdf_crs = gdf_drop_z.to_crs(config.BUFFER_CRS)

gdf_buffered = gdf_crs.copy()
gdf_buffered["geometry"] = gdf_crs.geometry.buffer(config.BUFFER_DISTANCE_WATER_METERS)

Let's convert all polygons to multipolygons.

In [7]:
from shapely.geometry import MultiPolygon, Polygon

def ensure_multipolygon(geom):
    if geom is None or geom.is_empty:
        return geom
    if isinstance(geom, Polygon):
        return MultiPolygon([geom])
    elif isinstance(geom, MultiPolygon):
        return geom
    return geom


gdf_buffered["geometry"] = gdf_buffered["geometry"].apply(ensure_multipolygon)

Now let's export the data.

In [8]:
output_folder = config.OUTPUT_DIR / "water_areas_buffered"
output_folder.mkdir(parents=True, exist_ok=True)

output_fp = output_folder / "water_areas_buffered.gpkg"
gdf_buffered.to_file(output_fp, driver="GPKG")

print("Saved buffered areas to:", output_fp)

Saved buffered areas to: /Users/loganproffitt/Desktop/CampGIS.nosync/Repo/CampGIS/outputs/water_areas_buffered/water_areas_buffered.gpkg
