**Buffer Water Areas**

In [1]:
import geopandas as gpd
import sys
from pathlib import Path

import sys
from pathlib import Path

notebook_path = Path.cwd()
project_root = notebook_path.parent
sys.path.append(str(project_root))


from scripts import config
from scripts.tasks.buffer_mvum_roads import buffer_mvum_roads
from scripts.tasks.buffer_water_features import buffer_water_flowlines, buffer_water_areas

Let's inspect the data.

In [3]:
# Load dataset
gdf = gpd.read_file(config.GDB_PATH, layer="Water_Area_CO")
print(f"Original CRS: {gdf.crs}, Features: {len(gdf)}")

gdf.head()


  return ogr_read(


Original CRS: EPSG:4269, Features: 118464


Unnamed: 0,FID_CO_State_Boundary_Buffer,FID_NHDArea,permanent_identifier,fdate,resolution,gnis_id,gnis_name,areasqkm,elevation,ftype,...,vpuid,onoffnet,purpcode,burn,FID_NHDWaterbody,reachcode,MERGE_SRC,Shape_Length,Shape_Area,geometry
0,1,93903.0,127819176,2004-10-20 10:45:11+00:00,2,,,0.022,,460,...,1019,1.0,WB,1.0,,,NHDArea_CO,0.032873,2.331754e-06,"MULTIPOLYGON Z (((-105.94407 38.9969 0, -105.9..."
1,1,93982.0,127819425,2004-10-20 10:45:27+00:00,2,,,0.207,,460,...,1019,1.0,WB,1.0,,,NHDArea_CO,0.218455,2.151835e-05,"MULTIPOLYGON Z (((-105.61728 38.97288 0, -105...."
2,1,94601.0,127819171,2015-09-16 00:00:00+00:00,2,,,1.254206,,460,...,1019,1.0,WB,1.0,,,NHDArea_CO,1.86706,0.0001305322,"MULTIPOLYGON Z (((-106.00045 39.22189 0, -106...."
3,1,100977.0,133053032,2005-03-14 20:58:06+00:00,2,,,0.008,,484,...,1102,,,,,,NHDArea_CO,0.004659,7.805267e-07,"MULTIPOLYGON Z (((-105.97029 38.81162 0, -105...."
4,1,101018.0,133053031,2005-03-14 20:58:05+00:00,2,,,0.01,,484,...,1102,,,,,,NHDArea_CO,0.004893,1.055364e-06,"MULTIPOLYGON Z (((-105.96977 38.81598 0, -105...."


Let's check out the features.

In [4]:
print(gdf.columns.tolist())

['FID_CO_State_Boundary_Buffer', 'FID_NHDArea', 'permanent_identifier', 'fdate', 'resolution', 'gnis_id', 'gnis_name', 'areasqkm', 'elevation', 'ftype', 'fcode', 'visibilityfilter', 'nhdplusid', 'vpuid', 'onoffnet', 'purpcode', 'burn', 'FID_NHDWaterbody', 'reachcode', 'MERGE_SRC', 'Shape_Length', 'Shape_Area', 'geometry']


First of all, let's drop all unnecessary columns.

In [5]:
gdf = gdf[["geometry", "permanent_identifier", "fcode", "Shape_Area", "Shape_Length"]]
gdf.head()

Unnamed: 0,geometry,permanent_identifier,fcode,Shape_Area,Shape_Length
0,"MULTIPOLYGON Z (((-105.94407 38.9969 0, -105.9...",127819176,46006,2.331754e-06,0.032873
1,"MULTIPOLYGON Z (((-105.61728 38.97288 0, -105....",127819425,46006,2.151835e-05,0.218455
2,"MULTIPOLYGON Z (((-106.00045 39.22189 0, -106....",127819171,46006,0.0001305322,1.86706
3,"MULTIPOLYGON Z (((-105.97029 38.81162 0, -105....",133053032,48400,7.805267e-07,0.004659
4,"MULTIPOLYGON Z (((-105.96977 38.81598 0, -105....",133053031,48400,1.055364e-06,0.004893


For our purposes, we don't really need elevation in the geometry (multilinestring Z); if we really need elevation, we can get that from a separate elevation layer. The Z coordinates might be messing with gpkg exporting, so let's drop it.

In [6]:
from shapely.geometry import LineString, MultiLineString

def drop_z(geom):
    if geom is None:
        return None
    if geom.has_z:
        if isinstance(geom, LineString):
            return LineString([(pt[0], pt[1]) for pt in geom.coords])
        elif isinstance(geom, MultiLineString):
            return MultiLineString([
                LineString([(pt[0], pt[1]) for pt in line.coords])
                for line in geom.geoms
            ])
    return geom

gdf["geometry"] = gdf["geometry"].apply(drop_z)
gdf.head()

Unnamed: 0,geometry,permanent_identifier,fcode,Shape_Area,Shape_Length
0,"MULTIPOLYGON Z (((-105.94407 38.9969 0, -105.9...",127819176,46006,2.331754e-06,0.032873
1,"MULTIPOLYGON Z (((-105.61728 38.97288 0, -105....",127819425,46006,2.151835e-05,0.218455
2,"MULTIPOLYGON Z (((-106.00045 39.22189 0, -106....",127819171,46006,0.0001305322,1.86706
3,"MULTIPOLYGON Z (((-105.97029 38.81162 0, -105....",133053032,48400,7.805267e-07,0.004659
4,"MULTIPOLYGON Z (((-105.96977 38.81598 0, -105....",133053031,48400,1.055364e-06,0.004893


Let's do some cleaning. First, we need to project to the right CRS, then check for empty/invalid geometries. There's also a weird UUID entry in permanent_Identifier - let's cast that field to string just in case. Finally, let's rebuffer the geometry and simplify with a tolerance of 1 to ensure exporting goes smoothly.

In [7]:
print("Initial count:", len(gdf))

# Remove invalid or empty geometries
gdf = gdf[~gdf.geometry.is_empty & gdf.geometry.is_valid]
print("After validity check:", len(gdf))

gdf["permanent_identifier"] = gdf["permanent_identifier"].astype(str)


Initial count: 118464
After validity check: 118436


Project to CRS and buffer.

In [8]:
gdf = gdf.to_crs(config.BUFFER_CRS)
gdf["geometry"] = gdf.geometry.buffer(config.BUFFER_DISTANCE_WATER_FT)

Check types.

In [9]:
gdf["geometry_type"] = gdf.geometry.geom_type
print(gdf["geometry_type"].value_counts())


geometry_type
Polygon         118432
MultiPolygon         4
Name: count, dtype: int64


We have mismatching types, so let's convert all polygons to multipolygons.

In [10]:
from shapely.geometry import MultiPolygon, Polygon

def ensure_multipolygon(geom):
    if geom is None or geom.is_empty:
        return geom
    if isinstance(geom, Polygon):
        return MultiPolygon([geom])
    elif isinstance(geom, MultiPolygon):
        return geom
    return geom

gdf["geometry"] = gdf["geometry"].apply(ensure_multipolygon)

gdf["geometry_type"] = gdf.geometry.geom_type
print(gdf["geometry_type"].value_counts())

gdf.head()

geometry_type
MultiPolygon    118436
Name: count, dtype: int64


Unnamed: 0,geometry,permanent_identifier,fcode,Shape_Area,Shape_Length,geometry_type
0,"MULTIPOLYGON (((417952.468 4316769.948, 417950...",127819176,46006,2.331754e-06,0.032873,MultiPolygon
1,"MULTIPOLYGON (((446270.954 4314046.577, 446274...",127819425,46006,2.151835e-05,0.218455,MultiPolygon
2,"MULTIPOLYGON (((413520.586 4341678.75, 413511....",127819171,46006,0.0001305322,1.86706,MultiPolygon
3,"MULTIPOLYGON (((415586.987 4296187.181, 415577...",133053032,48400,7.805267e-07,0.004659,MultiPolygon
4,"MULTIPOLYGON (((415726.254 4297048.513, 415731...",133053031,48400,1.055364e-06,0.004893,MultiPolygon


Now let's export the data.

In [None]:
output_folder = config.OUTPUT_DIR / "water_areas_buffered"
output_folder.mkdir(parents=True, exist_ok=True)

output_fp = output_folder / "water_areas_buffered.gpkg"
gdf.to_file(output_fp, driver="GPKG")

print("Saved buffered areas to:", output_fp)

Saved buffered flowlines to: /Users/loganproffitt/Desktop/CampGIS.nosync/Repo/CampGIS/outputs/water_flowlines_buffered/water_flowlines_buffered.gpkg
