**Buffer Water Flowlines**

In [None]:
import geopandas as gpd
import sys
from pathlib import Path

# Make the project root importable so the `from scripts import config` line below resolves.
# Assumes the kernel's working directory sits one level below the project root
# (e.g. a notebooks/ folder) -- TODO confirm if Jupyter is launched from elsewhere.
notebook_path = Path.cwd()
project_root = notebook_path.parent

# Only append when missing, so re-running this cell does not stack duplicate
# entries onto sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))


from scripts import config

Let's inspect the data.

In [7]:
# Load dataset: read the "Water_Flow_CO" layer from the geodatabase path defined in scripts.config.
gdf = gpd.read_file(config.GDB_PATH, layer="Water_Flow_CO")
# Report the source CRS and feature count before any processing.
print(f"Original CRS: {gdf.crs}, Features: {len(gdf)}")

# Last expression of the cell: rich preview of the first rows.
gdf.head()


  return ogr_read(


Original CRS: EPSG:4269, Features: 996082


Unnamed: 0,FID_CO_State_Boundary_Buffer,FID_NonNetworkNHDFlowline,permanent_Identifier,fdate,resolution,gnis_id,gnis_name,lengthkm,reachcode,flowdir,...,qlossma,qgadjma,qgnavma,gageadjma,avgqadjma,gageidma,gageqma,MERGE_SRC,Shape_Length,geometry
0,1,342476.0,128859000,2012-02-19 07:44:13+00:00,2,,,0.058,10180001011402,0,...,,,,,,,,NonNetworkFlowline_CO,0.000564,"MULTILINESTRING Z ((-106.21778 40.79963 0, -10..."
1,1,342565.0,128856502,2012-02-19 07:45:05+00:00,2,,,0.977,10180001010889,0,...,,,,,,,,NonNetworkFlowline_CO,0.010109,"MULTILINESTRING Z ((-105.86098 40.50956 0, -10..."
2,1,342584.0,128856504,2012-02-19 07:43:11+00:00,2,202442.0,Upper Michigan Ditch,0.841,10180001010891,0,...,,,,,,,,NonNetworkFlowline_CO,0.008326,"MULTILINESTRING Z ((-105.86603 40.49719 0, -10..."
3,1,342701.0,{6BC1BF61-46A0-4331-9412-4F8401B4E386},2014-04-24 00:00:00+00:00,2,202424.0,Skyline Ditch,0.390181,10180010002494,0,...,,,,,,,,NonNetworkFlowline_CO,0.00376,"MULTILINESTRING Z ((-105.86377 40.62951 0, -10..."
4,1,342983.0,132213398,2012-02-18 00:01:49+00:00,2,,,0.305,10180010002491,0,...,,,,,,,,NonNetworkFlowline_CO,0.003429,"MULTILINESTRING Z ((-105.85145 40.67614 0, -10..."


First of all, let's drop all unnecessary columns.

In [8]:
# Keep the geometry plus the three attributes that are carried through to the export.
columns_to_keep = ["geometry", "permanent_Identifier", "flowdir", "Shape_Length"]
gdf = gdf[columns_to_keep]
gdf.head()

Unnamed: 0,geometry,permanent_Identifier,flowdir,Shape_Length
0,"MULTILINESTRING Z ((-106.21778 40.79963 0, -10...",128859000,0,0.000564
1,"MULTILINESTRING Z ((-105.86098 40.50956 0, -10...",128856502,0,0.010109
2,"MULTILINESTRING Z ((-105.86603 40.49719 0, -10...",128856504,0,0.008326
3,"MULTILINESTRING Z ((-105.86377 40.62951 0, -10...",{6BC1BF61-46A0-4331-9412-4F8401B4E386},0,0.00376
4,"MULTILINESTRING Z ((-105.85145 40.67614 0, -10...",132213398,0,0.003429


For our purposes, we don't really need elevation in the geometry (MultiLineString Z); if we do need elevation later, we can get it from a separate elevation layer. The Z coordinates might be interfering with the GeoPackage (gpkg) export, so let's drop them.

In [4]:
from shapely.geometry import LineString, MultiLineString

def drop_z(geom):
    """Return ``geom`` with Z coordinates removed from line geometries.

    Args:
        geom: A shapely geometry, or ``None``.

    Returns:
        ``None`` when ``geom`` is ``None``. A new 2D ``LineString`` or
        ``MultiLineString`` when ``geom`` is one of those types and has Z.
        Otherwise ``geom`` itself, unchanged -- this includes geometries
        without Z and any other geometry type, even if it carries Z.
    """
    def _flatten(line):
        # Rebuild the line from the first two ordinates of every vertex.
        return LineString([(x, y) for x, y, *_ in line.coords])

    # Nothing to strip for missing geometries or ones that are already 2D.
    if geom is None or not geom.has_z:
        return geom

    if isinstance(geom, LineString):
        return _flatten(geom)
    if isinstance(geom, MultiLineString):
        return MultiLineString([_flatten(part) for part in geom.geoms])

    # Other geometry types are passed through untouched.
    return geom

# Strip Z from every geometry, write the result back, then preview it.
geometry_2d = gdf["geometry"].apply(drop_z)
gdf["geometry"] = geometry_2d
gdf.head()

Unnamed: 0,geometry,permanent_Identifier,flowdir,Shape_Length
0,"MULTILINESTRING ((-106.21778 40.79963, -106.21...",128859000,0,0.000564
1,"MULTILINESTRING ((-105.86098 40.50956, -105.86...",128856502,0,0.010109
2,"MULTILINESTRING ((-105.86603 40.49719, -105.86...",128856504,0,0.008326
3,"MULTILINESTRING ((-105.86377 40.62951, -105.86...",{6BC1BF61-46A0-4331-9412-4F8401B4E386},0,0.00376
4,"MULTILINESTRING ((-105.85145 40.67614, -105.85...",132213398,0,0.003429


Let's do some cleaning. First, we check for empty/invalid geometries; projecting to the right CRS happens in the next step. There's also a stray GUID-style entry in `permanent_Identifier`, so let's cast that field to string just in case. Finally, the plan is to buffer the geometry and simplify with a tolerance of 1 to ensure exporting goes smoothly (note: the simplify step is not applied in the cells below).

In [5]:
print("Initial count:", len(gdf))

# Keep only rows whose geometry is both non-empty and valid.
geometries = gdf.geometry
keep_mask = ~geometries.is_empty & geometries.is_valid
gdf = gdf[keep_mask]
print("After validity check:", len(gdf))

# Store identifiers as strings: the preview shows numeric-looking IDs
# alongside a brace-wrapped GUID in the same column.
identifiers_as_text = gdf["permanent_Identifier"].astype(str)
gdf["permanent_Identifier"] = identifiers_as_text


Initial count: 996082
After validity check: 996082


Project to CRS and buffer.

In [6]:
# Reproject to the CRS configured for buffering, then replace each geometry with its buffer.
# NOTE(review): buffer() interprets the distance in the units of the active CRS, while the
# constant's name says feet -- confirm config.BUFFER_CRS is foot-based or that the value
# has already been converted.
projected = gdf.to_crs(config.BUFFER_CRS)
buffer_distance = config.BUFFER_DISTANCE_WATER_FT
projected["geometry"] = projected.geometry.buffer(buffer_distance)
gdf = projected

Check types.

In [None]:
# Record each row's geometry type in a helper column and tally the types.
gdf["geometry_type"] = gdf.geometry.geom_type
type_counts = gdf["geometry_type"].value_counts()
print(type_counts)


geometry_type
Polygon         996050
MultiPolygon        32
Name: count, dtype: int64


We have mismatching types, so let's convert all polygons to multipolygons.

In [None]:
from shapely.geometry import MultiPolygon, Polygon

def ensure_multipolygon(geom):
    """Wrap a single ``Polygon`` in a ``MultiPolygon``; return anything else as-is.

    Args:
        geom: A shapely geometry, or ``None``.

    Returns:
        A one-member ``MultiPolygon`` when ``geom`` is a non-empty ``Polygon``.
        Otherwise ``geom`` itself -- this covers ``None``, empty geometries,
        existing ``MultiPolygon`` objects and any other geometry type.
    """
    is_single_polygon = (
        geom is not None
        and not geom.is_empty
        and isinstance(geom, Polygon)
    )
    if is_single_polygon:
        return MultiPolygon([geom])
    # Missing, empty, already-multi or unexpected geometries pass through unchanged.
    return geom

# Promote every single polygon so the geometry column holds one consistent type.
multipolygons = gdf["geometry"].apply(ensure_multipolygon)
gdf["geometry"] = multipolygons

# Refresh the helper column and re-tally to confirm the conversion.
gdf["geometry_type"] = gdf.geometry.geom_type
type_counts_after = gdf["geometry_type"].value_counts()
print(type_counts_after)

gdf.head()

geometry_type
MultiPolygon    996082
Name: count, dtype: int64


Unnamed: 0,geometry,permanent_Identifier,flowdir,Shape_Length,geometry_type
0,"MULTIPOLYGON (((397131.3 4517381.1, 397142.345...",128859000,0,0.000564,MultiPolygon
1,"MULTIPOLYGON (((426317.032 4484157.465, 426320...",128856502,0,0.010109,MultiPolygon
2,"MULTIPOLYGON (((426160.788 4484020.768, 426161...",128856504,0,0.008326,MultiPolygon
3,"MULTIPOLYGON (((426866.669 4497742.943, 426854...",{6BC1BF61-46A0-4331-9412-4F8401B4E386},0,0.00376,MultiPolygon
4,"MULTIPOLYGON (((427931.631 4503415.728, 427948...",132213398,0,0.003429,MultiPolygon


Now let's export the data.

In [None]:
# Create the destination folder under the configured output directory if it is missing.
output_folder = config.OUTPUT_DIR.joinpath("water_flowlines_buffered")
output_folder.mkdir(parents=True, exist_ok=True)

# Write the buffered flowlines as a GeoPackage.
# NOTE(review): gdf still carries the helper `geometry_type` column at this point,
# so it is written to the file as well -- confirm that is intended.
output_fp = output_folder.joinpath("water_flowlines_buffered.gpkg")
gdf.to_file(output_fp, driver="GPKG")

print("Saved buffered flowlines to:", output_fp)

Saved buffered flowlines to: /Users/loganproffitt/Desktop/CampGIS.nosync/Repo/CampGIS/outputs/water_flowlines_buffered/water_flowlines_buffered.gpkg
