In [1]:
import geopandas as gpd
from shapely.geometry import LineString, Point
from shapely.ops import linemerge
import numpy as np
import pandas as pd
import rasterio
from rasterio.features import rasterize

In [2]:
# Load streams and lakes
streams_file = '/Users/jpnousu/WBT_data/shapes/merged_rivers_streams_2D.shp'
new_streams_file = '/Users/jpnousu/WBT_data/shapes/merged_rivers_streams_2D_mod.shp'
lakes_file = r'/Users/jpnousu/WBT_data/shapes/clipped_MTK-vakavesi_19-01-23_jarvi.shp'

streams = gpd.read_file(streams_file)
lakes = gpd.read_file(lakes_file)

def get_endpoints(line):
    coords = list(line.coords)
    return Point(coords[0]), Point(coords[-1])

def find_endpoints_on_lake_boundary(streams, lake):
    # Find stream endpoints that touch lake boundary
    lake_boundary = lake.boundary
    endpoints = []
    for idx, row in streams.iterrows():
        start, end = get_endpoints(row.geometry)
        if start.intersects(lake_boundary):
            endpoints.append((idx, start))
        if end.intersects(lake_boundary):
            endpoints.append((idx, end))
    return endpoints

new_lines = []

for lake_idx, lake_row in lakes.iterrows():
    # Get streams intersecting lake
    intersecting_streams = streams[streams.intersects(lake_row.geometry)]
    
    endpoints = find_endpoints_on_lake_boundary(intersecting_streams, lake_row.geometry)
    
    # If less than two endpoints, cannot connect
    if len(endpoints) < 2:
        continue
    
    # Naive approach: connect pairs of endpoints
    # If odd number or multiple pairs, you might want to cluster by location
    for i in range(0, len(endpoints) - 1, 2):
        pt1 = endpoints[i][1]
        pt2 = endpoints[i+1][1]
        new_line = LineString([pt1, pt2])
        new_lines.append(new_line)

# Create GeoDataFrame for new connecting lines
connections = gpd.GeoDataFrame(geometry=new_lines, crs=streams.crs)

# Remove stream segments fully inside lakes to avoid duplicates or artifacts
streams_no_lake = streams[~streams.within(lakes.unary_union)]

# Append the new connecting lines
streams_connected = pd.concat([streams_no_lake, connections], ignore_index=True)

streams_connected.to_file(new_streams_file)

  streams_no_lake = streams[~streams.within(lakes.unary_union)]


In [3]:
import geopandas as gpd
import rasterio
from rasterio.features import rasterize

# File paths
stream_file = r'/Users/jpnousu/WBT_data/shapes/clipped_MTK-virtavesi_19-01-23_virtavesikapea.shp'
river_file = r'/Users/jpnousu/WBT_data/shapes/clipped_MTK-virtavesi_19-01-23_virtavesialue.shp'
lake_file = r'/Users/jpnousu/WBT_data/shapes/clipped_MTK-vakavesi_24-08-01_jarvi.shp'
ref_file = r'/Users/jpnousu/WBT_data/pallas/korkeusmalli_4m_culverts.tif'
out_file = '/Users/jpnousu/WBT_data/pallas/hydro_4m.tif'  # changed to reflect stream+river+lake

# Load geometries
streams = gpd.read_file(stream_file)
rivers = gpd.read_file(river_file)
lakes = gpd.read_file(lake_file)

# Open the reference raster
with rasterio.open(ref_file) as ref:
    meta = ref.meta.copy()
    out_shape = (ref.height, ref.width)
    transform = ref.transform
    crs = ref.crs

# Reproject to match reference raster
for gdf in [streams, rivers, lakes]:
    if gdf.crs != crs:
        gdf.to_crs(crs, inplace=True)

# Combine all geometries (streams, rivers, lakes)
combined_shapes = (
    [(geom, 1) for geom in streams.geometry if geom is not None] +
    [(geom, 1) for geom in rivers.geometry if geom is not None] +
    [(geom, 1) for geom in lakes.geometry if geom is not None]
)

# Rasterize
rasterized = rasterize(
    combined_shapes,
    out_shape=out_shape,
    transform=transform,
    fill=0,
    dtype='uint8'
)

# Update metadata
meta.update({
    "driver": "GTiff",
    "dtype": 'uint8',
    "count": 1,
    "compress": "lzw",
    "nodata": 0
})

# Write output raster
with rasterio.open(out_file, 'w', **meta) as dst:
    dst.write(rasterized, 1)

print(f"Hydrological mask raster (streams + rivers + lakes) saved to: {out_file}")


Hydrological mask raster (streams + rivers + lakes) saved to: /Users/jpnousu/WBT_data/pallas/hydro_4m.tif
