In [1]:
# Cell 1: Imports
# If needed, install:  pip install geopandas shapely rasterio pandas numpy fiona
import re
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
from rasterio.features import rasterize
from rasterio.windows import Window, from_bounds
from shapely.geometry import LineString
from shapely.ops import substring

In [2]:
# Cell 2: Paths (edit only if your paths change)

# --- Inputs ---
# Bed-elevation GeoTIFF (BedMachine Greenland v5) that the lines are sampled against
bed_tif = Path("/Users/jagon/Library/Mobile Documents/com~apple~CloudDocs/Datasets/DEMs/BedMachine/DEM/v5/BedMachineGreenland-v5_bed.tif")
# Folder scanned for the input *.geojson flowline picks
in_dir = Path("/Users/jagon/Documents/Projects/Collabs/Jessica Badgeley/New Points v3/Flowlines of Interest/Picks")
# Metadata CSV joined on the feature ID
meta_csv = Path("/Users/jagon/Documents/Projects/Collabs/Jessica Badgeley/New Points v3/Input/Box Coordinates/box_sp_all_v3.csv")

# --- Outputs ---
# Destination for the shortened ("_frontshort") lines
short_out_dir = Path("/Users/jagon/Documents/Projects/Collabs/Jessica Badgeley/Bathymetry Analysis/Frontal Sections")
# Destination for the per-line summary table
summary_out_dir = Path("/Users/jagon/Documents/Projects/Collabs/Jessica Badgeley/Bathymetry Analysis/Mean Depth")
summary_csv = summary_out_dir.joinpath("bed_mean.csv")

# Create both output folders up front; harmless if they already exist
for out_dir in (short_out_dir, summary_out_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# Cell 3: Helpers
def shorten_line_1km(line: LineString, max_len_m: float = 1000.0) -> LineString:
    """
    Clip a LineString to its leading `max_len_m` units of length.

    Lengths are measured in the line's own coordinate units (meters when the
    line is in EPSG:3413). A line that is already no longer than `max_len_m`
    is handed back unchanged.
    """
    # normalized=False makes substring() interpret 0.0 and max_len_m as
    # absolute distances along the line rather than fractions of its length.
    return (
        line
        if line.length <= max_len_m
        else substring(line, 0.0, max_len_m, normalized=False)
    )

def pixels_under_line_mean(src: rasterio.io.DatasetReader, geom: LineString):
    """
    Return (mean_value, n_pixels) for all raster pixels intersected by the line.

    Band 1 of `src` is read over a window around the line's bounding box
    (padded by one pixel on each side). The line is burned onto that window
    with rasterize(all_touched=True), and the touched cells that are neither
    equal to `src.nodata` nor NaN are averaged.

    The window is snapped outward to whole pixels and clipped to the raster
    extent before reading, so the array that is read, the transform used for
    rasterizing, and the source pixel grid all refer to the same cells.

    Parameters
    ----------
    src : rasterio.io.DatasetReader
        Open raster dataset; only band 1 is sampled.
    geom : LineString
        Line to sample along. Must already be in the raster's CRS; no
        reprojection is done here.

    Returns
    -------
    (float, int)
        Mean of the valid touched cells (np.nan when there are none) and the
        number of cells that went into the mean.
    """
    # An empty geometry has no usable bounds; report "no pixels" instead of failing.
    if geom.is_empty:
        return np.nan, 0

    minx, miny, maxx, maxy = geom.bounds
    # Create a small padding to be safe on window edges (1 pixel in each direction)
    pad_x = abs(src.transform.a)
    pad_y = abs(src.transform.e)
    raw_window = from_bounds(minx - pad_x, miny - pad_y, maxx + pad_x, maxy + pad_y, transform=src.transform)

    # from_bounds() yields fractional offsets/sizes. Reading a fractional window
    # does not guarantee a cell-for-cell copy of the source grid, so expand to
    # whole pixels (floor the start, ceil the end) and clamp to the dataset.
    col_start = max(int(np.floor(raw_window.col_off)), 0)
    row_start = max(int(np.floor(raw_window.row_off)), 0)
    col_stop = min(int(np.ceil(raw_window.col_off + raw_window.width)), src.width)
    row_stop = min(int(np.ceil(raw_window.row_off + raw_window.height)), src.height)
    if col_stop <= col_start or row_stop <= row_start:
        # The padded bounding box lies entirely outside the raster.
        return np.nan, 0
    window = Window(col_start, row_start, col_stop - col_start, row_stop - row_start)

    # Read subset
    data = src.read(1, window=window, masked=False)
    win_transform = src.window_transform(window)
    nodata = src.nodata

    # Rasterize line onto the window grid (1 where the line touches a cell, 0 elsewhere)
    mask = rasterize(
        [(geom, 1)],
        out_shape=data.shape,
        transform=win_transform,
        fill=0,
        all_touched=True,
        dtype="uint8",
    )

    # Select raster cells touched by the line, excluding nodata/NaN
    valid = (mask == 1) & ~np.isnan(data)
    if nodata is not None:
        valid &= data != nodata

    vals = data[valid]
    n = int(vals.size)
    # NaNs were filtered out above, so a plain mean suffices; accumulate in
    # float64 so float32 rasters do not lose precision in the sum.
    mean_val = float(np.mean(vals, dtype=np.float64)) if n > 0 else np.nan
    return mean_val, n

In [4]:
# Cell 4: Load metadata and prepare a lookup on feature_ID
meta = pd.read_csv(meta_csv)

# Accept a few known spellings/typos of the ID column; the first one present wins
feature_col = next(
    (
        name
        for name in ("feature_ID", "feautre_ID", "Feature_ID", "feature_id")
        if name in meta.columns
    ),
    None,
)
if feature_col is None:
    raise ValueError("Could not find a 'feature_ID' (or 'feautre_ID') column in the metadata CSV.")

# Coerce IDs to nullable integers (unparseable entries become <NA>) so they
# compare cleanly against integer IDs
feature_ids_numeric = pd.to_numeric(meta[feature_col], errors="coerce")
meta[feature_col] = feature_ids_numeric.astype("Int64")

In [8]:
# Cell 5: Main loop  (UPDATED)
# For every *.geojson in `in_dir`: clip the line to its first 1 km, write the
# clipped line to `short_out_dir`, sample the bed raster under it, and append
# one summary record to `results`. Any exception raised for a file is caught,
# printed, and that file is left out of `results`.
results = []

# Keep the raster open for the whole loop so it is opened only once
with rasterio.open(bed_tif) as src:
    raster_crs = src.crs

    # sorted() makes the processing (and printing) order deterministic
    for f in sorted(in_dir.glob("*.geojson")):
        try:
            gdf = gpd.read_file(f)
            if gdf.empty:
                print(f"⚠️ Skipping (empty): {f.name}")
                continue

            # Ensure CRS matches the raster (reproject if needed).
            # The ValueError below is caught by the except at the bottom of the
            # loop body, so a file without a CRS is reported and skipped.
            if gdf.crs is None:
                raise ValueError(f"No CRS found in {f.name}. Expected EPSG:3413.")
            if raster_crs and gdf.crs != raster_crs:
                gdf = gdf.to_crs(raster_crs)

            # Only the first feature of the file is used. A MultiLineString is
            # reduced to its longest part; any other geometry type is skipped.
            geom = gdf.geometry.iloc[0]
            if geom.geom_type != "LineString":
                if geom.geom_type == "MultiLineString":
                    geom = max(list(geom.geoms), key=lambda L: L.length)
                else:
                    print(f"⚠️ Not a LineString in {f.name}, skipping.")
                    continue

            # Shorten to first 1 km (measured from the line's first vertex)
            short_geom = shorten_line_1km(geom, 1000.0)
            front_len = float(short_geom.length)  # meters

            # Save shortened line with _frontshort.geojson appended
            # (the suffix is not added twice if the stem already ends with it)
            stem = f.stem
            out_name = stem + "_frontshort.geojson" if not stem.endswith("_frontshort") else stem + ".geojson"
            out_path = short_out_dir / out_name
            gpd.GeoDataFrame({"source":[f.name]}, geometry=[short_geom], crs=gdf.crs).to_file(out_path, driver="GeoJSON")

            # Compute mean bed value from pixels intersected by the shortened line
            bed_mean, bed_points = pixels_under_line_mean(src, short_geom)

            # Extract IDs from filename: gl_<feature_ID>_<flowline_ID>_...
            # Both stay None when the stem does not match the pattern.
            m = re.search(r"gl_(\d+)_(\d+)", f.stem)
            feature_id = int(m.group(1)) if m else None
            flowline_id = int(m.group(2)) if m else None

            # Join metadata: look up the row whose feature ID matches; fields
            # stay NaN when there is no ID, no matching row, or no such column.
            glacier_ID = glacier_name = np.nan
            feat_id_out = feature_id if feature_id is not None else np.nan
            if feature_id is not None:
                row = meta.loc[meta[feature_col] == feature_id]
                if not row.empty:
                    # If several rows match, the first one is used
                    glacier_ID = row["glacier_ID"].iloc[0] if "glacier_ID" in row.columns else np.nan
                    glacier_name = row["glacier_name"].iloc[0] if "glacier_name" in row.columns else np.nan
                else:
                    print(f"ℹ️ feature_ID {feature_id} not found in metadata for {f.name}")

            # One summary record per processed input file
            results.append({
                "glacier_ID": glacier_ID,
                "glacier_name": glacier_name,
                "feature_ID": feat_id_out,
                "flowline_ID": flowline_id,
                "bed_mean": bed_mean,
                "bed_points": int(bed_points),
                "front_lenght": front_len,     # keep spelling as requested
                "filename": f.name,
                "short_filename": out_path.name,   # renamed
            })

            print(f"✓ {f.name} -> mean={bed_mean:.3f} from {bed_points} px, length={front_len:.1f} m")

        # Best-effort batch: report the failure and move on to the next file
        except Exception as e:
            print(f"❌ Error processing {f.name}: {e}")

✓ gl_0_1.geojson -> mean=-490.061 from 10 px, length=1000.0 m
✓ gl_102_1.geojson -> mean=-292.632 from 11 px, length=1000.0 m
✓ gl_103_1.geojson -> mean=-407.877 from 11 px, length=1000.0 m
✓ gl_103_2.geojson -> mean=-477.709 from 11 px, length=1000.0 m
✓ gl_103_3.geojson -> mean=-401.797 from 10 px, length=1000.0 m
✓ gl_104_1.geojson -> mean=54.719 from 10 px, length=1000.0 m
✓ gl_126_1.geojson -> mean=-516.481 from 11 px, length=1000.0 m
✓ gl_126_2.geojson -> mean=-666.484 from 11 px, length=1000.0 m
✓ gl_126_3.geojson -> mean=-752.927 from 11 px, length=1000.0 m
✓ gl_146_1.geojson -> mean=105.518 from 11 px, length=1000.0 m
✓ gl_146_2.geojson -> mean=-16.445 from 8 px, length=1000.0 m
✓ gl_146_3.geojson -> mean=-297.838 from 10 px, length=1000.0 m
✓ gl_146_4.geojson -> mean=-389.576 from 10 px, length=1000.0 m
✓ gl_146_5.geojson -> mean=-325.169 from 10 px, length=1000.0 m
✓ gl_146_6.geojson -> mean=88.268 from 10 px, length=1000.0 m
✓ gl_147_1.geojson -> mean=-172.092 from 10 px, l

In [9]:
# Cell 6: Save summary CSV  (UPDATED)

# Output column order: metadata first, then measurements, then file names
cols = [
    "glacier_ID", "glacier_name", "feature_ID", "flowline_ID",
    "bed_mean", "bed_points", "front_lenght", "filename", "short_filename"
]

# Build the table in one chain. A temporary numeric copy of glacier_ID drives
# the sort (so IDs order numerically, ascending) and is dropped afterwards;
# ties are broken by feature_ID, then flowline_ID.
df = (
    pd.DataFrame(results)
    .reindex(columns=cols)
    .assign(_gID_num=lambda frame: pd.to_numeric(frame["glacier_ID"], errors="coerce"))
    .sort_values(by=["_gID_num", "feature_ID", "flowline_ID"], ascending=[True, True, True])
    .drop(columns="_gID_num")
)

df.to_csv(summary_csv, index=False)

# Show where the CSV went together with a preview of the table
summary_csv, df.head()

(PosixPath('/Users/jagon/Documents/Projects/Collabs/Jessica Badgeley/Bathymetry Analysis/Mean Depth/bed_mean.csv'),
     glacier_ID  glacier_name  feature_ID  flowline_ID   bed_mean  bed_points  \
 22           1     Diebitsch         173            1  59.837158           9   
 23           1     Diebitsch         173            2  86.809792          10   
 21           2  Morris Jesup         171            1  51.836479           9   
 19           3      Verhoeff         166            1  83.542183          10   
 20           3      Verhoeff         166            2  73.838104          11   
 
     front_lenght          filename               short_filename  
 22        1000.0  gl_173_1.geojson  gl_173_1_frontshort.geojson  
 23        1000.0  gl_173_2.geojson  gl_173_2_frontshort.geojson  
 21        1000.0  gl_171_1.geojson  gl_171_1_frontshort.geojson  
 19        1000.0  gl_166_1.geojson  gl_166_1_frontshort.geojson  
 20        1000.0  gl_166_2.geojson  gl_166_2_frontshort.geoj