In [18]:
from pathlib import Path
from typing import Tuple, List

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import shapely.geometry as sgeom
import regionmask
import ee

# ==== User parameters ====
# Merged daily NetCDF to analyse. The path is relative, so it is resolved
# against the kernel's current working directory when the file is opened.
NC_PATH   = Path("../data/cygnss_okavango_daily/cygnss_okavango_daily_merged.nc")  # <-- your merged daily file
# Earth Engine asset id of the FeatureCollection holding the rainfall regions.
EE_ASSET  = "projects/ee-okavango/assets/shapes/rainfall_regions"          # <-- your rainfall regions FC
# Feature property read from each region to get its name/id.
ID_PROP   = "name"                                                         # property holding region name/id
# Data variable to analyse. Leave as None to auto-pick a variable that has
# (lat, lon) dims, preferring names containing "mask" or "water".
VAR_NAME  = None  # e.g., "water_mask"; if None we auto-pick a var that has (lat, lon)

# # outputs
# PNG_OUT   = Path("cygnss_by_region.png")
# CSV_OUT   = Path("cygnss_by_region.csv")   # set to None to skip CSV


In [19]:
def ee_init() -> None:
    """Initialize the Earth Engine client, authenticating first if needed.

    Calls ``ee.Initialize()``. If that raises any exception, runs
    ``ee.Authenticate()`` and then retries ``ee.Initialize()`` once; an error
    raised by the retry (or by the authentication itself) propagates to the
    caller.
    """
    try:
        ee.Initialize()
    except Exception:
        # Every failure is handled the same way: authenticate, then retry once.
        ee.Authenticate()  # opens a browser prompt if not already authenticated
        ee.Initialize()

def ee_fc_to_shapes(asset_path: str, id_prop: str) -> tuple[list[str], list[sgeom.base.BaseGeometry]]:
    """Fetch a (modest) FeatureCollection and return (names, shapely geometries).

    The whole collection is pulled client-side with ``getInfo()``, so this is
    only appropriate for small/medium collections.

    asset_path: Earth Engine asset id of the FeatureCollection.
    id_prop:    feature property used as the region name. Features where the
                property is missing or null are named ``region_<i>``, with
                ``i`` the feature's position in the collection.

    Returns two parallel lists: region names (always ``str``) and the matching
    shapely geometries. Features without a geometry are skipped.

    Raises RuntimeError if no feature with a geometry is found.
    """
    info = ee.FeatureCollection(asset_path).getInfo()  # OK for small/medium FCs
    names, polys = [], []
    for i, feat in enumerate(info.get("features") or []):
        # GeoJSON allows "properties": null, so guard before calling .get on it.
        props = feat.get("properties") or {}
        rid = props.get(id_prop)
        if rid is None:
            # A null value would otherwise become the literal name "None" and
            # collide with every other unnamed region.
            rid = f"region_{i}"
        geom = feat.get("geometry")
        if not geom:
            continue
        names.append(str(rid))
        polys.append(sgeom.shape(geom))
    if not names:
        raise RuntimeError("No features found in the provided FeatureCollection.")
    return names, polys

def find_lat_lon_names(ds: xr.Dataset) -> Tuple[str, str]:
    """Return the names of the latitude and longitude dimensions of ``ds``.

    Dimension names are compared case-insensitively against
    ("lat", "latitude", "y") and ("lon", "longitude", "x"). For each axis the
    first matching entry in ``ds.dims`` order is returned unchanged.

    Raises RuntimeError listing the available dimensions when either axis
    cannot be identified, rather than letting a bare StopIteration escape.
    """
    def first_match(aliases, axis):
        # str() keeps the lookup safe for non-string dimension names.
        for dim in ds.dims:
            if str(dim).lower() in aliases:
                return dim
        raise RuntimeError(
            f"No {axis} dimension found in the NetCDF; "
            f"looked for {aliases} among dims {list(ds.dims)}."
        )

    lat = first_match(("lat", "latitude", "y"), "latitude")
    lon = first_match(("lon", "longitude", "x"), "longitude")
    return lat, lon

def pick_var_with_latlon(ds: xr.Dataset, lat: str, lon: str) -> str:
    """Choose the data variable to analyse.

    Search order:
      1. variables whose name contains "mask" or "water" and that carry both
         the ``lat`` and ``lon`` dims;
      2. any variable carrying both dims;
      3. the first data variable, whatever its dims.
    """
    candidates = list(ds.data_vars)
    required = {lat, lon}

    def on_grid(name):
        return required.issubset(ds[name].dims)

    hinted = [
        name for name in candidates
        if any(key in name.lower() for key in ("mask", "water"))
    ]

    # Hinted names are scanned first, then every variable in dataset order.
    for name in (*hinted, *candidates):
        if on_grid(name):
            return name
    return candidates[0]

def coslat_weights(lat: xr.DataArray) -> xr.DataArray:
    """Build cos(latitude) weights for a latitude coordinate given in degrees.

    The result has the same dims as ``lat`` and uses ``lat`` itself as the
    coordinate along its first dimension.
    """
    axis_name = lat.dims[0]
    cosines = np.cos(np.deg2rad(lat.values))
    return xr.DataArray(cosines, dims=lat.dims, coords={axis_name: lat})

def area_weighted_mean_by_mask(da: xr.DataArray, mask2d: xr.DataArray) -> xr.DataArray:
    """
    cos(lat)-weighted mean over a boolean mask for each timestep.
    da:   (time, lat, lon), dims in exactly that order
    mask: (lat, lon) -> True inside region

    Cells that are NaN in `da` are left out of BOTH the weighted sum and the
    sum of weights, so gaps in coverage do not pull the mean toward zero.
    A timestep with no valid cell inside the mask yields NaN (not 0).

    Returns a DataArray with only the leading (time) dimension left.
    """
    # Positional unpacking: the last two dims are taken as the spatial axes.
    _, lat_name, lon_name = da.dims
    spatial = (lat_name, lon_name)

    # 1D weights broadcast automatically against the (time, lat, lon) data.
    w1d = coslat_weights(da[lat_name])
    inside = da.where(mask2d)

    num = (inside * w1d).sum(dim=spatial, skipna=True)
    # Normalise by the weights of the cells that actually contributed to `num`.
    den = w1d.where(inside.notnull()).sum(dim=spatial, skipna=True)

    # den == 0 means "no data at this timestep": return NaN instead of 0/0.
    return num / den.where(den > 0)


In [20]:
# Load dataset (merged daily NetCDF produced earlier)
# NC_PATH is relative, so it is resolved against the kernel's working
# directory; a FileNotFoundError here means the file is not at that location.
ds = xr.open_dataset(NC_PATH, decode_times=True, mask_and_scale=True)

# Identify dims/variable
# The time dimension is matched case-insensitively on the exact name "time".
time_name = next((d for d in ds.dims if d.lower() == "time"), None)
if time_name is None:
    raise RuntimeError("No time dimension found in the NetCDF.")

lat_name, lon_name = find_lat_lon_names(ds)
# An explicit VAR_NAME wins; the auto-picker only runs when it is None/empty.
varname = VAR_NAME or pick_var_with_latlon(ds, lat_name, lon_name)
print("Using variable:", varname)

# Normalize to (time, lat, lon)
# area_weighted_mean_by_mask unpacks da.dims positionally, so the order matters.
da = ds[varname]
if da.dims != (time_name, lat_name, lon_name):
    da = da.transpose(time_name, lat_name, lon_name)

# Last expression of the cell: shows the DataArray as the cell output.
da


FileNotFoundError: [Errno 2] No such file or directory: b'/Users/octaviacrompton/Projects/dswe-inman-lyons/data/cygnss_okavango_daily/cygnss_okavango_daily_merged.nc'

In [None]:

# Initialize Earth Engine (authenticating if the first attempt fails), then
# download the region FeatureCollection as two parallel lists:
# region names and shapely geometries.
ee_init()
region_names, polygons = ee_fc_to_shapes(EE_ASSET, ID_PROP)
print(f"Fetched {len(region_names)} regions from {EE_ASSET}")
