
# ET & Precipitation by Land Use (CDL) — Google Earth Engine

This notebook estimates **evapotranspiration (ET)** and **precipitation (P)** by **land-use classes** using:
- **OpenET/ENSEMBLE/CONUS/GRIDMET/MONTHLY/v2_0** (ET, mm)
- **IDAHO_EPSCOR/GRIDMET** (precipitation, mm)
- **USDA/NASS/CDL** (Cropland Data Layer: land cover/land use; 2008+)

It writes a CSV and generates QA plots for quick checks.


## 1) Requirements

In [1]:

# If needed in a fresh environment, run:
# !pip install --upgrade pip
# !pip install earthengine-api geopandas shapely fiona pandas matplotlib
#
# Note: Installing GDAL/GEOS/PROJ for GeoPandas may require system packages.


## 2) Authenticate & Initialize Earth Engine

In [None]:

import ee

# Cloud project passed to ee.Initialize, e.g. 'ee-mikediastat' (optional).
EE_PROJECT = 'ee-your-project-id'


def _connect_earth_engine(project):
    """Initialize the Earth Engine client for *project*.

    Initialization is attempted directly first; if it raises, the
    authentication flow is run and initialization is retried once.
    """
    try:
        ee.Initialize(project=project)
    except Exception:
        ee.Authenticate()
        ee.Initialize(project=project)


_connect_earth_engine(EE_PROJECT)
print("Earth Engine initialized.")


Earth Engine initialized.


## 3) Parameters

In [4]:

from pathlib import Path

# Region switch: True selects the state of Iowa; False selects the polygon
# stored in SHP_PATH.
USE_IOWA_STATE = True
SHP_PATH = Path(
    r"C:/Users/adi10136/OneDrive - Iowa State University/Desktop/Runoff Analysis/Shp/Ioway Creek/layers/globalwatershed.shp"
)

# Output folder (created here if missing) and the results table inside it.
OUT_DIR = Path("PAPER_CALCULATION_ET")
OUT_DIR.mkdir(parents=True, exist_ok=True)
CSV_PATH = OUT_DIR.joinpath("Iowa_ET_P_by_LandUse.csv")

# Inclusive range of years to process.
START_YEAR, END_YEAR = 2000, 2023
SKIP_LU_BEFORE_2008 = True
SCALE = 30  # default `scale` handed to the reduceRegion helpers

# CDL class code -> label. Insertion order is the order rows are written.
LAND_USE_CLASSES = {
    176: "Grassland/Pasture",
    5: "Soybeans",
    1: "Corn",
    36: "Alfalfa",
    141: "Deciduous Forest",
}
# CDL codes that are merged into a single "urban" class.
URBAN_CODES = list(range(121, 125))


## 4) Build Region of Interest (ROI)

In [5]:

import geopandas as gpd

def build_roi():
    """Return the region of interest as an Earth Engine geometry.

    When ``USE_IOWA_STATE`` is true, the ROI is the geometry of the feature
    named 'Iowa' in the ``TIGER/2018/States`` collection. Otherwise the
    shapefile at ``SHP_PATH`` is read, reprojected to EPSG:4326 and converted
    ring by ring, keeping every feature, every part of multi-part polygons,
    and every interior ring (hole).

    Returns:
        An ``ee.Geometry`` (Polygon for a single polygon, MultiPolygon
        otherwise).

    Raises:
        ValueError: If the shapefile holds no polygon geometry.
    """
    if USE_IOWA_STATE:
        states = ee.FeatureCollection("TIGER/2018/States")
        iowa = states.filter(ee.Filter.eq('NAME', 'Iowa'))
        return iowa.geometry()

    gdf = gpd.read_file(SHP_PATH).to_crs(epsg=4326)

    def ring_coords(ring):
        # Keep x/y only so a Z value stored in the shapefile is not sent on.
        return [[pt[0], pt[1]] for pt in ring.coords]

    polygons = []
    for geom in gdf.geometry:
        if geom is None or geom.is_empty:
            continue
        if geom.geom_type == 'Polygon':
            parts = [geom]
        elif geom.geom_type == 'MultiPolygon':
            parts = list(geom.geoms)
        else:
            # Points and lines cannot bound a region; ignore them.
            continue
        for part in parts:
            rings = [ring_coords(part.exterior)]
            rings.extend(ring_coords(hole) for hole in part.interiors)
            polygons.append(rings)

    if not polygons:
        raise ValueError(f"No polygon geometry found in {SHP_PATH}")
    if len(polygons) == 1:
        return ee.Geometry.Polygon(polygons[0])
    return ee.Geometry.MultiPolygon(polygons)

# Build the region once when this cell runs; the reduction calls in the
# processing cell read this module-level ROI.
ROI = build_roi()
print("ROI ready.")


ROI ready.


## 5) Helper functions

In [6]:

def get_cdl_image(year: int):
    """Return the first ``USDA/NASS/CDL`` image dated within *year*.

    The collection is filtered with ``filterDate`` from ``{year}-01-01`` to
    ``{year}-12-31`` and ``first()`` is taken from the result.
    """
    window_start = f"{year}-01-01"
    window_end = f"{year}-12-31"
    cdl_collection = ee.ImageCollection("USDA/NASS/CDL")
    return cdl_collection.filterDate(window_start, window_end).first()

def select_cdl_band(img):
    """Return the land-cover band of a CDL image.

    Selects "cropland"; if that call raises, selects "landcover" instead.
    """
    # NOTE(review): Earth Engine calls are deferred, so a missing band is
    # presumably only reported when the result is fetched, meaning this
    # fallback is unlikely to trigger here. Confirm before relying on it.
    try:
        band = img.select("cropland")
    except Exception:
        band = img.select("landcover")
    return band

def cdl_mask(img, code):
    """Return the CDL class band of *img* compared to *code* with ``eq``."""
    class_band = select_cdl_band(img)
    return class_band.eq(code)

def union_mask(img, codes):
    """Return the ``Or`` of the per-code ``eq`` masks for every code in *codes*.

    Returns ``None`` when *codes* is empty.
    """
    combined = None
    for code in codes:
        class_mask = select_cdl_band(img).eq(code)
        if combined is None:
            combined = class_mask
        else:
            combined = combined.Or(class_mask)
    return combined

def reduce_mean(image, geom, scale=SCALE):
    """Reduce *image* over *geom* with a mean reducer.

    Calls ``reduceRegion`` with ``bestEffort=True`` and ``maxPixels=1e13`` and
    returns its result unchanged.
    """
    options = {
        "reducer": ee.Reducer.mean(),
        "geometry": geom,
        "scale": scale,
        "bestEffort": True,
        "maxPixels": 1e13,
    }
    return image.reduceRegion(**options)

def masked_mean(image, mask, geom, scale=SCALE):
    """Apply *mask* to *image* with ``updateMask`` and pass it to ``reduce_mean``."""
    masked_image = image.updateMask(mask)
    return reduce_mean(masked_image, geom, scale)

def masked_area_m2(mask, geom, scale=SCALE):
    """Sum ``ee.Image.pixelArea()`` over the pixels kept by *mask* within *geom*.

    Returns the "area" entry of the ``reduceRegion`` result (a deferred
    Earth Engine value, not a Python number).
    """
    masked_pixel_area = ee.Image.pixelArea().updateMask(mask)
    totals = masked_pixel_area.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=geom,
        scale=scale,
        bestEffort=True,
        maxPixels=1e13,
    )
    return totals.get("area")

def safe_get(ee_obj, default=None):
    """Return ``ee_obj.getInfo()``, or *default* if that call raises.

    Failures stay non-fatal so one bad request does not abort a long run,
    but each one is logged as a warning so it is not mistaken for a
    genuinely missing value.

    Args:
        ee_obj: Object exposing a ``getInfo()`` method.
        default: Value returned when ``getInfo()`` raises.

    Returns:
        The fetched value, or *default* on any ``Exception``.
    """
    try:
        return ee_obj.getInfo()
    except Exception as err:
        # Local import keeps this cell self-contained.
        import logging

        logging.getLogger(__name__).warning(
            "getInfo() failed; returning default %r: %s", default, err
        )
        return default


## 6) Compute ET & P by land use and write CSV (water years)

In [None]:

import csv

CSV_HEADER = [
    "Year", "Land_Use_Code", "Land_Use_Label", "Land_Use_Area_m2",
    "Mean_ET_Watershed", "Mean_ET_Land_Use",
    "Mean_Precip_Watershed", "Mean_Precip_Land_Use",
    "P_minus_ET", "ET_over_P",
]


def _water_balance(et_value, pr_value):
    """Return ``(P - ET, ET / P)``; an entry is ``None`` when not computable.

    Both entries are ``None`` if either input is ``None``; the ratio is also
    ``None`` when precipitation is zero.
    """
    if et_value is None or pr_value is None:
        return None, None
    ratio = et_value / pr_value if pr_value != 0 else None
    return pr_value - et_value, ratio


def _land_use_row(year, code, label, mask, et_img, pr_img, et_roi_mean, pr_roi_mean):
    """Build one CSV row (in ``CSV_HEADER`` order) for the pixels kept by *mask*."""
    area_m2 = safe_get(masked_area_m2(mask, ROI), 0.0) or 0.0
    et_lu = safe_get(masked_mean(et_img, mask, ROI).get('et_ensemble_mad'))
    pr_lu = safe_get(masked_mean(pr_img, mask, ROI).get('pr'))
    p_minus_et, et_over_p = _water_balance(et_lu, pr_lu)
    return [year, code, label, area_m2, et_roi_mean, et_lu,
            pr_roi_mean, pr_lu, p_minus_et, et_over_p]


# Start a fresh file containing only the header. UTF-8 is set explicitly
# because a label below contains an en dash and pandas reads UTF-8 by default,
# whereas open() would otherwise use the platform's locale encoding.
with open(CSV_PATH, mode='w', newline='', encoding='utf-8') as f:
    csv.writer(f).writerow(CSV_HEADER)

for year in range(START_YEAR, END_YEAR + 1):
    print(f"Processing {year} ...")

    # Building an Earth Engine image never fails client-side, so CDL
    # availability has to be checked by fetching the collection size. The date
    # window mirrors get_cdl_image(). If the check itself fails (None), the
    # year is attempted anyway.
    cdl_count = safe_get(
        ee.ImageCollection("USDA/NASS/CDL")
        .filterDate(f"{year}-01-01", f"{year}-12-31")
        .size()
    )
    has_cdl = cdl_count is None or cdl_count > 0

    if SKIP_LU_BEFORE_2008 and (not has_cdl):
        print(f"  Skipping land-use breakdown for {year} (CDL not available)." )
        continue

    # Water year: Oct 1 of the previous year through Sep 30 of this year.
    # filterDate treats the end date as exclusive, so the window must end on
    # Oct 1 for an image stamped Sep 30 (daily precipitation) to be included.
    start_wy = f"{year - 1}-10-01"
    end_wy = f"{year}-10-01"

    et_sum = (
        ee.ImageCollection('OpenET/ENSEMBLE/CONUS/GRIDMET/MONTHLY/v2_0')
        .filterDate(start_wy, end_wy)
        .select('et_ensemble_mad')
        .sum()
    )
    pr_sum = (
        ee.ImageCollection('IDAHO_EPSCOR/GRIDMET')
        .filterDate(start_wy, end_wy)
        .select('pr')
        .sum()
    )

    # Whole-ROI means are repeated on every row of the year.
    et_roi_mean = safe_get(reduce_mean(et_sum, ROI).get('et_ensemble_mad'))
    pr_roi_mean = safe_get(reduce_mean(pr_sum, ROI).get('pr'))

    cdl = get_cdl_image(year)

    rows = [
        _land_use_row(year, code, label, cdl_mask(cdl, code),
                      et_sum, pr_sum, et_roi_mean, pr_roi_mean)
        for code, label in LAND_USE_CLASSES.items()
    ]
    # The urban codes are merged into one pseudo-class written with code 999.
    rows.append(
        _land_use_row(year, 999, "Urban (121–124)", union_mask(cdl, URBAN_CODES),
                      et_sum, pr_sum, et_roi_mean, pr_roi_mean)
    )

    # Append after each year so finished years survive a later failure.
    with open(CSV_PATH, mode='a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(rows)

print("Done. Wrote:", CSV_PATH)


Processing 2000 ...


## 7) Load CSV and Quick QA Plots

In [None]:

import pandas as pd
import matplotlib.pyplot as plt

# Load the results and keep only rows that have both land-use means.
df = pd.read_csv(CSV_PATH).dropna(subset=["Mean_ET_Land_Use", "Mean_Precip_Land_Use"])
df["Year"] = df["Year"].astype(int)


def _yearly_by_class(value_column):
    """Pivot *value_column* into a Year x Land_Use_Label table of means."""
    return df.pivot_table(
        index="Year",
        columns="Land_Use_Label",
        values=value_column,
        aggfunc="mean",
    )


pet = _yearly_by_class("P_minus_ET")
etp = _yearly_by_class("ET_over_P")
et = _yearly_by_class("Mean_ET_Land_Use")
pr = _yearly_by_class("Mean_Precip_Land_Use")

# One figure per metric, one line per land-use label.
panels = (
    ("P − ET by Land Use", pet, "P − ET (mm)"),
    ("ET / P by Land Use", etp, "ET / P (ratio)"),
    ("ET by Land Use", et, "ET (mm)"),
    ("Precipitation by Land Use", pr, "P (mm)"),
)
for title, table, ylabel in panels:
    plt.figure(figsize=(10, 4))
    for col in table.columns:
        plt.plot(table.index, table[col], label=col, linewidth=1)
    plt.title(title)
    plt.xlabel("Year")
    plt.ylabel(ylabel)
    plt.legend(loc="best", fontsize=8)
    plt.grid(True)
    plt.tight_layout()
    plt.show()



## 8) Notes
- **Units**: GRIDMET `pr` and OpenET `et_ensemble_mad` are in **mm**. Thus `P_minus_ET` is also in mm.
- **CDL**: Available **from 2008 onward**. If you set `SKIP_LU_BEFORE_2008=True`, the code skips LU rows for earlier years.
- **Water Year**: Oct 1 (prev year) → Sep 30 (current year).
- **Class area**: The area of each land-use class is computed by summing `pixelArea()` over that class's mask within the ROI.
- **Performance**: For large ROIs or many years/classes, consider increasing `SCALE` (e.g., 250–1000) to reduce EE compute.
- **Custom ROI**: Toggle `USE_IOWA_STATE=False` and set `SHP_PATH` to your watershed polygon.
- **Reproducibility**: This notebook may be reused and reproduced under the repository's terms, with appropriate citation.
