In [None]:
This computes the burned-area fraction (written as percent, 0-100) per 4 km grid cell for each year of FireCCI data.

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import pathlib
import subprocess
import shlex
from osgeo import gdal, osr

# -------------------- USER PATHS --------------------
# Reference raster; every output is warped onto this raster's grid.
TEMPLATE = "/explore/nobackup/people/spotter5/clelland_fire_ml/tem_grid.tif"
# Directory with one input raster per year, named "<year>.tif".
IN_DIR   = "/explore/nobackup/people/spotter5/clelland_fire_ml/fire_cci_us"
# Directory receiving one output raster per year, named "<year>.tif".
OUT_DIR  = "/explore/nobackup/people/spotter5/clelland_fire_ml/fire_cci_us_temgrid_frac"

# Which years to process (adjust if needed); currently 2001 through 2019.
YEARS = list(range(2001, 2020))

# -------------------- GDAL SPEED TWEAKS -------------
# Make GDAL raise Python exceptions instead of returning None / error codes.
gdal.UseExceptions()
# The heavy lifting is done by gdal_calc.py / gdalwarp child processes.
# gdal.SetConfigOption() only affects this interpreter, whereas environment
# variables are inherited by subprocesses, so export both settings through
# the environment. setdefault() keeps any value the user already exported.
os.environ.setdefault("GDAL_NUM_THREADS", "ALL_CPUS")
os.environ.setdefault("GDAL_CACHEMAX", "4096")  # MB
gdal.SetConfigOption("GDAL_CACHEMAX", "4096")  # MB (in-process reads)

# -------------------- HELPERS ------------------------
def tif_for_year(year: int) -> pathlib.Path:
    """Return the path of the input raster for *year*: ``IN_DIR/<year>.tif``."""
    filename = f"{year}.tif"
    return pathlib.Path(IN_DIR).joinpath(filename)

def out_for_year(year: int) -> pathlib.Path:
    """Return the path of the output raster for *year*: ``OUT_DIR/<year>.tif``.

    The pipeline in this file writes that output as a percent (0-100)
    Float32 GeoTIFF aligned to the template grid.
    """
    filename = f"{year}.tif"
    return pathlib.Path(OUT_DIR).joinpath(filename)

def read_template_info(path):
    """Read the grid definition of the template raster.

    Args:
        path: Path of the raster to open with ``gdal.Open``.

    Returns:
        A dict with keys:
          - "gt": the 6-element geotransform as returned by GDAL,
          - "xsize" / "ysize": raster width and height in pixels,
          - "bounds": ``(xmin, ymin, xmax, ymax)`` envelope of the raster,
          - "proj_wkt": the projection string from ``GetProjectionRef()``.

    Raises:
        RuntimeError: if the raster cannot be opened.
    """
    ds = gdal.Open(path, gdal.GA_ReadOnly)
    if ds is None:
        raise RuntimeError(f"Cannot open template: {path}")
    try:
        gt = ds.GetGeoTransform()
        proj_wkt = ds.GetProjectionRef()
        xsize, ysize = ds.RasterXSize, ds.RasterYSize
    finally:
        # Release the dataset handle explicitly, like the other helpers do.
        ds = None

    x0, px, rx, y0, ry, py = gt
    # Map all four pixel-space corners through the affine geotransform and
    # take their envelope. For a north-up grid (rx == ry == 0) this equals the
    # top-left / bottom-right computation; with rotation terms the other two
    # corners can extend beyond those, so they must be included.
    corners = ((0, 0), (xsize, 0), (0, ysize), (xsize, ysize))
    xs = [x0 + px * col + rx * row for col, row in corners]
    ys = [y0 + ry * col + py * row for col, row in corners]
    return {
        "gt": gt,
        "xsize": xsize, "ysize": ysize,
        "bounds": (min(xs), min(ys), max(xs), max(ys)),
        "proj_wkt": proj_wkt
    }

def get_src_nodata(tif_path: str):
    """Return the NoData value reported for band 1 of *tif_path*.

    The value is passed through exactly as ``GetNoDataValue()`` returns it.

    Raises:
        RuntimeError: if the raster cannot be opened.
    """
    dataset = gdal.Open(tif_path, gdal.GA_ReadOnly)
    if dataset is None:
        raise RuntimeError(f"Cannot open: {tif_path}")
    nodata = dataset.GetRasterBand(1).GetNoDataValue()
    # Drop the reference so GDAL can close the file.
    dataset = None
    return nodata

def verify_match(out_path: str, tmpl_info, eps=1e-7):
    """Check that the raster at *out_path* sits on the template grid.

    Compares raster size, each geotransform coefficient (within *eps*) and
    the CRS against *tmpl_info*, printing one ``[MISMATCH]`` line per
    difference found.

    Args:
        out_path: Path of the raster to check.
        tmpl_info: Dict with "xsize", "ysize", "gt" and "proj_wkt" entries
            describing the reference grid.
        eps: Absolute tolerance for geotransform coefficients.

    Returns:
        True when size, geotransform and CRS all match; False otherwise,
        including when the raster cannot be opened.
    """
    try:
        ds = gdal.Open(out_path, gdal.GA_ReadOnly)
    except RuntimeError:
        # This file enables gdal.UseExceptions(), so a failed open raises
        # instead of returning None; treat both the same way.
        ds = None
    if ds is None:
        print(f"[WARN] Cannot open {out_path} for verification")
        return False
    ok = True
    # size
    if ds.RasterXSize != tmpl_info["xsize"] or ds.RasterYSize != tmpl_info["ysize"]:
        print(f"[MISMATCH] size out=({ds.RasterXSize},{ds.RasterYSize}) ref=({tmpl_info['xsize']},{tmpl_info['ysize']})")
        ok = False
    # geotransform
    gt_out = ds.GetGeoTransform()
    for i, (a, b) in enumerate(zip(gt_out, tmpl_info["gt"])):
        if abs(a - b) > eps:
            print(f"[MISMATCH] GT[{i}] out={a} ref={b}")
            ok = False
    # CRS
    sr_out = osr.SpatialReference(wkt=ds.GetProjection())
    sr_ref = osr.SpatialReference(wkt=tmpl_info["proj_wkt"])
    if not sr_out.IsSame(sr_ref):
        print("[MISMATCH] CRS differ")
        ok = False
    ds = None
    return ok

# -------------------- MAIN --------------------------
def _run_logged(cmd):
    """Echo *cmd* as a shell-quoted ``[RUN]`` line, then run it (raising on failure)."""
    print("[RUN]", " ".join(shlex.quote(c) for c in cmd))
    subprocess.run(cmd, check=True)


def _remove_temp(path):
    """Best-effort removal of *path* and its ``.aux.xml`` sidecar."""
    sidecar = path.with_suffix(path.suffix + ".aux.xml")
    for victim in (path, sidecar):
        try:
            victim.unlink(missing_ok=True)
        except OSError as err:
            # Cleanup must not abort the run, but should not be silent either.
            print(f"[WARN] Could not remove {victim}: {err}")


def main():
    """Write, for every year in YEARS, the percent burned per template cell.

    For each year whose input exists and whose output does not:
      1. ``gdal_calc.py`` turns the input into a Byte mask ``(A>0)``.
      2. ``gdalwarp -r average`` resamples the mask onto the template grid
         (same CRS, extent and size), giving the burned fraction per cell.
      3. ``gdal_calc.py`` multiplies by 100 to get percent.
      4. Intermediates are deleted and the result is checked against the
         template grid with ``verify_match``.

    The final raster is written to a temporary name and only renamed to its
    real name once every step succeeded, so an interrupted run cannot leave a
    partial file that later runs would ``[SKIP]`` as complete.

    Raises:
        subprocess.CalledProcessError: if any GDAL command exits non-zero.
    """
    tmpl = read_template_info(TEMPLATE)
    print("Template grid:")
    print("  size (W,H):", tmpl["xsize"], tmpl["ysize"])
    print("  bounds    :", tmpl["bounds"])
    _, px, _, _, _, py = tmpl["gt"]
    print("  pixel size:", (px, py))
    print("-" * 80)

    pathlib.Path(OUT_DIR).mkdir(parents=True, exist_ok=True)

    for year in YEARS:
        src = tif_for_year(year)
        if not src.exists():
            print(f"[MISS] {src} not found; skip")
            continue

        dst = out_for_year(year)
        if dst.exists():
            print(f"[SKIP] {dst} exists")
            continue

        tmp_bin = dst.with_suffix(".bin.tif")    # 0/1 mask
        tmp_frac = dst.with_suffix(".frac.tif")  # fraction on template grid
        tmp_out = dst.with_suffix(".tmp.tif")    # final raster before rename

        src_nd = get_src_nodata(src.as_posix())

        try:
            # 1) Make a temporary binary mask (0/1). When the source declares
            #    a NoData value, it is mapped to 255 in the mask so it can be
            #    ignored in average resampling.
            calc_cmd = [
                "gdal_calc.py",
                "-A", src.as_posix(),
                "--outfile", tmp_bin.as_posix(),
                "--type=Byte",
                "--calc", "(A>0)",
                # Replace any stale intermediate left by a killed run.
                "--overwrite",
                "--creation-option=COMPRESS=LZW",
                "--creation-option=TILED=YES",
                "--creation-option=BIGTIFF=IF_SAFER",
            ]
            if src_nd is not None:
                calc_cmd += [f"--A_nodata={src_nd}", "--NoDataValue=255"]
            _run_logged(calc_cmd)

            # 2) Warp the binary mask to the template grid using average
            #    resampling -> this yields fraction burned (0..1).
            warp_cmd = [
                "gdalwarp",
                "-overwrite",
                "-multi",
                "-wo", "NUM_THREADS=ALL_CPUS",
                "-r", "average",
                "-t_srs", tmpl["proj_wkt"],
                "-te", str(tmpl["bounds"][0]), str(tmpl["bounds"][1]),
                str(tmpl["bounds"][2]), str(tmpl["bounds"][3]),
                "-ts", str(tmpl["xsize"]), str(tmpl["ysize"]),
                "-ot", "Float32",
                "-dstnodata", "nan",
                "-co", "COMPRESS=LZW",
                "-co", "TILED=YES",
                "-co", "BIGTIFF=IF_SAFER",
            ]
            # ignore 255 (nodata in mask) during averaging
            if src_nd is not None:
                warp_cmd += ["-srcnodata", "255"]
            warp_cmd += [tmp_bin.as_posix(), tmp_frac.as_posix()]
            _run_logged(warp_cmd)

            # 3) Scale to percent (0..100)
            #    (gdal_calc is block-wise; cheap vs loading full array in Python)
            scale_cmd = [
                "gdal_calc.py",
                "-A", tmp_frac.as_posix(),
                "--outfile", tmp_out.as_posix(),
                "--type=Float32",
                "--calc", "A*100.0",
                "--NoDataValue=nan",
                "--overwrite",
                "--creation-option=COMPRESS=LZW",
                "--creation-option=TILED=YES",
                "--creation-option=BIGTIFF=IF_SAFER",
            ]
            _run_logged(scale_cmd)

            # Publish under the real name only now that all steps succeeded.
            tmp_out.replace(dst)
        finally:
            # 4) Clean up intermediates, also when a step above failed.
            for tmp in (tmp_bin, tmp_frac, tmp_out):
                _remove_temp(tmp)

        # 5) Verify exact grid match
        if verify_match(dst.as_posix(), tmpl):
            print(f"[OK] {src.name} -> {dst.name} (burned % per 4km cell)")
        else:
            print(f"[WARN] {dst} written but does not exactly match template grid")

    print("-" * 80)
    print("Done.")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


Template grid:
  size (W,H): 2242 1934
  bounds    : (-4602000.0, -3485000.0, 4366000.0, 4251000.0)
  pixel size: (4000.0, -4000.0)
--------------------------------------------------------------------------------
[RUN] gdal_calc.py -A /explore/nobackup/people/spotter5/clelland_fire_ml/fire_cci_us/2001.tif --outfile /explore/nobackup/people/spotter5/clelland_fire_ml/fire_cci_us_temgrid_frac/2001.bin.tif --type=Byte --calc '(A>0)' --creation-option=COMPRESS=LZW --creation-option=TILED=YES --creation-option=BIGTIFF=IF_SAFER


  __import__('pkg_resources').run_script('GDAL==3.5.0', 'gdal_calc.py')


0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 