In [1]:
# Notebook setup: imports and configuration
from osgeo import gdal
import glob, os
from pathlib import Path
from typing import List



In [2]:

# === User configuration: edit these values if needed ===
# Root folder searched by find_month_files(), which globs
# BASE/tile_*/<year>/HLS_*_<MM>-*_<year>_<year>_maxevi2.tif
BASE = Path("/projects/my-private-bucket/HLS-1DCNN-AGB/data/tif/HLS_composites/monthly/br_af_grid60km_prj_evi2_max")
# Years to process. A longer alternative selection is kept below for reference:
# YEARS = [2018, 2019, 2020, 2021, 2022]
YEARS = [2018, 2019]

# Month numbers 1..12
MONTHS = list(range(1, 13))
OUTDIR = BASE / "vrt"   # folder the .vrt files are written to; change to put them elsewhere
RESAMPLE = "nearest"    # forwarded to gdal.BuildVRTOptions(resampleAlg=...)
RESOLUTION = "highest"  # forwarded to gdal.BuildVRTOptions(resolution=...): highest | lowest | average

# Create the output folder (and any missing parents); no error if it already exists
OUTDIR.mkdir(parents=True, exist_ok=True)


In [3]:
# Switch GDAL to exception mode, then echo the active configuration
gdal.UseExceptions()
for label, value in (
    ("BASE   :", BASE),
    ("OUTDIR :", OUTDIR),
    ("Years  :", YEARS),
    ("Months :", MONTHS),
):
    print(label, value)

BASE   : /projects/my-private-bucket/HLS-1DCNN-AGB/data/tif/HLS_composites/monthly/br_af_grid60km_prj_evi2_max
OUTDIR : /projects/my-private-bucket/HLS-1DCNN-AGB/data/tif/HLS_composites/monthly/br_af_grid60km_prj_evi2_max/vrt
Years  : [2018, 2019]
Months : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]


In [4]:
# Helper functions: locate the monthly tile files, read and sanitize band names,
# compose dot-separated reference names, and build one single-band VRT per band.
from typing import List, Optional
from pathlib import Path
import glob, re
from osgeo import gdal

def find_month_files(base: Path, year: int, month: int) -> List[str]:
    """Return the sorted paths of all tile composites for one year and month.

    Files are matched under ``base`` using the layout
    ``tile_*/<year>/HLS_*_<MM>-*_<year>_<year>_maxevi2.tif``, where ``<MM>``
    is the month formatted as two digits.

    Args:
        base: Root directory that contains the ``tile_*`` sub-directories.
        year: Year used for both the sub-directory name and the filename.
        month: Month number; zero-padded to two digits in the filename.

    Returns:
        Lexicographically sorted list of matching paths (empty if none match).
    """
    # Escape the root so glob metacharacters that happen to be part of the
    # directory name ('[', ']', '*', '?') are matched literally; only the
    # components appended below are meant to act as wildcards.
    root = Path(glob.escape(str(base)))
    filename = f"HLS_*_{month:02d}-*_{year}_{year}_maxevi2.tif"
    pattern = str(root / "tile_*" / str(year) / filename)
    return sorted(glob.glob(pattern))

def get_band_names(sample_path: str) -> List[str]:
    """Collect one name per band of the raster at ``sample_path``.

    For each band (1-based index ``N``) the first non-empty value among the
    band description, the ``BAND_NAME`` metadata item and the fallback
    ``band<N>`` is used.

    Returns:
        The band names in band order, or an empty list when ``gdal.Open``
        yields ``None``.
    """
    dataset = gdal.Open(sample_path, gdal.GA_ReadOnly)
    if dataset is None:
        return []

    labels: List[str] = []
    band_count = dataset.RasterCount
    for index in range(1, band_count + 1):
        band = dataset.GetRasterBand(index)
        label = band.GetDescription()
        if not label:
            # No description set: try the metadata item next
            label = band.GetMetadataItem("BAND_NAME")
        if not label:
            # Nothing stored on the band: fall back to a positional name
            label = f"band{index}"
        labels.append(label)

    dataset = None  # drop the reference to the opened dataset
    return labels

def _sanitize_band_name(name: str) -> str:
    # keep separators clean: replace commas/dots/spaces/slashes with '-'
    if not name:
        return "band"
    name = name.replace(",", "-").replace(".", "-")
    name = name.replace("/", "-").replace("\\", "-").replace(" ", "-")
    # keep only safe chars
    name = re.sub(r"[^A-Za-z0-9_+-]", "", name)
    return name or "band"

def _ensure_unique(names: List[str]) -> List[str]:
    seen, out = {}, []
    for n in names:
        if n not in seen:
            seen[n] = 1
            out.append(n)
        else:
            seen[n] += 1
            out.append(f"{n}-{seen[n]}")
    return out

def make_ref_name(year: int, month: int, band_name: str) -> str:
    """Compose the dot-separated reference name (no file extension).

    The result has the form ``HLS.<year>.<MM>.maxevi2.<band_name>``, with the
    month zero-padded to two digits.
    """
    month_tag = f"{month:02d}"
    return f"HLS.{year}.{month_tag}.maxevi2.{band_name}"

def build_vrt_for_band(out_vrt: Path,
                       src_files: List[str],
                       band_index: int,
                       band_name: str,
                       resample: str = "nearest",
                       resolution: str = "highest",
                       ref_name: Optional[str] = None,
                       year: Optional[int] = None,
                       month: Optional[int] = None) -> None:
    """Build a VRT at ``out_vrt`` from a single band of ``src_files``.

    Any existing file at ``out_vrt`` is deleted first. After the VRT is built,
    dataset-level metadata and the description/metadata of band 1 are set.

    Args:
        out_vrt: Destination path of the VRT.
        src_files: Source raster paths handed to ``gdal.BuildVRT``.
        band_index: Source band number passed as ``bandList=[band_index]``.
        band_name: Name stored as ``BAND_NAME`` and as the band description.
        resample: Value for the ``resampleAlg`` option.
        resolution: Value for the ``resolution`` option.
        ref_name: If truthy, stored as ``REF_NAME``.
        year: If not ``None``, stored as ``YEAR``.
        month: If not ``None``, stored zero-padded as ``MONTH``.

    Raises:
        RuntimeError: If ``gdal.BuildVRT`` returns ``None``.
    """
    # Start from a clean slate: remove a previously written VRT
    if out_vrt.exists():
        out_vrt.unlink()

    vrt_options = gdal.BuildVRTOptions(
        bandList=[band_index],
        resampleAlg=resample,
        resolution=resolution,
        separate=False,
    )
    vrt = gdal.BuildVRT(str(out_vrt), src_files, options=vrt_options)
    if vrt is None:
        raise RuntimeError(f"gdal.BuildVRT failed: {out_vrt}")

    def _dataset_tags():
        # Lazily yields the dataset-level (key, value) pairs in write order
        if ref_name:
            yield "REF_NAME", ref_name
        if year is not None:
            yield "YEAR", str(year)
        if month is not None:
            yield "MONTH", f"{month:02d}"
        yield "PRODUCT", "maxevi2"
        yield "BAND_NAME", band_name

    for key, value in _dataset_tags():
        vrt.SetMetadataItem(key, value)

    # The VRT holds exactly the one selected band, exposed as band 1
    band = vrt.GetRasterBand(1)
    if band is not None:
        band.SetDescription(band_name)
        band.SetMetadataItem("BAND_NAME", band_name)

    vrt = None  # drop the dataset reference


In [5]:
# Tally how many tile files exist for every (year, month) combination
from collections import defaultdict

found = defaultdict(int)
for y, m in ((yr, mo) for yr in YEARS for mo in MONTHS):
    files = find_month_files(BASE, y, m)
    n_files = len(files)
    if n_files > 0:
        # Only months that actually have files get an entry
        found[(y, m)] = n_files

print(f"Months with data: {len(found)}")
for key in sorted(found):
    y, m = key
    print(f"{y}-{m:02d}: {found[key]} file(s)")

Months with data: 24
2018-01: 9 file(s)
2018-02: 9 file(s)
2018-03: 9 file(s)
2018-04: 9 file(s)
2018-05: 9 file(s)
2018-06: 9 file(s)
2018-07: 9 file(s)
2018-08: 9 file(s)
2018-09: 9 file(s)
2018-10: 9 file(s)
2018-11: 9 file(s)
2018-12: 9 file(s)
2019-01: 9 file(s)
2019-02: 9 file(s)
2019-03: 9 file(s)
2019-04: 9 file(s)
2019-05: 9 file(s)
2019-06: 9 file(s)
2019-07: 9 file(s)
2019-08: 9 file(s)
2019-09: 9 file(s)
2019-10: 9 file(s)
2019-11: 9 file(s)
2019-12: 9 file(s)


In [6]:
# Build one single-band VRT per band for every (year, month) that has tile files.
from tqdm.auto import tqdm

# Collected failures as (path, message) pairs; reported after the loops finish
errors = []

for y in tqdm(YEARS, desc="Years"):
    for m in tqdm(MONTHS, leave=False, desc=f"Months({y})"):
        files = find_month_files(BASE, y, m)
        if not files:
            continue

        # Band names are read from the first tile only; the remaining tiles are
        # assumed to share the same band layout -- TODO confirm.
        # gdal.UseExceptions() is enabled earlier in this notebook, so an
        # unreadable tile raises here instead of returning None. Record the
        # failure and move on rather than aborting the whole run.
        try:
            raw_names = get_band_names(files[0])
        except Exception as e:
            errors.append((files[0], str(e)))
            continue
        if not raw_names:
            print(f"[WARN] No bands detected for {y}-{m:02d}; skipping.")
            continue

        band_names = _ensure_unique([_sanitize_band_name(n) for n in raw_names])

        # Band numbers are 1-based, matching the order the names were read in
        for idx, bname in enumerate(band_names, start=1):
            ref = make_ref_name(y, m, bname)      # e.g., HLS.2020.03.maxevi2.Red
            out_vrt = OUTDIR / f"{ref}.vrt"
            try:
                build_vrt_for_band(out_vrt, files, idx, bname,
                                   resample=RESAMPLE, resolution=RESOLUTION,
                                   ref_name=ref, year=y, month=m)
            except Exception as e:
                # Best effort: keep going and report every failure at the end
                errors.append((str(out_vrt), str(e)))

print("Done.")
if errors:
    print("\nSome VRTs failed:")
    for path, msg in errors:
        print(" -", path, "->", msg)
else:
    print("All VRTs built successfully.")



Years:   0%|          | 0/2 [00:00<?, ?it/s]

Months(2018):   0%|          | 0/12 [00:00<?, ?it/s]

Months(2019):   0%|          | 0/12 [00:00<?, ?it/s]

Done.
All VRTs built successfully.
