In [1]:
import os
import glob
import numpy as np
import pandas as pd
import rioxarray as rxr


TEST 1 — Valid Date Folder Discovery

In [2]:
def test_date_folders(data_dir):
    valid = []
    invalid = []

    for d in sorted(os.listdir(data_dir)):
        path = os.path.join(data_dir, d)
        if not os.path.isdir(path):
            continue
        try:
            date = pd.to_datetime(d, format="%d%m%y")
            valid.append((d, date))
        except ValueError:
            invalid.append(d)

    print("✅ Valid date folders:")
    for d, dt in valid:
        print(f"  {d} → {dt.date()}")

    print("\n❌ Invalid folders (ignored):")
    for d in invalid:
        print(f"  {d}")

    assert len(valid) > 0, "❌ No valid date folders found"
    return valid


TEST 2 — Required Band Presence (Per Date)

In [3]:
REQUIRED_BANDS = {
    "B02": "*_B02_*.tiff",
    "B04": "*_B04_*.tiff",
    "B08": "*_B08_*.tiff",
    "B11": "*_B11_*.tiff",
    "B12": "*_B12_*.tiff",
    "SCL": "*Scene_classification_map_*.tiff",
}

def test_band_completeness(data_dir, valid_dates):
    rows = []

    for d, _ in valid_dates:
        path = os.path.join(data_dir, d)
        row = {"date": d}
        for band, pattern in REQUIRED_BANDS.items():
            row[band] = len(glob.glob(os.path.join(path, pattern)))
        rows.append(row)

    df = pd.DataFrame(rows)
    print(df)

    assert (df[list(REQUIRED_BANDS.keys())] == 1).all().all(), \
        "❌ Missing or duplicate band files detected"

    print("\n✅ All required bands present for all dates")


TEST 3 — Raw Raster Health Check (One Sample Date)

In [4]:
def test_raw_raster_values(data_dir, sample_date):
    path = os.path.join(data_dir, sample_date)
    b08_path = glob.glob(os.path.join(path, "*_B08_*.tiff"))[0]

    da = rxr.open_rasterio(b08_path)

    print("Raster shape:", da.shape)
    print("Min value:", float(da.min()))
    print("Max value:", float(da.max()))

    assert np.isfinite(da.values).any(), "❌ Raster contains no finite values"
    assert da.max() > da.min(), "❌ Raster appears constant or empty"

    print("✅ Raster contains valid numeric data")


TEST 4 — AOI Intersection Sanity (Optional but Important)

In [5]:
def test_aoi_intersection(data_dir, sample_date, mine):
    path = os.path.join(data_dir, sample_date)
    b08_path = glob.glob(os.path.join(path, "*_B08_*.tiff"))[0]

    da = rxr.open_rasterio(b08_path)
    da = da.isel(band=0, drop=True)

    da_clip = da.rio.clip(mine.geometry.values, mine.crs)

    finite = np.isfinite(da_clip.values).sum()
    print("Finite pixels inside AOI:", finite)

    assert finite > 0, "❌ AOI does not intersect raster footprint"
    print("✅ AOI intersects raster")


TEST 5 — SCL Sanity (Advisory, Not Blocking)

In [6]:
def test_scl_content(data_dir, sample_date):
    path = os.path.join(data_dir, sample_date)
    scl_path = glob.glob(os.path.join(path, "*Scene_classification_map_*.tiff"))[0]

    scl = rxr.open_rasterio(scl_path)
    scl = scl.isel(band=0, drop=True)

    unique, counts = np.unique(scl.values, return_counts=True)
    stats = dict(zip(unique.tolist(), counts.tolist()))

    print("SCL value distribution:")
    print(stats)

    cloud_classes = {3, 8, 9, 10, 11}
    if not any(c in stats for c in cloud_classes):
        print("⚠️ No standard cloud classes found (acceptable for small AOIs)")
    else:
        print("✅ Cloud classes detected")


In [12]:
# Root folder that holds one sub-folder per acquisition date (named DDMMYY).
# The path is relative, so it is resolved against the current working directory.
DATA_DIR = "content/monitoring"

In [13]:
# Discover the date folders, then verify each one has every required band.
valid_dates = test_date_folders(DATA_DIR)
test_band_completeness(DATA_DIR, valid_dates)

# Spot-check a single folder: the first one in name-sorted order, which is
# not necessarily the earliest acquisition date.
sample_date = valid_dates[0][0]
test_raw_raster_values(DATA_DIR, sample_date)
test_scl_content(DATA_DIR, sample_date)
# Optional AOI check; needs an AOI object `mine`, which is not defined in the
# cells shown here.
# test_aoi_intersection(DATA_DIR, sample_date, mine)  # optional


✅ Valid date folders:
  010123 → 2023-01-01
  010822 → 2022-08-01
  010823 → 2023-08-01
  020222 → 2022-02-02
  020223 → 2023-02-02
  020623 → 2023-06-02
  020723 → 2023-07-02
  030122 → 2022-01-03
  030422 → 2022-04-03
  030423 → 2023-04-03
  040322 → 2022-03-04
  040323 → 2023-03-04
  041122 → 2022-11-04
  041123 → 2023-11-04
  050923 → 2023-09-05
  070622 → 2022-06-07
  070722 → 2022-07-07
  081223 → 2023-12-08
  091222 → 2022-12-09
  100922 → 2022-09-10
  101023 → 2023-10-10
  110822 → 2022-08-11
  120222 → 2022-02-12
  130522 → 2022-05-13
  130523 → 2023-05-13
  151022 → 2022-10-15
  170623 → 2023-06-17
  170722 → 2022-07-17
  180122 → 2022-01-18
  220222 → 2022-02-22
  230322 → 2022-03-23
  230422 → 2022-04-23
  230423 → 2023-04-23
  241122 → 2022-11-24
  270223 → 2023-02-27
  270622 → 2022-06-27
  270723 → 2023-07-27
  280123 → 2023-01-28
  280522 → 2022-05-28
  280523 → 2023-05-28
  290323 → 2023-03-29
  291123 → 2023-11-29
  291222 → 2022-12-29
  291223 → 2023-12-29
  300922 →



In [14]:
def run_all_test(DATA_DIR):
    """Run the folder, band, raw-raster and SCL checks on one data directory.

    Folder discovery and band completeness cover every date folder; the
    raster and SCL checks run only on the first discovered folder.

    Parameters
    ----------
    DATA_DIR : str
        Directory that contains the date folders.
    """
    dated_folders = test_date_folders(DATA_DIR)
    test_band_completeness(DATA_DIR, dated_folders)

    # Per-raster checks use the first folder in the discovered order.
    first_folder = dated_folders[0][0]
    for check in (test_raw_raster_values, test_scl_content):
        check(DATA_DIR, first_folder)

In [15]:
# Execute the whole validation suite against the configured data directory.
run_all_test(DATA_DIR)

✅ Valid date folders:
  010123 → 2023-01-01
  010822 → 2022-08-01
  010823 → 2023-08-01
  020222 → 2022-02-02
  020223 → 2023-02-02
  020623 → 2023-06-02
  020723 → 2023-07-02
  030122 → 2022-01-03
  030422 → 2022-04-03
  030423 → 2023-04-03
  040322 → 2022-03-04
  040323 → 2023-03-04
  041122 → 2022-11-04
  041123 → 2023-11-04
  050923 → 2023-09-05
  070622 → 2022-06-07
  070722 → 2022-07-07
  081223 → 2023-12-08
  091222 → 2022-12-09
  100922 → 2022-09-10
  101023 → 2023-10-10
  110822 → 2022-08-11
  120222 → 2022-02-12
  130522 → 2022-05-13
  130523 → 2023-05-13
  151022 → 2022-10-15
  170623 → 2023-06-17
  170722 → 2022-07-17
  180122 → 2022-01-18
  220222 → 2022-02-22
  230322 → 2022-03-23
  230422 → 2022-04-23
  230423 → 2023-04-23
  241122 → 2022-11-24
  270223 → 2023-02-27
  270622 → 2022-06-27
  270723 → 2023-07-27
  280123 → 2023-01-28
  280522 → 2022-05-28
  280523 → 2023-05-28
  290323 → 2023-03-29
  291123 → 2023-11-29
  291222 → 2022-12-29
  291223 → 2023-12-29
  300922 →

