In [61]:
import os
import subprocess
import sys
import importlib.util
import earthaccess
from pathlib import Path
import h5py

In [39]:
packages = [
    {"import_name": "openai", "pip_name": "openai"},
    {"import_name": "numpy", "pip_name": "numpy"},
    {"import_name": "h5py", "pip_name": "h5py"},
    {"import_name": "earthaccess", "pip_name": "earthaccess"},
    {"import_name": "yaml", "pip_name": "pyyaml"}
]

for package in packages:
    spec = importlib.util.find_spec(package["import_name"])
    if spec is None:
        print(f"Installing {package['import_name']}")
        subprocess.run([sys.executable, "-m", "pip", "install", package["pip_name"], "-q"])

In [62]:
EARTHDATA_USER = os.getenv("EARTHDATA_USERNAME")
EARTHDATA_PASS = os.getenv("EARTHDATA_PASSWORD")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY_openai_to_z")

# very small AOI near the headwaters of the Xingu River (decimal degrees)
AOI_BBOX = (-53.75, -11.80, -53.55, -11.60)          # (min lon, min lat, max lon, max lat)
TEMPORAL = ("2024-11-01", "2024-11-30")              # YYYY-MM-DD

OUT_DIR = Path("outputs")
RAW_DIR = Path("data/raw")
OUT_DIR.mkdir(parents=True, exist_ok=True)
RAW_DIR.mkdir(parents=True, exist_ok=True)

MODEL_NAME = "o4-mini"
np.random.seed(42)

In [63]:
print("🔑  Logging in to Earthdata…")
earthaccess.login(EARTHDATA_USER, EARTHDATA_PASS)

🔑  Logging in to Earthdata…


<earthaccess.auth.Auth at 0x119e9e660>

In [64]:
import earthaccess
earthaccess.login()                               # should prompt for creds once
bbox_big = (-55, -13, -52, -10)                   # 3°×3° around headwaters
results = earthaccess.search_data(
    short_name="GEDI02_A",
    bounding_box=bbox_big,
    temporal=("2024-10-01", "2025-01-31"),        # four-month window
)
print(len(results), "granules found")
# for g in results[:3]:
#     print(g.title)


RuntimeError: {"errors":["An Internal Error has occurred."]}

In [67]:
granule = results[0]

In [71]:
granule_url = granule.data_links()[0]
granule_id = Path(granule_url).name

In [73]:
print(granule_url)

https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/GEDI02_A.002/GEDI02_A_2024277105416_O32903_01_T07097_02_004_02_V002/GEDI02_A_2024277105416_O32903_01_T07097_02_004_02_V002.h5


In [83]:
# after you’ve called earthaccess.login()
granule = results[0]

# this handles token + redirects automatically
local_path = earthaccess.download(
    granule,
    local_path=RAW_DIR,
)[0]

print(f"File saved to {local_path}")


QUEUEING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/1 [00:00<?, ?it/s]

File saved to data/raw/GEDI02_A_2024277105416_O32903_01_T07097_02_004_02_V002.h5


In [88]:
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

with h5py.File(local_path, "r") as h5:
    beams = [k for k in h5.keys() if k.startswith("BEAM")]
    beam = beams[0]                             # good enough for a peek

    lat_ds = h5[f"{beam}/lat_lowestmode"]
    rh_ds  = h5[f"{beam}/rh"]                   # (n_shots, 101) array

    n_shots = lat_ds.shape[0]
    step = max(n_shots // 10_000, 1)            # keep ≤10 k points
    idx = np.arange(0, n_shots, step)
    rh100 = rh_ds[idx, 99]                      # canopy-top height (m)

    print(f"Shots in beam           : {n_shots:,}")
    print(f"Sampled shots           : {idx.size}")
    print(f"RH100 min / max (m)     : {rh100.min():.1f} / {rh100.max():.1f}")
    print(f"RH100 ≤ 10 m (clearings): {(rh100 <= 10).sum()} / {idx.size}")


Shots in beam           : 118,917
Sampled shots           : 10811
RH100 min / max (m)     : 0.0 / 35.0
RH100 ≤ 10 m (clearings): 10145 / 10811


In [93]:
# ---------------------------------------------------------------------
# 4. quick-look histogram (non-blocking)
# ---------------------------------------------------------------------
plt.figure(figsize=(6, 4))
plt.hist(rh100, bins=60)
plt.title(f"GEDI RH100 distribution – {granule_id}")
plt.xlabel("Canopy-top height (m)")
plt.ylabel("Shot count (sample 1 %)")
plt.tight_layout()

png_out = RAW_DIR / (granule_id.replace(".h5", "_hist.png"))
plt.savefig(png_out, dpi=150)
plt.close()
print(f"Histogram saved to {png_out}")

Histogram saved to data/raw/GEDI02_A_2024277105416_O32903_01_T07097_02_004_02_V002_hist.png


In [92]:
import matplotlib.pyplot as plt