# Checkpoint 2 — Data Search & Acquisition

Goals:
- Define an Area of Interest (AOI) in Brazil.
- Search for Sentinel‑2 L2A scenes with low cloud cover in the AOI.
- Save a small artifact (the search-result metadata, as JSON) and log a reproducible evidence record.
- Visualize AOI and footprints on a map.

Note: This notebook uses the public Earth Search STAC API at `earth-search.aws.element84.com` (v1 `/search` endpoint).


In [None]:
from datetime import datetime
import json
from pathlib import Path

from dotenv import load_dotenv
import folium
import requests

from zexplorer.data_id_logger import DataSource, ModelInfo, log_evidence
from zexplorer.geoutils import bbox_from_center

# Pull configuration from a local .env file (if present) into the environment.
load_dotenv()

# Search artifacts are written below data/stac_search; create it on first run.
DATA_DIR = Path("data") / "stac_search"
DATA_DIR.mkdir(parents=True, exist_ok=True)

# AOI: a square centred on an example point in Brazil.
CENTER_LAT, CENTER_LON = -10.0, -52.0
HALF_SIZE_M = 50_000  # half side length of the square: 50 km
# Bounding box layout: [minlon, minlat, maxlon, maxlat]
BBOX = bbox_from_center(CENTER_LAT, CENTER_LON, HALF_SIZE_M)
BBOX

In [None]:
def stac_search_s2(bbox, limit=10, max_cloud=10):
    """Search Sentinel-2 L2A scenes via Earth Search (AWS / Element 84).

    Args:
        bbox: Area of interest as [minlon, minlat, maxlon, maxlat].
        limit: Maximum number of items requested from the API.
        max_cloud: Exclusive upper bound applied to each item's
            ``eo:cloud_cover`` property.

    Returns:
        The list stored under the response's ``"features"`` key (STAC items),
        requested newest-first. An empty list when the key is absent.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    endpoint = "https://earth-search.aws.element84.com/v1/search"
    payload = {
        "collections": ["sentinel-2-l2a"],
        "bbox": bbox,
        "limit": limit,
        "query": {"eo:cloud_cover": {"lt": max_cloud}},
        # The STAC API Sort Extension names this field "sortby"; a "sort" key
        # is not part of the v1 search body, so the ordering was not requested.
        "sortby": [{"field": "properties.datetime", "direction": "desc"}],
    }
    response = requests.post(endpoint, json=payload, timeout=30)
    response.raise_for_status()
    return response.json().get("features", [])


# Query the AOI, then preview the hit count and the first three scene IDs.
items = stac_search_s2(BBOX, limit=10, max_cloud=10)
(len(items), [scene["id"] for scene in items[:3]])

In [None]:
# Save a small artifact (search results)
from datetime import timezone

# datetime.utcnow() is deprecated since Python 3.12 and yields a naive value;
# an aware UTC timestamp formats to exactly the same filename component.
ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
out_path = DATA_DIR / f"s2_search_{ts}.json"
# Store the AOI next to the raw STAC items so the search can be reproduced.
with out_path.open("w", encoding="utf-8") as f:
    json.dump({"bbox": BBOX, "count": len(items), "items": items}, f)
out_path

In [None]:
# Map AOI and footprints
m = folium.Map(location=[CENTER_LAT, CENTER_LON], zoom_start=8, tiles="OpenStreetMap")
minlon, minlat, maxlon, maxlat = BBOX
# folium takes [lat, lon] pairs, whereas BBOX and GeoJSON are lon-first.
folium.Rectangle([[minlat, minlon], [maxlat, maxlon]], color="red", fill=False, weight=2).add_to(m)

for it in items:
    # STAC permits `"geometry": null`; `or {}` avoids calling .get on None.
    geom = it.get("geometry") or {}
    geom_type = geom.get("type")
    coordinates = geom.get("coordinates") or []
    if geom_type == "Polygon":
        polygons = [coordinates]
    elif geom_type == "MultiPolygon":
        # Previously skipped entirely; draw every member polygon.
        polygons = coordinates
    else:
        polygons = []

    for polygon in polygons:
        # Skip polygons without an exterior ring rather than drawing nothing.
        if not polygon or not polygon[0]:
            continue
        # Only the exterior ring is drawn. Index access (instead of `x, y`
        # unpacking) tolerates positions that carry a third, elevation value.
        latlon = [[pt[1], pt[0]] for pt in polygon[0]]
        folium.PolyLine(latlon, color="blue", weight=1, opacity=0.6).add_to(m)

m

In [None]:
# Log a single evidence record for the first item (if any)
if items:
    first = items[0]
    item_id = first.get("id", "unknown")
    # Try to find a stable item URL: the item's own "self" link, else None.
    # `or []` also covers an explicit `"links": null`.
    links = first.get("links") or []
    self_url = next((lnk.get("href") for lnk in links if lnk.get("rel") == "self"), None)

    rec = log_evidence(
        lat=CENTER_LAT,
        lon=CENTER_LON,
        candidate_id=f"cp2-{item_id}",
        sources=[DataSource(type="Sentinel-2 L2A", id=item_id, url=self_url)],
        bbox=BBOX,
        model=ModelInfo(name="manual", version="0"),
        notes="Checkpoint 2: STAC search + evidence log",
        extra={"stac_endpoint": "earth-search.aws.element84.com"},
    )
    # Jupyter only renders the last *top-level* expression of a cell, so a
    # bare `rec` nested inside this `if` shows nothing; print it explicitly.
    print(rec)
else:
    print("No items found in AOI; adjust AOI or cloud filter.")

## TODO
- Adjust AOI center/size and cloud cover as needed.
- Consider persisting a small thumbnail URL or STAC properties.
- Add more sources (e.g., Landsat, DEM tiles) to `sources`.
- Write brief notes about why scenes are relevant for candidates.
