This notebook authenticates with Earth Engine (EE), configures a small region and time window, triggers TFRecord exports via `data_export.export_ee_data`, and shows how to monitor the export tasks and optionally download the results.

In [None]:
import json
from datetime import datetime, timezone
from pathlib import Path

import ee
# Optional for downloading TFRecords later:
# from google.cloud import storage

from data_export import export_ee_data, ee_utils

In [2]:
# Best-effort authentication: any failure is reported and the notebook
# carries on instead of stopping at this cell.
try:
    ee.Authenticate()
except Exception as auth_error:
    print("Auth skipped or already configured:", auth_error)

In [3]:
# Initialize the Earth Engine client library for this kernel session.
ee.Initialize()
# Report a timezone-aware UTC timestamp: datetime.utcnow() is deprecated
# since Python 3.12 and returns a naive value with no UTC marker.
print("Earth Engine initialized", datetime.now(timezone.utc))

Earth Engine initialized 2025-10-14 07:45:27.328310


In [15]:
# Export configuration consumed by the cells below; edit values here.
params = dict(
    bucket="lmudl-wildfire-compilation-bucket",  # GCS bucket name, e.g. "my-bucket"
    folder="demo",  # subfolder inside the bucket
    prefix="sample",  # file prefix
    start_date="2021-08-01",
    end_date="2021-08-15",
    kernel_size=64,
    sampling_scale=1000,
    eval_split_ratio=0.05,
    num_samples_per_file=100,
    # Small test area as [west, south, east, north] in degrees.
    region_bbox=[-122.6, 39.5, -120.9, 41.0],
)
print(json.dumps(params, indent=2))

{
  "bucket": "lmudl-wildfire-compilation-bucket",
  "folder": "demo",
  "prefix": "sample",
  "start_date": "2021-08-01",
  "end_date": "2021-08-15",
  "kernel_size": 64,
  "sampling_scale": 1000,
  "eval_split_ratio": 0.05,
  "num_samples_per_file": 100,
  "region_bbox": [
    -122.6,
    39.5,
    -120.9,
    41.0
  ]
}


In [13]:
def export_slice(config: dict):
    """Trigger the Earth Engine exports described by ``config``.

    Args:
        config: Export settings. ``bucket``, ``folder``, ``prefix``,
            ``start_date`` and ``end_date`` are required and must be
            non-empty. ``kernel_size``, ``sampling_scale``,
            ``eval_split_ratio`` and ``num_samples_per_file`` are optional and
            fall back to the matching ``ee_utils.DEFAULT_*`` constants.
            ``region_bbox`` is an optional ``[west, south, east, north]``
            sequence in degrees; when it is absent or empty the current
            ``ee_utils.COORDINATES["US"]`` is left in place.

    Raises:
        ValueError: If a required key is missing or empty, or if
            ``region_bbox`` is not a sequence of exactly four values.
    """
    required = ("bucket", "folder", "prefix", "start_date", "end_date")
    for key in required:
        if not config.get(key):
            raise ValueError(f"Missing required parameter: {key}")

    # Validate the optional region up front so a malformed bbox fails here
    # rather than somewhere inside the export machinery.
    override = config.get("region_bbox")
    bbox = None
    if override:
        bbox_error = (
            "region_bbox must be a sequence of four values "
            f"[west, south, east, north]; got {override!r}"
        )
        if isinstance(override, (str, bytes)):
            raise ValueError(bbox_error)
        try:
            # Copy so later edits to the caller's list cannot leak into
            # ee_utils.COORDINATES through a shared reference.
            bbox = list(override)
        except TypeError:
            raise ValueError(bbox_error) from None
        if len(bbox) != 4:
            raise ValueError(bbox_error)

    start_date = ee.Date(config["start_date"])
    end_date = ee.Date(config["end_date"])

    # export_ml_datasets takes no region argument here, so the region is
    # overridden through ee_utils.COORDINATES["US"]. The override is applied
    # only for the duration of the call and then undone, so a later call
    # without region_bbox does not silently reuse this call's region.
    # NOTE(review): assumes export_ml_datasets reads COORDINATES["US"] while it
    # runs (not lazily afterwards) - confirm against data_export.
    coordinates = ee_utils.COORDINATES
    had_previous = "US" in coordinates
    previous = coordinates["US"] if had_previous else None
    if bbox is not None:
        coordinates["US"] = bbox
    try:
        export_ee_data.export_ml_datasets(
            bucket=config["bucket"],
            folder=config["folder"],
            start_date=start_date,
            end_date=end_date,
            prefix=config["prefix"],  # guaranteed non-empty by the check above
            kernel_size=config.get("kernel_size", ee_utils.DEFAULT_KERNEL_SIZE),
            sampling_scale=config.get("sampling_scale", ee_utils.DEFAULT_SAMPLING_RESOLUTION),
            eval_split_ratio=config.get("eval_split_ratio", ee_utils.DEFAULT_EVAL_SPLIT),
            num_samples_per_file=config.get("num_samples_per_file", ee_utils.DEFAULT_LIMIT_PER_EE_CALL),
        )
    finally:
        if bbox is not None:
            if had_previous:
                coordinates["US"] = previous
            else:
                coordinates.pop("US", None)
    print("Export triggered. Check https://code.earthengine.google.com/tasks for progress.")

In [16]:
# Start the export for the configuration held in `params`.
export_slice(params)

Export triggered. Check https://code.earthengine.google.com/tasks for progress.


In [9]:
def list_tasks(limit: int = 10):
    """Print ``id | state | description`` for the first ``limit`` tasks.

    Args:
        limit: Maximum number of entries taken from the front of the list
            returned by ``ee.batch.Task.list()``.
    """
    selected = ee.batch.Task.list()[:limit]
    for entry in selected:
        info = entry.status()
        task_id = info['id']
        state = info.get('state')
        description = info.get('description')
        print(f"{task_id} | {state} | {description}")

# Show up to 10 tasks (the default limit) with their current state.
list_tasks()

In [17]:
from google.cloud import storage

def download_exports(config: dict, destination: Path):
    """Download exported objects from the configured bucket into ``destination``.

    Objects are matched by the name prefix ``<folder>/<prefix>`` (surrounding
    slashes stripped) and written flat into ``destination`` under their base
    name, so objects sharing a base name overwrite one another.

    ``storage.Client()`` is created without arguments; if Application Default
    Credentials are not configured it raises ``DefaultCredentialsError``.

    Args:
        config: Must contain ``bucket``, ``folder`` and ``prefix``.
        destination: Local directory; created if it does not exist.
    """
    destination.mkdir(parents=True, exist_ok=True)
    client = storage.Client()
    bucket = client.bucket(config["bucket"])
    prefix = f"{config['folder'].strip('/')}/{config['prefix']}".strip('/')

    # Names ending in "/" are folder placeholder objects, not files; without
    # this filter they would be written out as a bogus file named after the
    # folder (e.g. "demo/" -> destination/"demo").
    blobs = [
        blob for blob in bucket.list_blobs(prefix=prefix)
        if not blob.name.endswith("/")
    ]
    print(f"Found {len(blobs)} files under gs://{config['bucket']}/{prefix}")
    for blob in blobs:
        target = destination / Path(blob.name).name
        print("Downloading", blob.name, "->", target)
        blob.download_to_filename(target)

# Example usage after tasks complete:
# download_exports(params, Path("exports"))

In [18]:
# Download the exported files into ./exports. storage.Client() needs
# Application Default Credentials; without them this cell raises
# DefaultCredentialsError (see https://cloud.google.com/docs/authentication/external/set-up-adc).
download_exports(params, Path("exports"))

DefaultCredentialsError: Your default credentials were not found. To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information.