This notebook authenticates EE, configures a small region/time window, triggers TFRecord exports using data_export.export_ee_data, and shows how to monitor and optionally download results.

In [4]:
import json
from datetime import datetime
from pathlib import Path    

import ee
# Optional for downloading TFRecords later:
# from google.cloud import storage

from data_export import export_ee_data, ee_utils

In [5]:
try:
    ee.Authenticate()
except Exception as exc:
    print("Auth skipped or already configured:", exc)

In [6]:
ee.Initialize()
print("Earth Engine initialized", datetime.utcnow())

Earth Engine initialized 2025-10-15 20:47:01.042878


In [5]:
params = {
    "bucket": "lmudl-wildfire-compilation-bucket",     # e.g., "my-bucket"
    "folder": "demo",                 # subfolder inside bucket
    "prefix": "sample",               # file prefix
    "start_date": "2021-08-01",
    "end_date": "2021-08-15",
    "kernel_size": 64,
    "sampling_scale": 1000,
    "eval_split_ratio": 0.05,
    "num_samples_per_file": 100,
    # [west, south, east, north] in degrees; pick a small test area
    "region_bbox": [-122.6, 39.5, -120.9, 41.0],
}
print(json.dumps(params, indent=2))

{
  "bucket": "lmudl-wildfire-compilation-bucket",
  "folder": "demo",
  "prefix": "sample",
  "start_date": "2021-08-01",
  "end_date": "2021-08-15",
  "kernel_size": 64,
  "sampling_scale": 1000,
  "eval_split_ratio": 0.05,
  "num_samples_per_file": 100,
  "region_bbox": [
    -122.6,
    39.5,
    -120.9,
    41.0
  ]
}


In [8]:
def export_slice(config: dict):
    required = ["bucket", "folder", "prefix", "start_date", "end_date"]
    for key in required:
        if not config.get(key):
            raise ValueError(f"Missing required parameter: {key}")

    bbox = config.get("region_bbox") or ee_utils.COORDINATES["US"]
    # Override the default region used by export_ee_data
    ee_utils.COORDINATES["US"] = bbox

    start_date = ee.Date(config["start_date"])
    end_date = ee.Date(config["end_date"])

    export_ee_data.export_ml_datasets(
        bucket=config["bucket"],
        folder=config["folder"],
        start_date=start_date,
        end_date=end_date,
        prefix=config.get("prefix", ""),
        kernel_size=config.get("kernel_size", ee_utils.DEFAULT_KERNEL_SIZE),
        sampling_scale=config.get("sampling_scale", ee_utils.DEFAULT_SAMPLING_RESOLUTION),
        eval_split_ratio=config.get("eval_split_ratio", ee_utils.DEFAULT_EVAL_SPLIT),
        num_samples_per_file=config.get("num_samples_per_file", ee_utils.DEFAULT_LIMIT_PER_EE_CALL),
    )
    print("Export triggered. Check https://code.earthengine.google.com/tasks for progress.")

In [16]:
export_slice(params)

Export triggered. Check https://code.earthengine.google.com/tasks for progress.


In [9]:
def list_tasks(limit: int = 10):
    tasks = ee.batch.Task.list()
    for task in tasks[:limit]:
        status = task.status()
        print(f"{status['id']} | {status.get('state')} | {status.get('description')}")

list_tasks()

VTHHN6IPP7E3JIE2UBLEDVY5 | COMPLETED | train_sample_003
Z5FQYZFU665XXABD7I6C35I3 | COMPLETED | train_sample_002
ZTWSXCT4VBBNUVIQHLAJ5EL5 | COMPLETED | train_sample_001
3H5U6E4QUKINSRNHIJ5RR77X | COMPLETED | train_sample_000
H4ZIUA2QMJGJ7JLWEGBQZKW3 | FAILED | train_sample_003
EB6K56WJSZEQAROXYXF2X4GQ | FAILED | train_sample_002
EZDG3AJOFPQDK7K5GRN6OHF6 | FAILED | train_sample_001
XML27USWHV6BVOLN7QFQFS5W | FAILED | train_sample_000


In [6]:
from google.cloud import storage

def download_exports(config: dict, destination: Path):
    destination.mkdir(parents=True, exist_ok=True)
    client = storage.Client()
    bucket = client.bucket(config["bucket"])

    folder = str(config.get("folder", "")).strip("/")
    gcs_prefix = (folder + "/") if folder else ""

    blobs = list(bucket.list_blobs(prefix=gcs_prefix))

    print(f"Found {len(blobs)} files under gs://{config['bucket']}/{gcs_prefix}")
    for blob in blobs:
        target = destination / Path(blob.name).name
        print("Downloading", blob.name, "->", target)
        blob.download_to_filename(target)


In [2]:
%%!
gcloud auth application-default login

['Your browser has been opened to visit:',
 '',
 '    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8085%2F&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login&state=JKprEZxjyuZ6hSM2SkyZGeOIjk2WZi&access_type=offline&code_challenge=CENTozSPNRzN_vpLT4yni3oxZeWtNYYHFkDdSaR65wY&code_challenge_method=S256',
 '',
 '',
 'Credentials saved to file: [C:\\Users\\Arye\\AppData\\Roaming\\gcloud\\application_default_credentials.json]',
 '',
 'These credentials will be used by any library that requests Application Default Credentials (ADC).',
 '',
 'Quota project "ee-lmu-deep-learning-wildfire" was added to ADC which can be used by Google client libraries for billing and quota. Note that some services may still bill the project ownin

In [7]:
download_exports(params, Path("exports"))

Found 5 files under gs://lmudl-wildfire-compilation-bucket/demo/
Downloading demo/ -> exports\demo
Downloading demo/train_sample_000.tfrecord.gz -> exports\train_sample_000.tfrecord.gz
Downloading demo/train_sample_001.tfrecord.gz -> exports\train_sample_001.tfrecord.gz
Downloading demo/train_sample_002.tfrecord.gz -> exports\train_sample_002.tfrecord.gz
Downloading demo/train_sample_003.tfrecord.gz -> exports\train_sample_003.tfrecord.gz


In [2]:
from google.cloud import storage
client = storage.Client()
bucket = client.bucket("lmudl-wildfire-compilation-bucket")


for blob in bucket.list_blobs(prefix=""):  # note trailing '/'
    print(f"name={blob.name!r} size={blob.size} md5={blob.md5_hash}")

name='demo/' size=0 md5=1B2M2Y8AsgTpgAmY7PhCfg==
name='train_sample_000.tfrecord.gz' size=15649033 md5=CDc/X9WFlOIt0/M9scPfig==
name='train_sample_001.tfrecord.gz' size=13871148 md5=moDi0cAuGOVP7gGIwQrEZw==
name='train_sample_002.tfrecord.gz' size=13948147 md5=gSGdb5exWysYf2lqe32h/w==
name='train_sample_003.tfrecord.gz' size=7093742 md5=2kok452ri5DJBX2pXL1ogw==
