This notebook authenticates with Google Earth Engine (EE), configures a small region and time window, triggers TFRecord exports via `data_export.export_ee_data`, and shows how to monitor the export tasks and, optionally, download the results.

In [1]:
import json
from datetime import datetime, timezone
from pathlib import Path

import ee
# Optional for downloading TFRecords later:
# from google.cloud import storage

from data_export import export_ee_data, ee_utils

In [2]:
# Best-effort Earth Engine sign-in: any failure is printed and the notebook
# keeps going, so credential problems will presumably surface at
# ee.Initialize() instead of here.
try:
    ee.Authenticate()
except Exception as exc:
    print("Auth skipped or already configured:", exc)

In [3]:
# Initialize the Earth Engine client library before any other ee.* call is made.
ee.Initialize()
# Report a timezone-aware UTC timestamp: datetime.utcnow() returns a naive
# value and is deprecated since Python 3.12.
print("Earth Engine initialized", datetime.now(timezone.utc))

Earth Engine initialized 2025-10-21 02:11:17.352662


In [13]:
# Export configuration shared by export_slice() and download_exports().
params = dict(
    bucket="lmudl-wildfire-compilation-bucket",  # Cloud Storage bucket name
    folder="eaton",                              # sub-folder inside the bucket
    prefix="eaton_sample",                       # file-name prefix
    start_date="2025-01-07",
    end_date="2025-01-30",
    kernel_size=64,
    sampling_scale=1000,
    eval_split_ratio=0.05,
    num_samples_per_file=100,
    # Presumably [west, south, east, north] in degrees -- TODO confirm ordering.
    region_bbox=[
        -118.19466979980469,
        34.147511224015496,
        -118.00034912109375,
        34.24916929738899,
    ],
)
# Echo the settings so the saved notebook records what was exported.
print(json.dumps(params, indent=2))

{
  "bucket": "lmudl-wildfire-compilation-bucket",
  "folder": "eaton",
  "prefix": "eaton_sample",
  "start_date": "2025-01-07",
  "end_date": "2025-01-30",
  "kernel_size": 64,
  "sampling_scale": 1000,
  "eval_split_ratio": 0.05,
  "num_samples_per_file": 100,
  "region_bbox": [
    -118.19466979980469,
    34.147511224015496,
    -118.00034912109375,
    34.24916929738899
  ]
}


In [14]:
def export_slice(config: dict):
    """Trigger the Earth Engine export described by ``config``.

    Args:
        config: Export settings. ``bucket``, ``folder``, ``prefix``,
            ``start_date`` and ``end_date`` must be present and non-empty.
            ``region_bbox``, ``kernel_size``, ``sampling_scale``,
            ``eval_split_ratio`` and ``num_samples_per_file`` are optional and
            fall back to the defaults exposed by ``ee_utils``.

    Raises:
        ValueError: If a required setting is missing or empty.
    """
    required = ("bucket", "folder", "prefix", "start_date", "end_date")
    for key in required:
        if not config.get(key):
            raise ValueError(f"Missing required parameter: {key}")

    start_date = ee.Date(config["start_date"])
    end_date = ee.Date(config["end_date"])

    # The region reaches export_ee_data through ee_utils.COORDINATES["US"].
    # Remember the current value and put it back afterwards; otherwise a later
    # call without "region_bbox" would silently reuse this call's override
    # instead of the real default.
    # NOTE(review): assumes export_ml_datasets reads COORDINATES["US"] while it
    # runs (not lazily after it returns) -- confirm against data_export.
    default_bbox = ee_utils.COORDINATES["US"]
    ee_utils.COORDINATES["US"] = config.get("region_bbox") or default_bbox
    try:
        export_ee_data.export_ml_datasets(
            bucket=config["bucket"],
            folder=config["folder"],
            start_date=start_date,
            end_date=end_date,
            prefix=config.get("prefix", ""),
            kernel_size=config.get("kernel_size", ee_utils.DEFAULT_KERNEL_SIZE),
            sampling_scale=config.get("sampling_scale", ee_utils.DEFAULT_SAMPLING_RESOLUTION),
            eval_split_ratio=config.get("eval_split_ratio", ee_utils.DEFAULT_EVAL_SPLIT),
            num_samples_per_file=config.get("num_samples_per_file", ee_utils.DEFAULT_LIMIT_PER_EE_CALL),
        )
    finally:
        ee_utils.COORDINATES["US"] = default_bbox
    print("Export triggered. Check https://code.earthengine.google.com/tasks for progress.")

In [15]:
# Start the export using the settings held in `params`.
export_slice(params)

Export triggered. Check https://code.earthengine.google.com/tasks for progress.


In [16]:
def list_tasks(limit: int = 10):
    """Print one ``id | state | description`` line per Earth Engine task.

    Args:
        limit: Upper bound on how many entries of ``ee.batch.Task.list()``
            are printed; applied as a plain slice ``[:limit]``.
    """
    selected = ee.batch.Task.list()[:limit]
    for entry in selected:
        info = entry.status()
        # 'id' is mandatory; state and description may be absent and print as None.
        task_id = info['id']
        state = info.get('state')
        label = info.get('description')
        print(f"{task_id} | {state} | {label}")

# Print at most ten tasks (the default limit) with their current state.
list_tasks()

BG57KCZQ353KV4PNMTVTHBF5 | READY | train_eaton_sample_019
ESG6QWR6CROLNRHCDUNKLY73 | READY | train_eaton_sample_018
ZD2XMAFN2UY5ELJHW6E56LXT | READY | train_eaton_sample_017
FPHSSZ6AS3ST5ZXR4SHTYRTG | READY | train_eaton_sample_016
LZEHF54QJUXWCV3APIBLWURT | READY | train_eaton_sample_015
FL7V6GKHY7ICYQFXUC7AIZEK | READY | train_eaton_sample_014
56XFX6N7D7M5RMRTO4ZBN5HC | READY | train_eaton_sample_013
7NHRLBDKU5FOEQVKT5IZWTY5 | READY | train_eaton_sample_012
GTRIK2KSODVOPKLT6JNLAITJ | READY | train_eaton_sample_011
C5MC4BA5LQZD3RRXWVROTUAK | READY | train_eaton_sample_010


In [10]:
from google.cloud import storage

def download_exports(config: dict, destination: Path):
    """Download every object under the configured bucket folder.

    Args:
        config: Settings dict. ``bucket`` is required. ``folder`` is optional;
            when it is missing, empty or ``None`` the whole bucket is listed.
        destination: Local directory, created if it does not exist. Objects
            are written flat, by base name, so objects in nested sub-folders
            that share a base name overwrite one another.
    """
    destination.mkdir(parents=True, exist_ok=True)
    client = storage.Client()
    bucket = client.bucket(config["bucket"])

    # `or ""` keeps an explicit None from turning into the literal prefix "None/".
    folder = str(config.get("folder") or "").strip("/")
    gcs_prefix = (folder + "/") if folder else ""

    # Object names ending in "/" are folder placeholders, not exported files;
    # downloading them would only create stray empty local files.
    blobs = [
        blob
        for blob in bucket.list_blobs(prefix=gcs_prefix)
        if not blob.name.endswith("/")
    ]

    print(f"Found {len(blobs)} files under gs://{config['bucket']}/{gcs_prefix}")
    for blob in blobs:
        target = destination / Path(blob.name).name
        print("Downloading", blob.name, "->", target)
        blob.download_to_filename(target)


In [11]:
%%!
gcloud auth application-default login

['Your browser has been opened to visit:',
 '',
 '    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8085%2F&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login&state=nAplcergvJ8wFrvo4haT1kSyBn5kLM&access_type=offline&code_challenge=-EQQxIhZD_QX21qNRI7INsjQPCBhapIJj1E3oLN4mYw&code_challenge_method=S256',
 '',
 '',
 'Credentials saved to file: [C:\\Users\\Arye\\AppData\\Roaming\\gcloud\\application_default_credentials.json]',
 '',
 'These credentials will be used by any library that requests Application Default Credentials (ADC).',
 '',
 'Quota project "ee-lmu-deep-learning-wildfire" was added to ADC which can be used by Google client libraries for billing and quota. Note that some services may still bill the project ownin

In [17]:
# Copy the exported files into a local "exports" directory (relative to the working directory).
download_exports(params, Path("exports"))

Found 20 files under gs://lmudl-wildfire-compilation-bucket/eaton/
Downloading eaton/train_eaton_sample_000.tfrecord.gz -> exports\train_eaton_sample_000.tfrecord.gz
Downloading eaton/train_eaton_sample_001.tfrecord.gz -> exports\train_eaton_sample_001.tfrecord.gz
Downloading eaton/train_eaton_sample_002.tfrecord.gz -> exports\train_eaton_sample_002.tfrecord.gz
Downloading eaton/train_eaton_sample_003.tfrecord.gz -> exports\train_eaton_sample_003.tfrecord.gz
Downloading eaton/train_eaton_sample_004.tfrecord.gz -> exports\train_eaton_sample_004.tfrecord.gz
Downloading eaton/train_eaton_sample_005.tfrecord.gz -> exports\train_eaton_sample_005.tfrecord.gz
Downloading eaton/train_eaton_sample_006.tfrecord.gz -> exports\train_eaton_sample_006.tfrecord.gz
Downloading eaton/train_eaton_sample_007.tfrecord.gz -> exports\train_eaton_sample_007.tfrecord.gz
Downloading eaton/train_eaton_sample_008.tfrecord.gz -> exports\train_eaton_sample_008.tfrecord.gz
Downloading eaton/train_eaton_sample_009.t