This notebook authenticates EE, configures a small region/time window, triggers TFRecord exports using data_export.export_ee_data, and shows how to monitor and optionally download results.

In [1]:
import json
from datetime import datetime
from pathlib import Path    

import ee
# Optional for \downloading TFRecords later:
# from google.cloud import storage

from data_export import export_ee_data, ee_utils

In [2]:
try:
    ee.Authenticate()
except Exception as exc:
    print("Auth skipped or already configured:", exc)

In [3]:
ee.Initialize()
print("Earth Engine initialized", datetime.utcnow())

Earth Engine initialized 2025-11-19 03:01:34.793559


In [4]:
params = {
    "bucket": "lmudl-wildfire-compilation-bucket",     # e.g., "my-bucket"
    "folder": "eaton",                 # subfolder inside bucket
    "prefix": "eaton_sample",               # file prefix
    "start_date": "2025-1-01",
    "end_date": "2025-02-05",
    "kernel_size": 64,
    "sampling_scale": 1000,
    "eval_split_ratio": 0.05,
    "num_samples_per_file": 100,
    "region_bbox": [-118.19466979980469, 34.147511224015496, -118.00034912109375, 34.24916929738899]
}
print(json.dumps(params, indent=2))

{
  "bucket": "lmudl-wildfire-compilation-bucket",
  "folder": "eaton",
  "prefix": "eaton_sample",
  "start_date": "2025-1-01",
  "end_date": "2025-02-05",
  "kernel_size": 64,
  "sampling_scale": 1000,
  "eval_split_ratio": 0.05,
  "num_samples_per_file": 100,
  "region_bbox": [
    -118.19466979980469,
    34.147511224015496,
    -118.00034912109375,
    34.24916929738899
  ]
}


In [5]:
def export_slice(config: dict):
    required = ["bucket", "folder", "prefix", "start_date", "end_date"]
    for key in required:
        if not config.get(key):
            raise ValueError(f"Missing required parameter: {key}")

    bbox = config.get("region_bbox") or ee_utils.COORDINATES["US"]
    # Override the default region used by export_ee_data
    ee_utils.COORDINATES["US"] = bbox

    start_date = ee.Date(config["start_date"])
    end_date = ee.Date(config["end_date"])
    
    # Convert bbox to ee.Geometry.Rectangle
    geometry = ee.Geometry.Rectangle(bbox)

    export_ee_data.export_single_fire_dataset(
        bucket=config["bucket"],
        folder=config["folder"],
        start_date=start_date,
        end_date=end_date,
        geometry=geometry,
        prefix=config.get("prefix", ""),
        kernel_size=config.get("kernel_size", ee_utils.DEFAULT_KERNEL_SIZE),
        sampling_scale=config.get("sampling_scale", ee_utils.DEFAULT_SAMPLING_RESOLUTION),
        num_samples_per_file=config.get("num_samples_per_file", ee_utils.DEFAULT_LIMIT_PER_EE_CALL),
    )
    print("Export triggered. Check https://code.earthengine.google.com/tasks for progress.")

In [6]:
export_slice(params)

Export triggered. Check https://code.earthengine.google.com/tasks for progress.


In [7]:
def list_tasks(limit: int = 10):
    tasks = ee.batch.Task.list()
    for task in tasks[:limit]:
        status = task.status()
        print(f"{status['id']} | {status.get('state')} | {status.get('description')}")

list_tasks()

YRPTQV76FHYFC6BQGJNGGVGO | READY | eaton_sample_000
CALUUBO33CX5LCNATVIX2JKY | COMPLETED | eaton_sample_000
SOUHOGSQCEFPCDIOMUT46GZJ | COMPLETED | eaton_sample_000
SHHWRYJEFET6DXWNXC55HZXN | COMPLETED | eaton_sample_000
LKY4VJNSCN7WZPCRDUE76BWI | COMPLETED | eaton_sample_000


In [8]:
from google.cloud import storage
import google.auth
from google.auth.transport.requests import Request
import os
import json

def download_exports(config: dict, destination: Path):
    """
    Download exported TFRecords from GCS bucket.
    
    This function attempts to use Earth Engine credentials or Application 
    Default Credentials for authentication.
    """
    destination.mkdir(parents=True, exist_ok=True)
    
    client = None
    credentials = None
    project = None
    
    # Strategy 1: Try to use Earth Engine credentials directly
    try:
        ee_creds_path = os.path.expanduser('~/.config/earthengine/credentials')
        if os.path.exists(ee_creds_path):
            print("Attempting to use Earth Engine credentials...")
            with open(ee_creds_path, 'r') as f:
                ee_creds_data = json.load(f)
            
            # Try to create credentials from EE token
            from google.oauth2.credentials import Credentials
            if 'access_token' in ee_creds_data or 'refresh_token' in ee_creds_data:
                # Note: EE credentials format may vary, this is a best-effort attempt
                try:
                    credentials = Credentials(
                        token=ee_creds_data.get('access_token'),
                        refresh_token=ee_creds_data.get('refresh_token'),
                        token_uri='https://oauth2.googleapis.com/token',
                        client_id=ee_creds_data.get('client_id'),
                        client_secret=ee_creds_data.get('client_secret'),
                        scopes=['https://www.googleapis.com/auth/cloud-platform']
                    )
                    if credentials.expired and credentials.refresh_token:
                        credentials.refresh(Request())
                    project = ee_creds_data.get('project_id') or ee_creds_data.get('project')
                except Exception as e:
                    print(f"Could not use EE credentials directly: {e}")
    except Exception as e:
        print(f"Note: Could not read Earth Engine credentials: {e}")
    
    # Strategy 2: Try Application Default Credentials
    if not credentials:
        try:
            print("Attempting to use Application Default Credentials...")
            credentials, project = google.auth.default(
                scopes=['https://www.googleapis.com/auth/cloud-platform']
            )
            
            # Refresh if needed
            if not credentials.valid:
                if credentials.expired and hasattr(credentials, 'refresh_token') and credentials.refresh_token:
                    credentials.refresh(Request())
        except Exception as e:
            print(f"Could not use Application Default Credentials: {e}")
    
    # Strategy 3: Try to initialize client (may prompt for auth)
    if credentials:
        try:
            client = storage.Client(credentials=credentials, project=project)
            print("✓ Successfully authenticated with Google Cloud Storage")
        except Exception as e:
            print(f"Warning: Could not use credentials: {e}")
            client = None
    
    if not client:
        try:
            print("Attempting to initialize client without explicit credentials...")
            client = storage.Client()
        except Exception as e:
            error_msg = (
                f"\n{'='*60}\n"
                f"Could not authenticate with Google Cloud Storage.\n"
                f"Error: {e}\n\n"
                f"SOLUTIONS:\n"
                f"{'='*60}\n"
                f"Option 1 (Recommended): Install gcloud CLI\n"
                f"  1. Download from: https://cloud.google.com/sdk/docs/install\n"
                f"  2. Run in terminal: gcloud auth application-default login\n"
                f"  3. Re-run this cell\n\n"
                f"Option 2: Use a Service Account\n"
                f"  1. Create service account in Google Cloud Console\n"
                f"  2. Download JSON key file\n"
                f"  3. Set environment variable:\n"
                f"     $env:GOOGLE_APPLICATION_CREDENTIALS=\"path/to/key.json\"\n"
                f"  4. Re-run this cell\n\n"
                f"Option 3: Check if your Earth Engine account has GCS access\n"
                f"  Your Earth Engine account needs permission to access the bucket:\n"
                f"  {config['bucket']}\n"
                f"{'='*60}\n"
            )
            raise RuntimeError(error_msg) from e
    
    bucket = client.bucket(config["bucket"])

    folder = str(config.get("folder", "")).strip("/")
    gcs_prefix = (folder + "/") if folder else ""

    blobs = list(bucket.list_blobs(prefix=gcs_prefix))

    print(f"Found {len(blobs)} files under gs://{config['bucket']}/{gcs_prefix}")
    for blob in blobs:
        target = destination / Path(blob.name).name
        print("Downloading", blob.name, "->", target)
        blob.download_to_filename(target)


In [9]:
# Helper function to set up Application Default Credentials using Python
# Run this cell if you get authentication errors with Google Cloud Storage
def setup_adc():
    """
    Set up Application Default Credentials for Google Cloud Storage.
    This opens a browser window for authentication.
    """
    try:
        from google.auth.transport.requests import Request
        from google_auth_oauthlib.flow import InstalledAppFlow
        import os
        
        # Scopes needed for GCS access
        SCOPES = ['https://www.googleapis.com/auth/cloud-platform']
        
        # Path to store credentials
        creds_dir = os.path.expanduser('~/.config/gcloud')
        os.makedirs(creds_dir, exist_ok=True)
        creds_file = os.path.join(creds_dir, 'application_default_credentials.json')
        
        # Check if credentials already exist
        if os.path.exists(creds_file):
            print("Application Default Credentials already exist.")
            return True
        
        print("Setting up Application Default Credentials...")
        print("This will open a browser window for authentication.")
        
        # Use the default client config
        flow = InstalledAppFlow.from_client_secrets_file(
            # This requires a client_secrets.json file
            # For now, we'll use a different approach
            None, SCOPES
        )
        
        # This approach requires a client_secrets.json file
        # Alternative: use gcloud CLI or service account
        print("\nNote: For programmatic ADC setup, you need:")
        print("1. A Google Cloud project with API enabled")
        print("2. OAuth 2.0 client credentials")
        print("\nEasier alternative: Install gcloud CLI from:")
        print("https://cloud.google.com/sdk/docs/install")
        print("Then run: gcloud auth application-default login")
        
        return False
        
    except ImportError:
        print("Required packages not installed. Run:")
        print("pip install google-auth-oauthlib google-auth-httplib2")
        return False
    except Exception as e:
        print(f"Error setting up ADC: {e}")
        return False

# Uncomment and run this if you need to set up authentication:
# setup_adc()


In [10]:
# Alternative: Use Earth Engine credentials for GCS access
# If the download_exports function still fails, you can try this approach:
# 
# Option 1: Install gcloud CLI (recommended)
# Download from: https://cloud.google.com/sdk/docs/install
# Then run in terminal: gcloud auth application-default login
#
# Option 2: Use a service account key file
# 1. Create a service account in Google Cloud Console
# 2. Download the JSON key file
# 3. Set environment variable: 
#    $env:GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json"
#
# Option 3: Try using Earth Engine's credentials (may work if EE has GCS permissions)
# The updated download_exports function should handle this automatically

In [11]:
download_exports(params, Path("exports"))

Attempting to use Earth Engine credentials...
✓ Successfully authenticated with Google Cloud Storage
Found 1 files under gs://lmudl-wildfire-compilation-bucket/eaton/
Downloading eaton/eaton_sample_000.tfrecord.gz -> exports\eaton_sample_000.tfrecord.gz
