# Spatial ETL Notebook UI
Use this notebook to authenticate, configure, and run the ETL pipeline without editing YAML files.

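1. Run the first cell to locate the project root and add its `src/` directory to `sys.path`. To run Earth Engine authentication from the notebook, set `RUN_GEE_AUTH = True` in that cell first.
2. Run the second cell to launch the widget UI, choose the job name, AOI, CRS, resolution, year, season, variables, and storage, then click "Run pipeline" to execute the job.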

In [None]:
# Cell 1: environment/authentication setup
import sys
import subprocess
from pathlib import Path

# Resolve the project root: the working directory itself when it holds
# `src/`, otherwise its parent (covers launching from inside notebooks/).
PROJECT_ROOT = Path.cwd().resolve()
if not PROJECT_ROOT.joinpath('src').exists():
    PROJECT_ROOT = PROJECT_ROOT.parent

# Put `src/` at the front of the import path, once, if it exists.
SRC_PATH = PROJECT_ROOT.joinpath('src')
if SRC_PATH.exists():
    if str(SRC_PATH) not in sys.path:
        sys.path.insert(0, str(SRC_PATH))

print('Project root:', PROJECT_ROOT)
print('Ensure `earthengine authenticate` has been run (either here or in a terminal).')

# Flip to True to run the Earth Engine CLI auth flow from this cell.
RUN_GEE_AUTH = False
if not RUN_GEE_AUTH:
    print('Set RUN_GEE_AUTH=True above to run auth here, or run `earthengine authenticate` in a terminal.')
else:
    print('Starting Earth Engine auth flow...')
    # check=True raises CalledProcessError on a non-zero exit status.
    subprocess.run(['earthengine', 'authenticate'], check=True)


In [None]:
# Cell 2: interactive UI to configure and run the pipeline
import sys
import yaml
import ipywidgets as widgets
from IPython.display import display
from pathlib import Path

# Recompute the project root so this cell also works when run on its own:
# use the working directory if it holds `src/`, else step up one level.
PROJECT_ROOT = Path.cwd().resolve()
if not PROJECT_ROOT.joinpath('src').exists():
    PROJECT_ROOT = PROJECT_ROOT.parent

# Make the package under `src/` importable without adding a duplicate entry.
SRC_PATH = PROJECT_ROOT.joinpath('src')
if SRC_PATH.exists():
    if str(SRC_PATH) not in sys.path:
        sys.path.insert(0, str(SRC_PATH))

from spatial_data_mining.orchestrator import run_pipeline_from_dict

# Base YAML config read for UI defaults, and the folder scanned for AOI files.
BASE_CONFIG_PATH = PROJECT_ROOT.joinpath('config', 'base.yaml')
AOI_DIR = PROJECT_ROOT.joinpath('data', 'aoi')

def load_defaults():
    """Read UI defaults from the base YAML config.

    Looks up the ``defaults`` mapping in ``BASE_CONFIG_PATH`` and falls back
    to built-in values when the file, the mapping, or an individual key is
    missing or null.

    Returns:
        tuple: ``(allowed_crs, resolution, storage)`` where ``allowed_crs``
        comes from ``defaults.allowed_crs`` (fallback ``['EPSG:4326']``),
        ``resolution`` from ``defaults.resolution_m`` (fallback ``20``) and
        ``storage`` from ``defaults.storage`` (fallback local COG settings).
    """
    data = None
    if BASE_CONFIG_PATH.exists():
        with BASE_CONFIG_PATH.open('r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
    # An empty file loads as None and an unexpected document may load as a
    # list or scalar; treat anything that is not a mapping as "no config".
    if not isinstance(data, dict):
        data = {}

    defaults = data.get('defaults')
    if not isinstance(defaults, dict):
        # `defaults:` with no value parses to None, which has no .get().
        defaults = {}

    # Use explicit None checks so that a key present with a null value falls
    # back too, while legitimate falsy values (e.g. an empty list) are kept.
    allowed_crs = defaults.get('allowed_crs')
    if allowed_crs is None:
        allowed_crs = ['EPSG:4326']
    resolution = defaults.get('resolution_m')
    if resolution is None:
        resolution = 20
    storage = defaults.get('storage')
    if not isinstance(storage, dict):
        # The widget setup reads this value with .get(), so it must be a mapping.
        storage = {'kind': 'local_cog', 'output_dir': 'data/outputs'}
    return allowed_crs, resolution, storage

def list_aois():
    """Return the files directly inside ``AOI_DIR`` as sorted path strings.

    Paths are expressed relative to ``PROJECT_ROOT``. Entries that are not
    files are skipped, and a missing ``AOI_DIR`` yields an empty list.
    """
    found = []
    if AOI_DIR.exists():
        for entry in AOI_DIR.glob('*'):
            if entry.is_file():
                found.append(str(entry.relative_to(PROJECT_ROOT)))
        found.sort()
    return found

# Pull UI defaults from the base config and build the AOI choices; the
# '<custom path>' entry lets the user type a path instead of picking a file.
allowed_crs, default_resolution, storage_defaults = load_defaults()
aoi_options = ['<custom path>'] + list_aois()

# Job parameters.
job_name = widgets.Text(value='notebook_job', description='Job name')
aoi_dropdown = widgets.Dropdown(options=aoi_options, description='AOI file')
# Only read when the dropdown is left on '<custom path>' (see get_aoi_path).
custom_aoi = widgets.Text(value='data/aoi/sample.geojson', description='Custom AOI')
target_crs = widgets.Dropdown(options=allowed_crs, description='Target CRS')
resolution = widgets.FloatText(value=default_resolution, description='Resolution (m)')
year = widgets.IntText(value=2023, description='Year')
season = widgets.Text(value='summer', description='Season')
# All variables are pre-selected; the run handler rejects an empty selection.
variables = widgets.SelectMultiple(
    options=['ndvi', 'ndmi', 'msi'],
    value=('ndvi', 'ndmi', 'msi'),
    description='Variables'
)

# Storage settings.
storage_kind = widgets.ToggleButtons(
    options=[('Local COG', 'local_cog'), ('GCS COG', 'gcs_cog')],
    value='local_cog',
    description='Storage'
)
# Start on the storage kind configured in the defaults, just as the output
# dir / bucket / prefix fields below start from their configured values.
# Kinds that are not one of the toggle's options are ignored so that an
# unexpected config value cannot break widget construction.
if storage_defaults.get('kind') in ('local_cog', 'gcs_cog'):
    storage_kind.value = storage_defaults['kind']
output_dir = widgets.Text(value=storage_defaults.get('output_dir', 'data/outputs'), description='Output dir')
# Bucket and prefix are only sent to the pipeline when 'gcs_cog' is selected.
gcs_bucket = widgets.Text(value=storage_defaults.get('bucket', 'your-bucket'), description='GCS bucket')
gcs_prefix = widgets.Text(value=storage_defaults.get('prefix', 'spatial/outputs'), description='GCS prefix')

# Run control and the output area that shows status messages and results.
run_button = widgets.Button(description='Run pipeline', button_style='primary')
log_output = widgets.Output()

def get_aoi_path():
    """Return the AOI path chosen in the UI.

    The text in ``custom_aoi`` is used when the dropdown is left on the
    ``'<custom path>'`` entry; otherwise the dropdown selection is returned.
    """
    selection = aoi_dropdown.value
    return custom_aoi.value if selection == '<custom path>' else selection

def on_run_clicked(_):
    """Handle a click on the run button: build the job config and run it.

    Clears the log area, checks that at least one variable is selected,
    assembles the job dictionary from the current widget values, and passes
    it to ``run_pipeline_from_dict``. Progress, failures, and the resulting
    output locations are printed into ``log_output``.

    Args:
        _: Argument supplied by the click event; not used.
    """
    log_output.clear_output()
    selected_vars = list(variables.value)
    if not selected_vars:
        with log_output:
            print('Select at least one variable before running.')
        return

    # The output dir is always sent; bucket/prefix only apply to GCS storage.
    storage_cfg = {'kind': storage_kind.value, 'output_dir': output_dir.value}
    if storage_kind.value == 'gcs_cog':
        storage_cfg['bucket'] = gcs_bucket.value
        storage_cfg['prefix'] = gcs_prefix.value
    job_section = {
        'name': job_name.value,
        'aoi_path': get_aoi_path(),
        'target_crs': target_crs.value,
        'resolution_m': float(resolution.value),
        'year': int(year.value),
        'season': season.value,
        'variables': selected_vars,
        'storage': storage_cfg,
    }

    # Disable the button for the duration of the run: clicks made while the
    # kernel is busy would otherwise be handled afterwards and start
    # duplicate pipeline runs. The `finally` re-enables it on every exit path.
    run_button.disabled = True
    try:
        with log_output:
            print('Running pipeline...')
        try:
            results = run_pipeline_from_dict(job_section)
        except Exception as exc:
            # Top-level UI boundary: report the failure instead of raising.
            with log_output:
                print('Pipeline failed:', exc)
            return
        with log_output:
            print('Pipeline completed. Outputs:')
            for res in results:
                print(f"- {res['variable']}: local={res['local_path']} gcs={res['gcs_uri']}")
    finally:
        run_button.disabled = False

# Register the click handler for the run button.
run_button.on_click(on_run_clicked)

# Stack the controls top to bottom; related inputs share a horizontal row,
# and the log area sits last, below the run button.
ui = widgets.VBox(children=(
    job_name,
    widgets.HBox(children=(aoi_dropdown, custom_aoi)),
    widgets.HBox(children=(target_crs, resolution)),
    widgets.HBox(children=(year, season)),
    variables,
    storage_kind,
    output_dir,
    widgets.HBox(children=(gcs_bucket, gcs_prefix)),
    run_button,
    log_output,
))

display(ui)
