# Week 3: Training Pipeline (Step 1 — Setup)

This notebook will be built incrementally. Today: repo-root detection, output directories, Earth Engine init, and AOI load/preview.

- Kernel: Python (geoai)
- AOI: `data/external/aoi.geojson`


In [None]:
# Step 1: Imports and configuration
from __future__ import annotations
import os
from pathlib import Path
import sys

import yaml
from dotenv import load_dotenv

# Geospatial stack
import geopandas as gpd

# Optional: EE/geemap for quick checks
import ee
import geemap

# Reproducibility
import random
import numpy as np

# Repo-root detection (looks for README.md as anchor)
# THIS_NOTEBOOK is the script file when `__file__` is defined (e.g. the notebook
# was exported to a .py file); otherwise it is the current working directory.
THIS_NOTEBOOK = Path(__file__).resolve() if "__file__" in globals() else Path.cwd()
# Always search from a directory: a file path can never contain README.md, and
# using it as the fallback root would make the later `mkdir` calls fail.
search_start = THIS_NOTEBOOK if THIS_NOTEBOOK.is_dir() else THIS_NOTEBOOK.parent
# Walk upwards from the starting directory; the first directory containing
# README.md wins. If none is found, fall back to the starting directory.
repo_root = next(
    (
        candidate
        for candidate in (search_start, *search_start.parents)
        if (candidate / "README.md").exists()
    ),
    search_start,
)

print(f"Repo root detected: {repo_root}")

# Project layout: every path hangs off the detected repo root
data_dir = repo_root.joinpath("data")
external_dir = data_dir.joinpath("external")
aoi_path = external_dir.joinpath("aoi.geojson")
config_dir = repo_root.joinpath("config")
models_dir = repo_root.joinpath("models")
reports_dir = repo_root.joinpath("reports")
figures_dir = repo_root.joinpath("figures")

# Create the output and config folders up front.
# data/ and data/external/ are deliberately not created here.
for out_dir in (figures_dir, reports_dir, models_dir, config_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

# Env: load variables from <repo_root>/.env when that file is present
dotenv_path = repo_root / ".env"
if not dotenv_path.exists():
    print("No .env found — proceeding without it.")
else:
    load_dotenv(dotenv_path)
    print("Loaded .env")

# Reproducibility: one seed (overridable via the SEED environment variable,
# default 42) is applied to both the stdlib and NumPy global generators.
SEED = int(os.environ.get("SEED", 42))
for _apply_seed in (random.seed, np.random.seed):
    _apply_seed(SEED)

# Earth Engine init
# First try the existing credentials; if that fails, run the notebook auth
# flow and retry once. Any failure of the retry is reported, not raised, so
# the rest of the notebook can continue without Earth Engine.
try:
    ee.Initialize()
except Exception:
    try:
        ee.Authenticate(auth_mode="notebook")
        ee.Initialize()
    except Exception as auth_error:
        print("Earth Engine unavailable right now; we'll proceed using local processing where possible.")
        print(f"EE error: {auth_error}")
    else:
        print("Earth Engine authenticated and initialized.")
else:
    print("Earth Engine initialized.")

# Load AOI
# Validate with an explicit exception: `assert` statements are removed when
# Python runs with -O, which would let a missing file slip through to read_file.
if not aoi_path.exists():
    raise FileNotFoundError(f"AOI not found at {aoi_path}")
aoi_gdf = gpd.read_file(aoi_path)
# Normalize CRS to EPSG:4326 for cross-lib compatibility
if aoi_gdf.crs is None:
    # No CRS recorded in the file: label it as EPSG:4326 without transforming
    # (assumes the coordinates are already lon/lat — confirm for new AOIs).
    aoi_gdf = aoi_gdf.set_crs(epsg=4326)
else:
    # A CRS is present: reproject the geometries to EPSG:4326.
    aoi_gdf = aoi_gdf.to_crs(4326)
print("AOI loaded:", aoi_gdf.shape, "CRS:", aoi_gdf.crs)

# Quick map preview (geemap)
# Best-effort only: any failure (missing widget backend, no `display` outside
# a notebook, geemap errors) is reported and the notebook carries on.
try:
    # Merge the AOI geometries once and reuse the centroid; the original
    # recomputed the union for each coordinate.
    aoi_centroid = aoi_gdf.geometry.unary_union.centroid
    # geemap expects the center as (lat, lon), i.e. (y, x).
    m = geemap.Map(center=(aoi_centroid.y, aoi_centroid.x), zoom=10)
    m.add_gdf(aoi_gdf, layer_name="AOI")
    display(m)
except Exception as e:
    print("Map preview unavailable:", e)
