# Step 1: Setup & Authentication

In [None]:
# Step 1.1: Install required packages
import subprocess
import sys

def install_package(package):
    """Install one requirement with pip, using the running interpreter.

    Args:
        package: A pip requirement string, e.g. ``"numpy>=1.22.0"``.

    Returns:
        bool: True when pip exited successfully, False when pip reported
        an error (the error is printed, not raised).
    """
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    except subprocess.CalledProcessError as e:
        print(f"❌ Failed to install {package}: {e}")
        return False
    print(f"✅ Successfully installed {package}")
    return True


# List of required packages
required_packages = [
    "earthengine-api>=0.1.350",
    "geemap>=0.32.0",
    "geopandas>=0.13.0",
    "rasterio>=1.3.0",
    "scikit-learn>=1.3.0",
    "joblib>=1.3.0",
    "matplotlib>=3.6.0",
    "pandas>=1.5.0",
    "numpy>=1.22.0",
]

print("🔧 Installing required packages...")

# Track failures so the final message reflects what actually happened
# instead of unconditionally claiming success.
failed_packages = []
for package in required_packages:
    if not install_package(package):
        failed_packages.append(package)

if failed_packages:
    print(f"\n❌ {len(failed_packages)} package(s) failed to install: {', '.join(failed_packages)}")
else:
    print("\n✅ All packages installed successfully!")

In [None]:
# Step 1.2: Import libraries and set up paths
import os
import sys
import warnings
from pathlib import Path
warnings.filterwarnings('ignore')

# Core libraries
import numpy as np
import pandas as pd
from datetime import datetime

# Geospatial libraries
import ee
import geemap
import geopandas as gpd

# Project layout: everything lives under <parent of the working directory>/data
project_root = Path.cwd().parent
data_dir = project_root.joinpath("data")
results_dir = data_dir.joinpath("results")
training_dir = str(data_dir.joinpath("training"))  # str form of the training folder path

# Only the results folder is created here; missing parents are created too
results_dir.mkdir(parents=True, exist_ok=True)

print(
    f"📂 Project root: {project_root}",
    f"💾 Training data directory: {training_dir}",
    "📚 All libraries imported successfully!",
    sep="\n",
)

In [None]:
# Step 1.3: Authenticate and initialize Google Earth Engine
# First try to initialize with existing credentials; if that fails for any
# reason, run the interactive authentication flow and initialize again.
try:
    ee.Initialize()
except Exception:
    print('🔑 Authenticating Google Earth Engine...')
    try:
        ee.Authenticate()
        ee.Initialize()
    except Exception as auth_error:
        print(f'❌ GEE Authentication failed: {auth_error}')
        gee_ready = False
    else:
        print('✅ Google Earth Engine authenticated and initialized successfully!')
        gee_ready = True
else:
    print('✅ Google Earth Engine is already authenticated and initialized.')
    gee_ready = True

# Timestamp identifying this run, formatted as YYYYMMDD_HHMMSS
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
print(f"🕐 Workflow timestamp: {timestamp}")

# Step 2: Configure Classes and Contextual Rules

In [None]:
# The 12 land cover classes, keyed by class id (1..12). Each row below is
# (name, description, source) and is expanded into a dict with those keys.
UZBEKISTAN_LAND_COVER_CLASSES = {
    class_id: dict(zip(('name', 'description', 'source'), fields))
    for class_id, fields in enumerate([
        ('Residential', 'APPHOUSE residential areas', 'APPHOUSE_production_ready'),
        ('Agriculture', 'Agricultural fields and cropland', 'AgricultureFields_production_ready'),
        ('Buildings', 'General buildings and structures', 'Buildings_production_ready'),
        ('Forest', 'Forest agency lands (context-dependent: desert/steppe in flat areas, juniper/shrubs in mountains)', 'ForestAgencyLands_production_ready'),
        ('Residential_Private', 'High-resolution private residential areas', 'HighResPrivate_production_ready'),
        ('Roads_Highways', 'Major highways and road networks', 'Highways_production_ready'),
        ('Land_Stock', 'Land stock and reserves', 'LandStock_production_ready'),
        ('Non_Residential', 'Non-residential buildings and facilities', 'NotResidential_production_ready'),
        ('Protected', 'Protected areas (context-dependent: varies by elevation and terrain)', 'ProtectedAreas_production_ready'),
        ('Railways', 'Railway lines and infrastructure', 'Railways_production_ready'),
        ('Shared_Lands', 'Shared and communal lands', 'SharedLands_production_ready'),
        ('Water', 'Water bodies, rivers, and lakes', 'Water_production_ready'),
    ], start=1)
}

# Threshold ranges, stored as (lower, upper) tuples, for terrain-dependent classes
CONTEXTUAL_FEATURES = dict(
    elevation_thresholds=dict(
        flat=(0, 500),          # 0-500m: desert/steppe context
        hilly=(500, 1500),      # 500-1500m: mixed terrain
        mountain=(1500, 5000),  # 1500m+: mountain context with forests/shrubs
    ),
    ndvi_thresholds=dict(
        bare=(-1, 0.1),         # Very low vegetation
        sparse=(0.1, 0.3),      # Sparse vegetation
        moderate=(0.3, 0.6),    # Moderate vegetation
        dense=(0.6, 1.0),       # Dense vegetation
    ),
)

print("🎯 Uzbekistan Land Cover Classes (12 classes):")
for class_id, class_info in UZBEKISTAN_LAND_COVER_CLASSES.items():
    print("   {:2d}. {:18} - {}".format(class_id, class_info['name'], class_info['description']))

print("\n📍 Training data file: landcover_training.geojson")
print("🏔️  Context-dependent classes: Forest (4), Protected (9)")
print("📊 Features: Spectral bands + NDVI + Elevation + Terrain context")

# Step 3: Load and Analyze Training Data

In [None]:
# Load the training GeoJSON and print a per-class feature count.
# Sets `training_gdf` (on success) and `training_data_available` (always).
training_geojson = os.path.join(training_dir, "landcover_training.geojson")

# Stays False unless the file is read and summarised without error
training_data_available = False

if os.path.exists(training_geojson):
    print(f"📂 Loading training data: {training_geojson}")

    try:
        # Load the GeoJSON file
        training_gdf = gpd.read_file(training_geojson)

        print("✅ Training data loaded successfully!")
        print(f"   📊 Total features: {len(training_gdf):,}")
        print(f"   🗂️  Columns: {list(training_gdf.columns)}")
        print(f"   🌍 CRS: {training_gdf.crs}")

        # Analyze the class distribution
        if 'layer_id' in training_gdf.columns:
            layer_counts = training_gdf['layer_id'].value_counts().sort_index()
            print("\n📈 Training Data Layer Distribution:")
            for layer_id, count in layer_counts.items():
                known_class = UZBEKISTAN_LAND_COVER_CLASSES.get(layer_id)
                if known_class is None:
                    # Ids outside the configured classes are not listed
                    continue
                # layer_id can come back as a float (pandas upcasts integer
                # columns that contain missing values); the 'd' format code
                # rejects floats, so convert the matched id to int first.
                print(f"   {int(layer_id):2d}. {known_class['name']:18}: {count:6,} features")

        training_data_available = True

    except Exception as e:
        print(f"❌ Error loading training data: {e}")

else:
    print(f"❌ Training data not found: {training_geojson}")
    print("📥 Please ensure landcover_training.geojson is in the training directory")

# Step 4: Create GEE Satellite Composite

In [None]:
# Simplified Google Earth Engine processing for Uzbekistan
def create_uzbekistan_composite_with_context():
    """Build the 2023 Landsat 8 median composite plus terrain layers.

    The result stacks the seven scaled surface-reflectance bands
    (SR_B1..SR_B7), NDVI, SRTM elevation, SLOPE and a binary
    TERRAIN_MOUNTAIN band (elevation >= 1500).

    Returns:
        tuple: ``(final_composite, uzbekistan_bounds)`` on success, or
        ``(None, None)`` when no scenes match or an Earth Engine call
        fails. Errors are printed, not raised.
    """

    print("🛰️  Creating Uzbekistan satellite composite with contextual data...")

    try:
        # Bounding rectangle given as [xMin, yMin, xMax, yMax]
        uzbekistan_bounds = ee.Geometry.Rectangle([55.9, 37.2, 73.2, 45.6])

        # Load Landsat 8 Collection 2 Surface Reflectance.
        # filterDate treats the end date as exclusive, so the range ends on
        # 2024-01-01 to keep scenes acquired on 2023-12-31.
        landsat = (
            ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
            .filterBounds(uzbekistan_bounds)
            .filterDate('2023-01-01', '2024-01-01')
            .filter(ee.Filter.lt('CLOUD_COVER', 20))
        )

        image_count = landsat.size().getInfo()
        print(f"✅ Found {image_count} Landsat images")
        if image_count == 0:
            # The median of an empty collection has no bands, which would only
            # surface later as an obscure band-selection error.
            print("❌ Error creating composite: no Landsat scenes matched the date and cloud-cover filters")
            return None, None

        # Create median composite; clip the single result rather than every
        # input scene (same pixels, far fewer clip operations).
        composite = landsat.median().clip(uzbekistan_bounds)

        # Select and scale surface reflectance bands
        # (reflectance = DN * 0.0000275 - 0.2)
        optical_bands = (
            composite
            .select(['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7'])
            .multiply(0.0000275)
            .add(-0.2)
        )

        # Calculate spectral indices: NDVI = (B5 - B4) / (B5 + B4)
        ndvi = optical_bands.normalizedDifference(['SR_B5', 'SR_B4']).rename('NDVI')

        # Add elevation data (SRTM)
        elevation = ee.Image('USGS/SRTMGL1_003').select('elevation').clip(uzbekistan_bounds)

        # Calculate slope from elevation
        slope = ee.Terrain.slope(elevation).rename('SLOPE')

        # Terrain context layer: 1 where elevation >= 1500, else 0
        elevation_mountain = elevation.gte(1500).rename('TERRAIN_MOUNTAIN')

        # Combine all bands into final composite
        final_composite = (
            optical_bands
            .addBands(ndvi)
            .addBands(elevation)
            .addBands(slope)
            .addBands(elevation_mountain)
        )

        print("✅ Composite created with bands:")
        band_names = final_composite.bandNames().getInfo()
        for position, band in enumerate(band_names, start=1):
            print(f"   {position:2d}. {band}")

        return final_composite, uzbekistan_bounds

    except Exception as e:
        print(f"❌ Error creating composite: {e}")
        return None, None

# Build the composite only when Earth Engine initialised successfully
if not gee_ready:
    print("❌ Google Earth Engine not ready")
else:
    composite_image, uzbekistan_geom = create_uzbekistan_composite_with_context()

# Step 5: Extract Training Features

In [None]:
# Extract training features from GeoJSON with spectral and contextual data
def extract_training_features_simplified():
    """Sample the composite at the training polygons and export the samples.

    Reads the notebook-level names ``training_data_available``,
    ``gee_ready``, ``composite_image``, ``training_gdf`` and ``timestamp``.
    On success a Drive export of the sampled pixels (CSV) is started.

    Returns:
        tuple: ``(training_sample, export_task)`` on success, or
        ``(None, None)`` when a prerequisite is missing or an Earth Engine
        call fails. The result is always a 2-tuple so callers can unpack it.
    """

    # Look prerequisites up by name so that a cell which was skipped or failed
    # (leaving a variable undefined) counts as "not ready" instead of raising
    # NameError.
    notebook_state = globals()
    prerequisites_met = (
        notebook_state.get('training_data_available', False)
        and notebook_state.get('gee_ready', False)
        and notebook_state.get('composite_image') is not None
    )
    if not prerequisites_met:
        print("❌ Prerequisites not met for feature extraction")
        # Same shape as every other return path; a bare None here would make
        # `a, b = extract_training_features_simplified()` raise TypeError.
        return None, None

    print("📊 Extracting training features from GeoJSON...")

    try:
        # Convert GeoDataFrame to Earth Engine FeatureCollection
        training_fc = geemap.geopandas_to_ee(training_gdf)
        print(f"✅ Converted GeoDataFrame to FeatureCollection with {training_fc.size().getInfo()} features")

        # Sample the composite image at training locations
        print("🔬 Sampling pixel values at training locations...")

        # Sample the image
        training_sample = composite_image.sampleRegions(
            collection=training_fc,
            properties=['layer_id'],
            scale=30,  # 30m Landsat resolution
            geometries=True,  # Keep geometries for inspection
        )

        sample_size = training_sample.size()
        print(f"📈 Extracted {sample_size.getInfo()} training samples (pixels)")

        # Export training data to CSV for local processing
        export_task = ee.batch.Export.table.toDrive(
            collection=training_sample,
            description=f'uzbekistan_training_features_{timestamp}',
            fileFormat='CSV',
            folder='earthengine_exports',
        )

        export_task.start()
        print(f"✅ Training data export started: uzbekistan_training_features_{timestamp}.csv")
        print("📥 Check Google Drive folder 'earthengine_exports' for the CSV file")

        return training_sample, export_task

    except Exception as e:
        print(f"❌ Error extracting training features: {e}")
        return None, None

# Run the extraction only when data, Earth Engine and a composite variable all exist
if not (training_data_available and gee_ready and 'composite_image' in locals()):
    print("⏭️  Skipping feature extraction - missing prerequisites")
else:
    training_samples, export_task = extract_training_features_simplified()

# Step 6: Summary and Next Steps

In [None]:
# Summary and next steps for simplified Uzbekistan classification.
# Every status is looked up by name, so the summary still prints when an
# earlier cell was skipped or failed and left its variables undefined.
print("="*70)
print("🌍 SIMPLIFIED UZBEKISTAN LAND COVER CLASSIFICATION SUMMARY")
print("="*70)

print("\n📂 Data Configuration:")
print("   • Training file: landcover_training.geojson")
print("   • Classes: 12 specific Uzbekistan land cover types")
print("   • Context-aware: Forest and Protected areas")

print("\n📊 Workflow Status:")

# A failed extraction leaves export_task defined but set to None, so the
# variable merely existing does not mean an export was started.
export_started = globals().get('export_task') is not None

workflow_components = [
    ("Google Earth Engine", bool(globals().get('gee_ready', False))),
    ("Training Data", bool(globals().get('training_data_available', False))),
    ("Composite Image", globals().get('composite_image') is not None),
    ("Feature Extraction Task", export_started),
]

for component, status in workflow_components:
    status_icon = "✅" if status else "❌"
    print(f"   {status_icon} {component}")

print("\n🚀 Next Steps:")
if export_started:
    print("   1. ⏳ Monitor the export task in your Google Earth Engine account.")
    print("   2. 📥 Once complete, download the CSV file from your Google Drive 'earthengine_exports' folder.")
    print("   3. 🤖 Use the CSV to train a machine learning model (e.g., RandomForest) locally.")
    print("   4. 🗺️  Apply the trained model to classify the full Uzbekistan satellite imagery.")
else:
    print("   1. 🔄 Resolve any errors in the cells above and re-run the notebook.")

print("="*70)