# Model Evaluation - NEON AOP Crosswalk

This notebook provides a comprehensive evaluation of the trained crosswalk models, including:
- Model performance assessment across different metrics
- Spatial and temporal validation
- Error analysis and diagnostics
- Fire-specific validation results
- Integration testing with real satellite data

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
import joblib
from datetime import datetime
import json

# Geospatial
import geopandas as gpd
import folium
from folium import plugins

# Scientific computing
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from sklearn.model_selection import cross_val_score, KFold
from scipy import stats

# Import our custom modules
import sys
sys.path.append('..')
from src.features.aop_crosswalk import validate_crosswalk
from src.integration.aop_integration import AOPIntegrationManager

# Notebook-wide behaviour: mute all warnings, pick the plotting theme, seed NumPy's global RNG
warnings.filterwarnings('ignore')
plt.style.use('seaborn-v0_8-darkgrid')
np.random.seed(42)

# Project folders, expressed relative to the current working directory
DATA_DIR = Path('../data')
MODELS_DIR = DATA_DIR.joinpath('models')
RESULTS_DIR = Path('../results/model_evaluation')
# Create the results folder (and any missing parents); a no-op if it already exists
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Echo the locations in use so each run is easy to audit
for location_line in (
    f"Working directory: {Path.cwd()}",
    f"Models directory: {MODELS_DIR}",
    f"Results will be saved to: {RESULTS_DIR}",
):
    print(location_line)

## 1. Load Trained Models and Data

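First, we'll load the models trained in the previous notebook along with their metadata; the test dataset itself is built in the next section. If no trained models are found, synthetic placeholder models are created for demonstration.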

In [None]:
# Load the most recent models and their metadata.
#
# "Most recent" here means "last in sorted filename order" -- this only matches
# chronological order if the filenames embed a sortable timestamp (TODO confirm
# against the training notebook's naming scheme).
#
# SECURITY NOTE: joblib.load unpickles the file, which can execute arbitrary
# code. Only load model files that were produced by a trusted source.

# Default target list. Defined up front so it is available both for the synthetic
# model fallback and for the synthetic metadata fallback, regardless of which
# combination of files exists on disk.
target_variables = ['NDVI_AOP', 'NBR_AOP', 'Canopy_Height', 'LAI', 'Biomass']

model_files = sorted(MODELS_DIR.glob('*_crosswalk_models_*.pkl'))
linear_model_files = [f for f in model_files if 'linear' in f.name]
ensemble_model_files = [f for f in model_files if 'ensemble' in f.name]

# Require at least one file of EACH kind; a plain count of files is not enough
# (two files of the same kind would otherwise cause an IndexError below).
if linear_model_files and ensemble_model_files:
    # Load linear models
    linear_model_path = linear_model_files[-1]
    linear_models = joblib.load(linear_model_path)
    print(f"✅ Loaded linear models from: {linear_model_path.name}")

    # Load ensemble models
    ensemble_model_path = ensemble_model_files[-1]
    ensemble_models = joblib.load(ensemble_model_path)
    print(f"✅ Loaded ensemble models from: {ensemble_model_path.name}")
else:
    print("⚠️ No trained models found. Using synthetic models for demonstration.")
    # Create synthetic models for demonstration.
    # Imported lazily because they are only needed on this fallback path.
    from sklearn.linear_model import Ridge
    from sklearn.ensemble import GradientBoostingRegressor

    linear_models = {}
    ensemble_models = {}

    # NOTE: these estimators are NOT fitted here, and the metrics are hard-coded
    # placeholder values rather than measured results.
    for target in target_variables:
        linear_models[target] = {
            'model': Ridge(alpha=1.0),
            'metrics': {'test_r2': 0.75, 'test_mae': 0.1}
        }
        ensemble_models[target] = {
            'model': GradientBoostingRegressor(n_estimators=100),
            'metrics': {'test_r2': 0.85, 'test_mae': 0.08}
        }

# Load metadata (last file in sorted filename order), or fall back to synthetic metadata
metadata_files = sorted(MODELS_DIR.glob('model_metadata_*.json'))
if metadata_files:
    with open(metadata_files[-1], 'r', encoding='utf-8') as f:
        model_metadata = json.load(f)
    print(f"✅ Loaded model metadata from: {metadata_files[-1].name}")
else:
    # Create synthetic metadata
    model_metadata = {
        'target_variables': target_variables,
        'satellite_features': ['NDVI', 'NBR', 'NDWI', 'EVI', 'SAVI'],
        'sites': {
            'fire_sites': ['GRSM', 'SOAP', 'SYCA'],
            'baseline_sites': ['SRER', 'JORN', 'ONAQ', 'SJER']
        }
    }

print(f"\nTarget variables: {model_metadata['target_variables']}")
print(f"Satellite features: {model_metadata['satellite_features']}")

## 2. Performance Metrics Assessment

We'll calculate comprehensive performance metrics for both model types across all target variables.

In [None]:
# Build a reproducible synthetic evaluation set (re-seeding makes this cell
# give the same data no matter what ran before it)
np.random.seed(42)
n_test_samples = 200

# Simulated satellite indices (predictors): name -> (mean, std).
# The draw order matters: it fixes which random numbers each column receives.
predictor_params = {
    'NDVI': (0.7, 0.15),
    'NBR': (0.6, 0.2),
    'NDWI': (0.2, 0.1),
    'EVI': (0.5, 0.15),
    'SAVI': (0.4, 0.12),
}
X_test = pd.DataFrame({
    feature_name: np.random.normal(mean, std, n_test_samples)
    for feature_name, (mean, std) in predictor_params.items()
})


def draw_noise(std):
    """Return zero-mean Gaussian noise with the given std, one value per test sample."""
    return np.random.normal(0, std, n_test_samples)


# Simulated AOP targets ("ground truth"): linear functions of a satellite index plus noise
ndvi_sat = X_test['NDVI']
nbr_sat = X_test['NBR']
y_test = pd.DataFrame({
    'NDVI_AOP': ndvi_sat * 1.1 + draw_noise(0.05),
    'NBR_AOP': nbr_sat * 1.15 + draw_noise(0.06),
    'Canopy_Height': 15 + 20 * ndvi_sat + draw_noise(2),
    'LAI': 2 + 3 * ndvi_sat + draw_noise(0.3),
    'Biomass': 50 + 100 * ndvi_sat + draw_noise(10),
})

# Clamp every target to its allowed (lower, upper) range
target_bounds = {
    'NDVI_AOP': (-1, 1),
    'NBR_AOP': (-1, 1),
    'Canopy_Height': (0, 50),
    'LAI': (0, 8),
    'Biomass': (0, 300),
}
for target_name, (lower, upper) in target_bounds.items():
    y_test[target_name] = y_test[target_name].clip(lower, upper)

print(f"Test dataset shape: {X_test.shape}")
print(f"Target variables shape: {y_test.shape}")