# Integration Examples with Monet Stats

This notebook demonstrates comprehensive integration workflows combining multiple metrics and techniques using Monet Stats. We'll explore multi-dataset integration, machine learning integration, and comprehensive dashboards.

In [None]:
# Import required libraries (plotting, data handling, scikit-learn model + scorers)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xarray as xr
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import warnings

# Monet Stats metrics library, referenced as `ms` throughout the notebook
import monet_stats as ms

# Set up plotting style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
# NOTE(review): this silences *every* warning for the rest of the kernel session,
# including numerical ones (e.g. divide-by-zero) that may matter when reading metrics.
warnings.filterwarnings('ignore')  # Suppress warnings for cleaner output

## Integration Examples

We'll explore various integration workflows combining multiple metrics and techniques.

In [None]:
# Read the three paired observation/model tables used by every later cell
temp_df = pd.read_csv('data/temperature_obs_mod.csv')
precip_df = pd.read_csv('data/precipitation_obs_mod.csv')
wind_df = pd.read_csv('data/wind_obs_mod.csv')

# Report the (rows, columns) shape of each table in a single print call
print(
    "Loaded datasets:",
    f"Temperature: {temp_df.shape}",
    f"Precipitation: {precip_df.shape}",
    f"Wind: {wind_df.shape}",
    sep="\n",
)

# Preview the first three rows of each table under its own heading
for heading, frame in (
    ("\nTemperature dataset sample:", temp_df),
    ("\nPrecipitation dataset sample:", precip_df),
    ("\nWind dataset sample:", wind_df),
):
    print(heading)
    print(frame.head(3))

## Multi-Dataset Integration

Combine metrics from multiple datasets to create comprehensive evaluation frameworks.

In [None]:
# Calculate metrics for each dataset
def calculate_comprehensive_metrics(obs, mod, var_name):
    """Score one observed/modeled pair with a fixed set of Monet Stats metrics.

    Parameters
    ----------
    obs, mod
        Observed and modeled values; each is passed unchanged to every
        ``ms`` metric, and ``len(obs)`` is recorded as the sample count.
    var_name
        Label stored under the ``'Variable'`` key.

    Returns
    -------
    dict
        Keys, in order: ``'Variable'``, ``'MAE'``, ``'RMSE'``,
        ``'Correlation'``, ``'NSE'``, ``'MB'``, ``'R2'``, ``'Count'``.
    """
    # (result key, metric callable) pairs, evaluated in this order
    scorers = (
        ('MAE', ms.MAE),
        ('RMSE', ms.RMSE),
        ('Correlation', ms.pearsonr),
        ('NSE', ms.NSE),
        ('MB', ms.MB),
        ('R2', ms.R2),
    )

    summary = {'Variable': var_name}
    for key, scorer in scorers:
        summary[key] = scorer(obs, mod)
    summary['Count'] = len(obs)
    return summary

# One entry per variable: (label, source frame, observed column, modeled column)
variable_inputs = [
    ('Temperature', temp_df, 'observed_temp', 'modeled_temp'),
    ('Precipitation', precip_df, 'observed_precip', 'modeled_precip'),
    ('Wind Speed', wind_df, 'observed_wind_speed', 'modeled_wind_speed'),
]

# Score every variable with the shared helper, keeping the per-variable names
temp_metrics, precip_metrics, wind_metrics = [
    calculate_comprehensive_metrics(frame[obs_col].values, frame[mod_col].values, label)
    for label, frame, obs_col, mod_col in variable_inputs
]

# One row per variable, one column per metric
all_metrics_df = pd.DataFrame([temp_metrics, precip_metrics, wind_metrics])
print("Comprehensive Metrics Across Variables:")
print(all_metrics_df.round(4))

# 2x3 grid of bar charts, filled row by row: (metric column, panel title, y-axis label)
panel_specs = [
    ('MAE', 'Mean Absolute Error by Variable', 'MAE'),
    ('RMSE', 'Root Mean Square Error by Variable', 'RMSE'),
    ('Correlation', 'Correlation by Variable', 'Correlation'),
    ('NSE', 'Nash-Sutcliffe Efficiency by Variable', 'NSE'),
    ('MB', 'Mean Bias by Variable', 'Bias'),
    ('R2', 'R² by Variable', 'R²'),
]

fig, axes = plt.subplots(2, 3, figsize=(18, 10))

for panel, (column, title, ylabel) in zip(axes.flat, panel_specs):
    panel.bar(all_metrics_df['Variable'], all_metrics_df[column])
    panel.set_title(title)
    panel.set_ylabel(ylabel)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## Machine Learning Integration

Integrate Monet Stats metrics with machine learning models for enhanced evaluation.

In [None]:
# Build one feature table from the three datasets, truncated to a common length.
# NOTE(review): rows are paired purely by position; this assumes the three CSVs
# are aligned row-for-row — confirm against how the data files were produced.
n_samples = min(len(temp_df), len(precip_df), len(wind_df), 10000)  # cap for speed

column_sources = {
    'temp_obs': temp_df['observed_temp'],
    'temp_mod': temp_df['modeled_temp'],
    'precip_obs': precip_df['observed_precip'],
    'precip_mod': precip_df['modeled_precip'],
    'wind_obs': wind_df['observed_wind_speed'],
    'wind_mod': wind_df['modeled_wind_speed'],
}
combined_data = pd.DataFrame(
    {name: series.values[:n_samples] for name, series in column_sources.items()}
)

# Target: mean of the three per-variable absolute errors
temp_error = (combined_data['temp_obs'] - combined_data['temp_mod']).abs()
precip_error = (combined_data['precip_obs'] - combined_data['precip_mod']).abs()
wind_error = (combined_data['wind_obs'] - combined_data['wind_mod']).abs()
combined_data['target'] = (temp_error + precip_error + wind_error) / 3

print(f"Combined dataset shape: {combined_data.shape}")
print("Combined dataset sample:")
print(combined_data.head())

# Features are the six obs/mod columns; the target is the combined error
feature_cols = ['temp_obs', 'temp_mod', 'precip_obs', 'precip_mod', 'wind_obs', 'wind_mod']
X = combined_data[feature_cols]
y = combined_data['target']

# 80/20 split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the random forest and predict on the held-out rows
rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
rf_model.fit(X_train, y_train)
y_pred = rf_model.predict(X_test)

# Score the predictions with scikit-learn ...
ml_mse = mean_squared_error(y_test, y_pred)
ml_r2 = r2_score(y_test, y_pred)

# ... and with Monet Stats, which is handed plain arrays
y_true = y_test.values
ml_mae = ms.MAE(y_true, y_pred)
ml_rmse = ms.RMSE(y_true, y_pred)
ml_corr = ms.pearsonr(y_true, y_pred)
ml_nse = ms.NSE(y_true, y_pred)

print(f"\nMachine Learning Model Performance:")
print(f"MSE: {ml_mse:.4f}")
print(f"R²: {ml_r2:.4f}")
print(f"MAE (Monet Stats): {ml_mae:.4f}")
print(f"RMSE (Monet Stats): {ml_rmse:.4f}")
print(f"Correlation (Monet Stats): {ml_corr:.4f}")
print(f"NSE (Monet Stats): {ml_nse:.4f}")

# Rank the features by the forest's importance scores, largest first
feature_importance = pd.DataFrame(
    {'Feature': feature_cols, 'Importance': rf_model.feature_importances_}
).sort_values('Importance', ascending=False)

print(f"\nFeature Importance:")
print(feature_importance)

# Left panel: predicted vs actual with a 1:1 line; right panel: importances
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
scatter_ax, importance_ax = axes

scatter_ax.scatter(y_test, y_pred, alpha=0.5)
one_to_one = [y_test.min(), y_test.max()]
scatter_ax.plot(one_to_one, one_to_one, 'r--', lw=2)
scatter_ax.set_xlabel('Actual Combined Error')
scatter_ax.set_ylabel('Predicted Combined Error')
scatter_ax.set_title(f'ML Predictions vs Actual (R² = {ml_r2:.3f})')
scatter_ax.grid(True, alpha=0.3)

importance_ax.bar(feature_importance['Feature'], feature_importance['Importance'])
importance_ax.set_xlabel('Feature')
importance_ax.set_ylabel('Importance')
importance_ax.set_title('Feature Importance in Combined Error Prediction')
importance_ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## Advanced Integration: Ensemble Model with Monet Stats

Build a weighted ensemble from several model predictions, then evaluate the ensemble and each individual model with the same set of Monet Stats metrics.

In [None]:
# Create an ensemble evaluation framework
class ModelEvaluator:
    """Score model predictions with Monet Stats metrics and blend them into an ensemble."""

    def __init__(self):
        # Not read or written by any method of this class; kept so existing
        # code that touches `evaluator.metrics` continues to work.
        self.metrics = {}

    def evaluate_model(self, obs, mod, model_name):
        """Evaluate a model using multiple metrics.

        Parameters
        ----------
        obs, mod
            Observed and modeled values, passed unchanged to each ``ms`` metric.
        model_name
            Label stored under the ``'Model'`` key.

        Returns
        -------
        dict
            Keys: ``'Model'``, ``'MAE'``, ``'RMSE'``, ``'Correlation'``,
            ``'NSE'``, ``'MB'``, ``'R2'``, ``'IOA'``, ``'WDIOA'``, ``'KGE'``.
        """
        results = {
            'Model': model_name,
            'MAE': ms.MAE(obs, mod),
            'RMSE': ms.RMSE(obs, mod),
            'Correlation': ms.pearsonr(obs, mod),
            'NSE': ms.NSE(obs, mod),
            'MB': ms.MB(obs, mod),
            'R2': ms.R2(obs, mod),
            'IOA': ms.IOA(obs, mod),
            'WDIOA': ms.WDIOA(obs, mod),
            'KGE': ms.KGE(obs, mod)
        }
        return results

    def compare_models(self, obs, model_predictions):
        """Compare multiple models.

        Parameters
        ----------
        obs
            Observed values shared by every model.
        model_predictions : dict
            Maps model name to that model's predictions.

        Returns
        -------
        pandas.DataFrame
            One row per model, in the dict's iteration order, with the
            columns produced by :meth:`evaluate_model`.
        """
        return pd.DataFrame([
            self.evaluate_model(obs, predictions, model_name)
            for model_name, predictions in model_predictions.items()
        ])

    def create_ensemble_prediction(self, model_predictions, weights=None):
        """Create a weighted-average ensemble prediction from multiple models.

        Parameters
        ----------
        model_predictions : dict
            Maps model name to predictions. Weights are matched to models by
            the dict's iteration order.
        weights : sequence of float, optional
            One weight per model. They are rescaled to sum to 1. Defaults to
            equal weights.

        Returns
        -------
        The weighted sum of the predictions, or ``None`` when
        ``model_predictions`` is empty.

        Raises
        ------
        ValueError
            If ``weights`` is not one-dimensional with exactly one entry per
            model, or if the weights sum to zero or to a non-finite value.
        """
        n_models = len(model_predictions)
        if n_models == 0:
            # Nothing to combine.
            return None

        if weights is None:
            # Equal weights
            weights = np.full(n_models, 1.0 / n_models)
        else:
            weights = np.asarray(weights, dtype=float)
            # Too few weights would fail with an IndexError mid-loop, and
            # surplus weights would be silently included in the
            # normalisation; reject both up front with a clear message.
            if weights.ndim != 1 or weights.size != n_models:
                raise ValueError(
                    f"expected {n_models} weights (one per model), "
                    f"got array of shape {weights.shape}"
                )

        total = weights.sum()
        if not np.isfinite(total) or total == 0:
            # Normalising by such a total would fill the ensemble with NaN/inf.
            raise ValueError("weights must sum to a finite, non-zero value")
        weights = weights / total

        # Accumulate without in-place addition so no input array is modified.
        ensemble_pred = None
        for weight, pred in zip(weights, model_predictions.values()):
            contribution = weight * pred
            if ensemble_pred is None:
                ensemble_pred = contribution
            else:
                ensemble_pred = ensemble_pred + contribution

        return ensemble_pred

# Create synthetic model predictions for demonstration.
# A seeded generator keeps the added noise, and therefore every number and the
# "best model" ranking printed below, identical across Restart & Run All.
rng = np.random.default_rng(42)

n_samples = len(temp_df)
observed = temp_df['observed_temp'].values
modeled = temp_df['modeled_temp'].values

# Create different model predictions with varying quality
model_predictions = {
    'Model_1_Basic': modeled,  # Original model
    'Model_2_Biased': modeled + 2.0,  # Positive bias
    'Model_3_Noisy': modeled + rng.normal(0, 1, n_samples),  # Added noise
    'Model_4_Accurate': observed + rng.normal(0, 0.5, n_samples)  # High accuracy
}

# Evaluate models
evaluator = ModelEvaluator()
model_comparison = evaluator.compare_models(observed, model_predictions)

print("Model Comparison:")
print(model_comparison.round(4))

# Create ensemble prediction (using simple average)
ensemble_pred = evaluator.create_ensemble_prediction(model_predictions)
ensemble_result = evaluator.evaluate_model(observed, ensemble_pred, 'Ensemble')

print("\nEnsemble Model Performance:")
ensemble_series = pd.Series(ensemble_result)
print(ensemble_series[['Model', 'MAE', 'RMSE', 'Correlation', 'NSE', 'R2']])

# Compare ensemble to individual models: append the ensemble row, then pick
# the winner per metric (lowest error, highest correlation)
ensemble_comparison = pd.concat([model_comparison, pd.DataFrame([ensemble_result])], ignore_index=True)
best_mae_model = ensemble_comparison.loc[ensemble_comparison['MAE'].idxmin(), 'Model']
best_rmse_model = ensemble_comparison.loc[ensemble_comparison['RMSE'].idxmin(), 'Model']
best_corr_model = ensemble_comparison.loc[ensemble_comparison['Correlation'].idxmax(), 'Model']

print("\nBest performing models:")
print(f"By MAE: {best_mae_model}")
print(f"By RMSE: {best_rmse_model}")
print(f"By Correlation: {best_corr_model}")

## Comprehensive Dashboard Creation

Create an integrated dashboard combining multiple evaluation techniques.

In [None]:
# Create a comprehensive dashboard function
def _dashboard_metrics(obs, mod, var_name):
    """Compute the dashboard's metric record for one observed/modeled pair."""
    return {
        'Variable': var_name,
        'MAE': ms.MAE(obs, mod),
        'RMSE': ms.RMSE(obs, mod),
        'Correlation': ms.pearsonr(obs, mod),
        'NSE': ms.NSE(obs, mod),
        'MB': ms.MB(obs, mod),
        'R2': ms.R2(obs, mod)
    }


def _dashboard_bar(ax, metrics_df, column, title):
    """Draw one metric as a bar per variable; the column name doubles as the y label."""
    ax.bar(metrics_df['Variable'], metrics_df[column])
    ax.set_title(title)
    ax.set_ylabel(column)
    ax.tick_params(axis='x', rotation=45)


def _dashboard_scatter(ax, obs, mod, var_name, r2, rmse, max_points=2000):
    """Scatter modeled against observed (first ``max_points`` pairs) with a 1:1 line."""
    ax.scatter(obs[:max_points], mod[:max_points], alpha=0.5, s=10)
    # The 1:1 reference line spans the observed range of the full series.
    span = [obs.min(), obs.max()]
    ax.plot(span, span, 'r--', lw=2)
    ax.set_xlabel(f'Observed {var_name}')
    ax.set_ylabel(f'Modeled {var_name}')
    ax.set_title(f'{var_name}: R² = {r2:.3f}, RMSE = {rmse:.3f}')
    ax.grid(True, alpha=0.3)


def _dashboard_error_hist(ax, obs, mod, var_name):
    """Histogram of the signed error (modeled minus observed)."""
    ax.hist(mod - obs, bins=50, edgecolor='black', alpha=0.7)
    ax.set_xlabel(f'{var_name} Error')
    ax.set_ylabel('Frequency')
    ax.set_title(f'{var_name} Error Distribution')
    ax.grid(True, alpha=0.3)


def _dashboard_table(ax, metrics_df):
    """Render MAE/RMSE/Correlation/NSE as a text table on a frameless axes."""
    ax.axis('tight')
    # Hide the frame and ticks of the table's own axes (not another panel's).
    ax.axis('off')

    shown_columns = ['MAE', 'RMSE', 'Correlation', 'NSE']
    cell_text = [
        [f"{value:.3f}" for value in row]
        for row in metrics_df[shown_columns].itertuples(index=False)
    ]

    table = ax.table(
        cellText=cell_text,
        rowLabels=metrics_df['Variable'].tolist(),
        colLabels=['MAE', 'RMSE', 'Corr', 'NSE'],
        cellLoc='center',
        loc='center'
    )
    table.auto_set_font_size(False)
    table.set_fontsize(9)
    table.scale(1.2, 1.5)
    ax.set_title('Summary Table')


def create_comprehensive_dashboard(temp_df, precip_df, wind_df):
    """Create a comprehensive evaluation dashboard.

    Draws a 4x4-grid figure: a row of metric bar charts (MAE, RMSE,
    Correlation, NSE), observed-vs-modeled scatter plots for each variable,
    signed-error histograms, and a summary table. The figure is shown with
    ``plt.show()`` before the function returns.

    Parameters
    ----------
    temp_df : pandas.DataFrame
        Must provide ``'observed_temp'`` and ``'modeled_temp'`` columns.
    precip_df : pandas.DataFrame
        Must provide ``'observed_precip'`` and ``'modeled_precip'`` columns.
    wind_df : pandas.DataFrame
        Must provide ``'observed_wind_speed'`` and ``'modeled_wind_speed'`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per variable with columns ``'Variable'``, ``'MAE'``,
        ``'RMSE'``, ``'Correlation'``, ``'NSE'``, ``'MB'``, ``'R2'``.
    """
    # (label, observed array, modeled array) for each variable, in display order
    variables = [
        ('Temperature', temp_df['observed_temp'].values, temp_df['modeled_temp'].values),
        ('Precipitation', precip_df['observed_precip'].values, precip_df['modeled_precip'].values),
        ('Wind Speed', wind_df['observed_wind_speed'].values, wind_df['modeled_wind_speed'].values),
    ]

    # Calculate comprehensive metrics once; the plots below reuse these values
    records = [_dashboard_metrics(obs, mod, label) for label, obs, mod in variables]
    metrics_df = pd.DataFrame(records)

    # Create dashboard figure
    fig = plt.figure(figsize=(20, 16))
    gs = fig.add_gridspec(4, 4, hspace=0.3, wspace=0.3)

    # Row 0: one bar chart per metric
    bar_panels = [
        ('MAE', 'Mean Absolute Error'),
        ('RMSE', 'Root Mean Square Error'),
        ('Correlation', 'Correlation'),
        ('NSE', 'Nash-Sutcliffe Efficiency'),
    ]
    for col_idx, (column, title) in enumerate(bar_panels):
        _dashboard_bar(fig.add_subplot(gs[0, col_idx]), metrics_df, column, title)

    # Rows 1-2: scatter plots (two share row 1, the third is centred on row 2)
    scatter_slots = [gs[1, :2], gs[1, 2:], gs[2, 1:3]]
    for slot, (label, obs, mod), record in zip(scatter_slots, variables, records):
        _dashboard_scatter(fig.add_subplot(slot), obs, mod, label, record['R2'], record['RMSE'])

    # Row 3, columns 0-2: error distributions
    for col_idx, (label, obs, mod) in enumerate(variables):
        _dashboard_error_hist(fig.add_subplot(gs[3, col_idx]), obs, mod, label)

    # Row 3, column 3: summary statistics table
    _dashboard_table(fig.add_subplot(gs[3, 3]), metrics_df)

    fig.suptitle('Comprehensive Model Evaluation Dashboard', fontsize=16, y=0.98)
    plt.show()

    return metrics_df

# Create the comprehensive dashboard.
# The call displays the figure itself (the function ends with plt.show()) and
# returns the per-variable metrics table, printed below rounded to 4 decimals.
dashboard_metrics = create_comprehensive_dashboard(temp_df, precip_df, wind_df)
print("Comprehensive Dashboard Metrics:")
print(dashboard_metrics.round(4))

## Integration with External Tools

Demonstrate integration with external libraries, using xarray to load gridded data and evaluate it with Monet Stats metrics.

In [None]:
# Integration with xarray for gridded data
def _load_spatial_field(path, variable='__xarray_dataarray_variable__'):
    """Read one variable from a dataset file into memory, then close the file.

    ``.load()`` pulls the values into memory inside the ``with`` block, so the
    returned DataArray stays usable after the file handle is released.
    """
    with xr.open_dataset(path) as dataset:
        return dataset[variable].load()


try:
    # Load spatial datasets
    obs_da = _load_spatial_field('data/spatial_obs.nc')
    mod_da = _load_spatial_field('data/spatial_mod.nc')
except FileNotFoundError:
    print("Spatial datasets not found, skipping xarray integration example")
else:
    print("Loaded spatial datasets:")
    print(f"Observed: {obs_da.shape}")
    print(f"Modeled: {mod_da.shape}")

    # Calculate spatial metrics
    # Flatten the data for metric calculation
    obs_flat = obs_da.values.flatten()
    mod_flat = mod_da.values.flatten()

    # Keep only positions where both fields are non-NaN
    mask = ~(np.isnan(obs_flat) | np.isnan(mod_flat))
    obs_clean = obs_flat[mask]
    mod_clean = mod_flat[mask]

    # Calculate metrics using monet_stats
    xarray_metrics = {
        'MAE': ms.MAE(obs_clean, mod_clean),
        'RMSE': ms.RMSE(obs_clean, mod_clean),
        'Correlation': ms.pearsonr(obs_clean, mod_clean),
        'NSE': ms.NSE(obs_clean, mod_clean),
        'MB': ms.MB(obs_clean, mod_clean)
    }

    print("\nXarray Integration Metrics:")
    for metric, value in xarray_metrics.items():
        print(f"{metric}: {value:.4f}")

    # Visualization of spatial data: observed, modeled, and their difference
    # at the first time step
    fig, axes = plt.subplots(1, 3, figsize=(18, 5))

    # First time step of observed
    im1 = axes[0].pcolormesh(obs_da.lon, obs_da.lat, obs_da.isel(time=0), shading='auto')
    axes[0].set_title('Observed Temperature (First Time Step)')
    axes[0].set_xlabel('Longitude')
    axes[0].set_ylabel('Latitude')
    plt.colorbar(im1, ax=axes[0])

    # First time step of modeled
    im2 = axes[1].pcolormesh(mod_da.lon, mod_da.lat, mod_da.isel(time=0), shading='auto')
    axes[1].set_title('Modeled Temperature (First Time Step)')
    axes[1].set_xlabel('Longitude')
    axes[1].set_ylabel('Latitude')
    plt.colorbar(im2, ax=axes[1])

    # Difference
    diff = mod_da.isel(time=0) - obs_da.isel(time=0)
    im3 = axes[2].pcolormesh(mod_da.lon, mod_da.lat, diff, shading='auto', cmap='RdBu')
    axes[2].set_title('Difference (Modeled - Observed)')
    axes[2].set_xlabel('Longitude')
    axes[2].set_ylabel('Latitude')
    plt.colorbar(im3, ax=axes[2])

    plt.tight_layout()
    plt.show()

# Summary of integration capabilities
print("\nIntegration Summary:")
print("1. Multi-dataset integration: Combined metrics across temperature, precipitation, and wind")
print("2. Machine learning integration: Used monet_stats metrics with ML models")
print("3. Ensemble modeling: Combined multiple models with different metrics")
print("4. Dashboard creation: Comprehensive visualization of model performance")
print("5. Xarray integration: Gridded data analysis with monet_stats metrics")

## Integration Examples Summary

This notebook demonstrated comprehensive integration workflows with Monet Stats:

1. **Multi-Dataset Integration**: Combined metrics from temperature, precipitation, and wind datasets for comprehensive evaluation.
2. **Machine Learning Integration**: Integrated Monet Stats metrics with Random Forest models for enhanced evaluation.
3. **Ensemble Modeling**: Created ensemble evaluation frameworks combining multiple models and metrics.
4. **Dashboard Creation**: Built comprehensive visualization dashboards for model evaluation.
5. **External Tool Integration**: Demonstrated integration with xarray for gridded data analysis.

These integration examples show how Monet Stats can be combined with other tools and techniques to create powerful evaluation and analysis workflows for atmospheric models.