# Example 23: European Gas Demand Forecasting with Genetic Algorithm Ensemble Mode

This notebook demonstrates using genetic algorithm feature selection with **ensemble mode** for robust weather-based gas demand forecasting.

**Dataset**: European gas demand with weather data (2013-2023)

**Key Features**:
- Panel/grouped data (multiple countries)
- Weather predictors: temperature, wind_speed
- Target: gas_demand
- **Enhancement demonstrated**: Ensemble mode for robust feature selection

**Ensemble Mode Benefits**:
- Reduces sensitivity to random initialization
- Identifies consistently important features across multiple GA runs
- More stable feature selection results

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from py_parsnip import linear_reg
from py_recipes import recipe
from py_recipes.steps import step_normalize, step_mutate
# NOTE(review): step_select_genetic_algorithm is called in later cells but was
# never imported. It is assumed to be exported from py_recipes.steps - confirm
# the import path against the installed package.
from py_recipes.steps import step_select_genetic_algorithm
from py_workflows import workflow
from py_yardstick import rmse, mae, r_squared

# Read the gas-demand/weather CSV and convert the date column to timestamps
data = pd.read_csv('../_md/__data/european_gas_demand_weather_data.csv')
data['date'] = pd.to_datetime(data['date'])

# Quick summary: table size, the countries present, and the period covered
print(f"Data shape: {data.shape}")
country_names = sorted(data['country'].unique())
print(f"\nCountries: {country_names}")
earliest, latest = data['date'].min(), data['date'].max()
print(f"\nDate range: {earliest} to {latest}")
print("\nFirst few rows:")

# Kept as the last expression so the notebook renders the preview table
data.head(10)

ImportError: cannot import name 'step_normalize' from 'py_recipes.steps' (/Users/matthewdeane/Documents/Data Science/python/_projects/py-tidymodels/py_recipes/steps/__init__.py)

## 1. Data Preparation

Add engineered features, then create a chronological 80/20 train/test split.

In [None]:
# Calendar features taken from the timestamp column
date_parts = data['date'].dt
data['month'] = date_parts.month
data['day_of_week'] = date_parts.dayofweek
data['day_of_year'] = date_parts.dayofyear

# Squared weather terms and the temperature x wind-speed product
data['temp_squared'] = data['temperature'] ** 2
data['wind_squared'] = data['wind_speed'] ** 2
data['temp_wind_interaction'] = data['temperature'] * data['wind_speed']

# Chronological split: rows up to the 80th-percentile date go to train,
# everything strictly later goes to test
split_date = data['date'].quantile(0.8)
is_train_row = data['date'] <= split_date
is_test_row = data['date'] > split_date
train = data.loc[is_train_row].copy()
test = data.loc[is_test_row].copy()

for split_name, frame in (("Train", train), ("Test", test)):
    print(f"{split_name}: {frame.shape[0]} rows, {frame['date'].min()} to {frame['date'].max()}")

# Everything except the identifiers and the target counts as a feature
non_feature_cols = {'date', 'country', 'gas_demand'}
feature_names = [name for name in train.columns if name not in non_feature_cols]
print(f"\nFeatures: {feature_names}")

## 2. Baseline: All Features (No Selection)

First, fit a linear regression on all features for a single country (Austria) to establish baseline performance.

In [None]:
# Select one country for demonstration (Austria)
country = 'Austria'

# Drop the identifier columns so the `gas_demand ~ .` formula only sees the
# target and the remaining columns of the frame.
id_columns = ['date', 'country']
train_country = train[train['country'] == country].drop(columns=id_columns).copy()
test_country = test[test['country'] == country].drop(columns=id_columns).copy()

print(f"Training on {country}")
print(f"Train: {train_country.shape}")
print(f"Test: {test_country.shape}")

# Every remaining column except the target is a baseline predictor.
# Computed before reporting so the header below shows the real feature count
# instead of a hard-coded number.
baseline_features = [col for col in train_country.columns if col != 'gas_demand']

# Baseline model with all features
baseline_wf = workflow().add_formula('gas_demand ~ .').add_model(linear_reg())
baseline_fit = baseline_wf.fit(train_country)

# Evaluate on the held-out period; each metric call returns a table whose
# first row holds the score in its 'value' column.
baseline_preds = baseline_fit.predict(test_country)
truth = test_country['gas_demand']
estimate = baseline_preds['.pred']
baseline_rmse = rmse(truth, estimate).iloc[0]['value']
baseline_mae = mae(truth, estimate).iloc[0]['value']
baseline_r2 = r_squared(truth, estimate).iloc[0]['value']

print(f"\n=== Baseline (All {len(baseline_features)} Features) ===")
print(f"RMSE: {baseline_rmse:,.2f}")
print(f"MAE: {baseline_mae:,.2f}")
print(f"R²: {baseline_r2:.4f}")

# Show which features are in baseline
print(f"\nBaseline features ({len(baseline_features)}): {baseline_features}")

## 3. Genetic Algorithm with Ensemble Mode

Use ensemble mode to run multiple GA instances and aggregate results.

**Parameters**:
- `n_ensemble=5` - Run 5 independent GA instances
- `ensemble_strategy='voting'` - Keep only features selected in at least `ensemble_threshold` of the runs
- `ensemble_threshold=0.6` - Voting cutoff: a feature must appear in ≥60% of runs (at least 3 of the 5)

In [None]:
# Ensemble settings are named once so the report printed below cannot drift
# from the configuration that was actually run.
n_ensemble = 5
ensemble_threshold = 0.6  # Feature must appear in 60%+ of runs

# Create recipe with genetic algorithm ensemble
rec_ensemble = recipe(train_country)
rec_ensemble = step_select_genetic_algorithm(
    rec_ensemble,
    outcome='gas_demand',
    model=linear_reg(),
    metric='rmse',
    top_n=5,  # Select top 5 features

    # Ensemble mode settings
    n_ensemble=n_ensemble,
    ensemble_strategy='voting',
    ensemble_threshold=ensemble_threshold,

    # GA settings
    population_size=30,
    generations=20,
    cv_folds=3,
    random_state=42,
    verbose=True
)

# Prep and inspect results
prepped_ensemble = rec_ensemble.prep(train_country)

# The GA step is the only step in this recipe, so it is the first prepared step.
# NOTE(review): the underscore-prefixed attributes read below are private to
# the step; their layout is inferred from how this cell indexes them - confirm.
ga_step = prepped_ensemble.prepared_steps[0]
ensemble_runs = ga_step._ensemble_results
n_runs = len(ensemble_runs)

print(f"\n=== Ensemble Results ===")
print(f"Number of ensemble runs: {n_runs}")
print(f"\nPer-run results:")
for result in ensemble_runs:
    print(f"  Run {result['run_idx']+1} (seed={result['seed']}): "
          f"{len(result['features'])} features, fitness={result['fitness']:.2f}, "
          f"converged={result['converged']} in {result['n_generations']} gens")
    print(f"    Features: {result['features']}")

# Most frequently selected features first. The denominator is the number of
# runs actually recorded rather than a literal 5.
print(f"\nFeature frequency across runs:")
for feat, count in sorted(ga_step._feature_frequencies.items(), key=lambda x: x[1], reverse=True):
    pct = count / n_runs * 100
    print(f"  {feat}: {count}/{n_runs} runs ({pct:.0f}%)")

selected_features_ensemble = ga_step._selected_features
print(f"\nFinal selected features (voting with {ensemble_threshold:.0%} threshold): {selected_features_ensemble}")

## 4. Compare Ensemble Strategies

Test different ensemble aggregation strategies.

In [None]:
# Aggregation strategies to compare. 'strategy' and 'threshold' are forwarded
# as ensemble_strategy / ensemble_threshold.
# NOTE(review): threshold=0.0 for union/intersection is presumably ignored by
# those strategies - confirm against the step's documentation.
strategies = {
    'voting_60': {'strategy': 'voting', 'threshold': 0.6},
    'voting_80': {'strategy': 'voting', 'threshold': 0.8},
    'union': {'strategy': 'union', 'threshold': 0.0},
    'intersection': {'strategy': 'intersection', 'threshold': 0.0}
}

# GA settings held constant across strategies so only the aggregation rule varies
ga_settings = {
    'outcome': 'gas_demand',
    'metric': 'rmse',
    'top_n': 5,
    'n_ensemble': 5,
    'population_size': 30,
    'generations': 20,
    'cv_folds': 3,
    'random_state': 42,
    'verbose': False,
}

# strategy name -> feature list and test-set metrics; strategies that select
# nothing are left out
results = {}

for strategy_name, params in strategies.items():
    print(f"\nTesting strategy: {strategy_name}")

    rec = recipe(train_country)
    rec = step_select_genetic_algorithm(
        rec,
        model=linear_reg(),  # fresh model spec for every strategy
        ensemble_strategy=params['strategy'],
        ensemble_threshold=params['threshold'],
        **ga_settings
    )

    prepped = rec.prep(train_country)
    selected = prepped.prepared_steps[0]._selected_features

    # Any strategy (not only 'intersection') can end up with no features, so
    # the message names the strategy that actually produced the empty set.
    if len(selected) == 0:
        print(f"  No features selected by '{strategy_name}' (aggregation too strict); skipping model fit")
        continue

    # Fit model with selected features
    train_selected = prepped.bake(train_country)
    test_selected = prepped.bake(test_country)

    wf = workflow().add_formula('gas_demand ~ .').add_model(linear_reg())
    fit = wf.fit(train_selected)
    preds = fit.predict(test_selected)

    test_rmse = rmse(test_selected['gas_demand'], preds['.pred']).iloc[0]['value']
    test_mae = mae(test_selected['gas_demand'], preds['.pred']).iloc[0]['value']
    test_r2 = r_squared(test_selected['gas_demand'], preds['.pred']).iloc[0]['value']

    results[strategy_name] = {
        'n_features': len(selected),
        'features': selected,
        'rmse': test_rmse,
        'mae': test_mae,
        'r2': test_r2
    }

    print(f"  Selected {len(selected)} features: {selected}")
    print(f"  RMSE: {test_rmse:,.2f}, MAE: {test_mae:,.2f}, R²: {test_r2:.4f}")

## 5. Results Summary and Visualization

In [None]:
# One row per method: the all-features baseline first, then every ensemble
# strategy that produced a fitted model
baseline_row = {
    'Method': 'Baseline (All Features)',
    'N Features': len(baseline_features),
    'RMSE': baseline_rmse,
    'MAE': baseline_mae,
    'R²': baseline_r2
}
strategy_rows = [
    {
        'Method': f'Ensemble ({strategy_name})',
        'N Features': metrics['n_features'],
        'RMSE': metrics['rmse'],
        'MAE': metrics['mae'],
        'R²': metrics['r2']
    }
    for strategy_name, metrics in results.items()
]
comparison_data = [baseline_row, *strategy_rows]

comparison_df = pd.DataFrame(comparison_data)
print("\n=== Performance Comparison ===")
print(comparison_df.to_string(index=False))

# The winner is the row with the smallest test RMSE
best_idx = comparison_df['RMSE'].idxmin()
best_method = comparison_df.at[best_idx, 'Method']
best_rmse = comparison_df.at[best_idx, 'RMSE']
best_n_features = comparison_df.at[best_idx, 'N Features']
rmse_gain_pct = (1 - best_rmse / baseline_rmse) * 100

print(f"\nBest method: {best_method}")
print(f"  RMSE improvement vs baseline: {rmse_gain_pct:.1f}%")
print(f"  Feature reduction: {len(baseline_features)} → {best_n_features} features")

In [None]:
# Visualization: RMSE per method (left) and feature count vs RMSE (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Plot 1: RMSE comparison
ax1 = axes[0]
methods = comparison_df['Method']
rmse_values = comparison_df['RMSE']
colors = ['gray' if 'Baseline' in m else 'steelblue' for m in methods]

bars1 = ax1.barh(range(len(methods)), rmse_values, color=colors, alpha=0.7)
ax1.set_yticks(range(len(methods)))
ax1.set_yticklabels(methods)
ax1.set_xlabel('RMSE (lower is better)', fontsize=11)
ax1.set_title('Model Performance Comparison', fontsize=12, fontweight='bold')
ax1.grid(axis='x', alpha=0.3)

# Add value labels at the end of each bar
for bar, val in zip(bars1, rmse_values):
    ax1.text(val, bar.get_y() + bar.get_height()/2, f'{val:,.0f}',
             ha='left', va='center', fontsize=9, fontweight='bold')

# Plot 2: Feature count vs RMSE
ax2 = axes[1]
n_features = comparison_df['N Features']
scatter_colors = ['red' if 'Baseline' in m else 'blue' for m in methods]

ensemble_prefix = 'Ensemble ('
ensemble_labelled = False  # only the first ensemble point gets a legend entry

for n, r, c, m in zip(n_features, rmse_values, scatter_colors, methods):
    # Legend labels: tracked with a flag rather than a fixed row position, so
    # the 'Ensemble' entry survives any reordering of the comparison table.
    if 'Baseline' in m:
        label = 'Baseline'
    elif not ensemble_labelled:
        label = 'Ensemble'
        ensemble_labelled = True
    else:
        label = None

    # Strip the "Ensemble (...)" wrapper only from ensemble rows. Removing
    # every ')' would turn "Baseline (All Features)" into an unbalanced
    # "Baseline (All Features".
    if m.startswith(ensemble_prefix) and m.endswith(')'):
        short_name = m[len(ensemble_prefix):-1]
    else:
        short_name = m

    ax2.scatter(n, r, s=150, c=c, alpha=0.7, label=label)
    ax2.annotate(short_name,
                 (n, r), xytext=(5, 5), textcoords='offset points',
                 fontsize=8)

ax2.set_xlabel('Number of Features', fontsize=11)
ax2.set_ylabel('RMSE', fontsize=11)
ax2.set_title('Feature Count vs Performance', fontsize=12, fontweight='bold')
ax2.grid(alpha=0.3)
ax2.legend()

plt.tight_layout()
plt.savefig('gas_demand_ensemble_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

print("\nPlot saved as: gas_demand_ensemble_comparison.png")

## 6. Multi-Country Application

Apply ensemble feature selection to multiple countries.

In [None]:
# Select a few countries for demonstration
countries_to_test = ['Austria', 'Germany', 'France', 'Italy', 'Spain']
multi_country_results = []

# The set of countries present in the data does not change inside the loop,
# so it is computed once instead of on every iteration.
available_countries = set(data['country'].unique())

# NOTE: the loop variable rebinds the notebook-level `country` name.
for country in countries_to_test:
    if country not in available_countries:
        # Report the skip so a missing or misspelled country is visible
        print(f"\nSkipping {country}: not present in the dataset")
        continue

    print(f"\nProcessing {country}...")

    train_c = train[train['country'] == country].drop(['date', 'country'], axis=1).copy()
    test_c = test[test['country'] == country].drop(['date', 'country'], axis=1).copy()

    # Ensemble GA: 5 runs aggregated by voting with a 0.6 threshold
    rec = recipe(train_c)
    rec = step_select_genetic_algorithm(
        rec,
        outcome='gas_demand',
        model=linear_reg(),
        metric='rmse',
        top_n=5,
        n_ensemble=5,
        ensemble_strategy='voting',
        ensemble_threshold=0.6,
        population_size=30,
        generations=20,
        cv_folds=3,
        random_state=42,
        verbose=False
    )

    prepped = rec.prep(train_c)
    selected = prepped.prepared_steps[0]._selected_features

    if len(selected) == 0:
        # Without this message the country would vanish from the summary
        # table with no explanation.
        print("  No features selected; skipping model fit")
        continue

    # Refit a plain linear regression on the selected columns only and score
    # it on the held-out period
    train_selected = prepped.bake(train_c)
    test_selected = prepped.bake(test_c)

    wf = workflow().add_formula('gas_demand ~ .').add_model(linear_reg())
    fit = wf.fit(train_selected)
    preds = fit.predict(test_selected)

    test_rmse = rmse(test_selected['gas_demand'], preds['.pred']).iloc[0]['value']
    test_r2 = r_squared(test_selected['gas_demand'], preds['.pred']).iloc[0]['value']

    multi_country_results.append({
        'Country': country,
        'N Features': len(selected),
        'Selected Features': ', '.join(selected),
        'RMSE': test_rmse,
        'R²': test_r2
    })

    print(f"  Selected {len(selected)} features: {selected}")
    print(f"  RMSE: {test_rmse:,.2f}, R²: {test_r2:.4f}")

# Summary table
multi_country_df = pd.DataFrame(multi_country_results)
print("\n=== Multi-Country Results ===")
print(multi_country_df.to_string(index=False))

## Key Takeaways

1. **Ensemble Mode Provides Robustness**: Multiple GA runs with voting aggregation identify consistently important features

2. **Strategy Selection Matters**:
   - `voting` with 60-80% threshold: Balanced selection (features appearing in most runs)
   - `union`: Most features (features appearing in any run) - may include noise
   - `intersection`: Fewest features (features appearing in all runs) - most conservative

3. **Feature Frequency Insights**: Features selected in 80%+ of runs are likely truly important

4. **Country-Specific Patterns**: Different countries may benefit from different feature subsets

5. **Ensemble Reduces Variance**: More stable feature selection compared to single GA run