In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
import pickle
warnings.filterwarnings('ignore')

# ML libraries
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.feature_selection import mutual_info_regression
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor

# Single seed shared by the notebook: it seeds NumPy's global RNG here and is
# passed as random_state to the mutual-information scorer and the models below.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

print(f"Libraries loaded")

In [None]:
# Load the raw training and test tables and parse their 'timestamp' column
# into datetimes so the .dt accessors used during feature engineering work.
train_df = pd.read_csv('../data/training_data.csv').assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'])
)
test_df = pd.read_csv('../data/test_data.csv').assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'])
)

print(f"Training: {train_df.shape}, Test: {test_df.shape}")

## 1. Enhanced Feature Engineering

Building upon the baseline features, we add:
- Power coefficient approximation (Cp)
- Tip speed ratio (TSR)
- Turbulence intensity proxies
- Higher-order polynomial features
- More sophisticated interaction terms

In [None]:
def _circular_abs_diff(angle_a, angle_b):
    """Return the smallest absolute difference between two angle Series, in degrees.

    The raw difference is wrapped modulo 360 before folding, so the result
    always lies in [0, 180] even when the two inputs use different angle
    conventions (e.g. -180..180 versus 0..360). NaN inputs propagate as NaN.
    """
    diff = (angle_a - angle_b).abs() % 360
    return diff.where(diff <= 180, 360 - diff)


def _manufacturer_power_curve(wind_speed, cut_in=4.0, rated_speed=12.0,
                              cut_out=25.0, rated_power=2050.0):
    """Vectorised simplified power curve (0 below cut-in, cubic ramp, flat rated, 0 at cut-out).

    NaN wind speeds match none of the conditions and map to 0, which is what
    the previous row-wise implementation returned as well.
    """
    speed = np.asarray(wind_speed, dtype=float)
    ramp = rated_power * ((speed - cut_in) / (rated_speed - cut_in)) ** 3
    # np.select picks the first matching condition, mirroring an if/elif chain.
    return np.select(
        [speed < cut_in, speed < rated_speed, speed < cut_out],
        [0.0, ramp, rated_power],
        default=0.0,
    )


def enhanced_feature_engineering(df, is_training=True):
    """
    Enhanced feature engineering with physics-based and advanced features.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. The code reads the columns 'timestamp' (datetime),
        'wind_speed_avg', 'wind_speed1', 'wind_speed2', 'outdoor_temp',
        'nacelle_temp', 'weather_temp', 'pressure', 'rotor_angular_velocity',
        'wind_angle', 'nacelle_angle', 'vane_angle', 'pitch_angle',
        'weather_wind_speed', 'rain_1h', 'snow_1h' and 'humidity';
        'active_power' is optional. The input is copied, never mutated.
    is_training : bool, default True
        When True, additionally builds the features that depend on row order
        or on the target: pitch change, rolling statistics, lags, and (if
        'active_power' is present) cp_proxy / power-curve deviation columns.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the engineered columns appended.

    Notes
    -----
    * Rolling/lag/pitch-change features assume rows are in time order.
    * 'wind_power_theoretical_normalized' is divided by the mean of the frame
      being processed, so train and test frames are normalised by different
      constants.
    """
    df = df.copy()

    # ========== Basic Temporal Features ==========
    df['hour'] = df['timestamp'].dt.hour
    df['month'] = df['timestamp'].dt.month
    df['dayofweek'] = df['timestamp'].dt.dayofweek
    df['dayofyear'] = df['timestamp'].dt.dayofyear
    df['is_weekend'] = (df['dayofweek'] >= 5).astype(int)

    # Time of day categories. With include_lowest=True the bins are
    # [0, 6], (6, 12], (12, 18], (18, 24], i.e. hour 6 counts as 'night'.
    df['time_of_day'] = pd.cut(df['hour'], bins=[0, 6, 12, 18, 24],
                               labels=['night', 'morning', 'afternoon', 'evening'],
                               include_lowest=True)
    df['is_night'] = (df['time_of_day'] == 'night').astype(int)
    df['is_morning'] = (df['time_of_day'] == 'morning').astype(int)
    df['is_afternoon'] = (df['time_of_day'] == 'afternoon').astype(int)

    # Cyclic encoding so that e.g. hour 23 and hour 0 end up close together
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    df['dayofyear_sin'] = np.sin(2 * np.pi * df['dayofyear'] / 365)
    df['dayofyear_cos'] = np.cos(2 * np.pi * df['dayofyear'] / 365)

    # ========== Advanced Wind Speed Features ==========
    wind = df['wind_speed_avg']
    # Slightly negative readings would turn sqrt/log1p into NaN; clamp at 0
    # for those two transforms only.
    wind_nonneg = wind.clip(lower=0)

    # Polynomial features (up to 4th order)
    df['wind_speed_squared'] = wind ** 2
    df['wind_speed_cubed'] = wind ** 3
    df['wind_speed_4th'] = wind ** 4
    df['wind_speed_sqrt'] = np.sqrt(wind_nonneg)
    df['wind_speed_log'] = np.log1p(wind_nonneg)  # log(1+x) to handle zeros

    # Wind speed bins (categorical feature as indicators)
    df['wind_very_low'] = (wind < 4).astype(int)
    df['wind_low'] = ((wind >= 4) & (wind < 8)).astype(int)
    df['wind_medium'] = ((wind >= 8) & (wind < 12)).astype(int)
    df['wind_high'] = ((wind >= 12) & (wind < 16)).astype(int)
    df['wind_very_high'] = (wind >= 16).astype(int)

    # Sensor agreement metrics
    df['wind_speed_diff'] = np.abs(df['wind_speed1'] - df['wind_speed2'])
    df['wind_speed_ratio'] = df['wind_speed1'] / (df['wind_speed2'] + 0.001)
    df['wind_speed_max'] = df[['wind_speed1', 'wind_speed2']].max(axis=1)
    df['wind_speed_min'] = df[['wind_speed1', 'wind_speed2']].min(axis=1)

    # Relative sensor disagreement. It is exposed under two names because both
    # columns existed before; they are identical by construction.
    relative_disagreement = df['wind_speed_diff'] / (wind + 0.001)
    df['wind_speed_cv'] = relative_disagreement
    df['turbulence_proxy'] = relative_disagreement

    # ========== Air Density & Theoretical Power ==========
    df['temp_kelvin'] = df['outdoor_temp'] + 273.15
    df['air_density_proxy'] = df['pressure'] / df['temp_kelvin']

    # Theoretical wind power (P = 0.5 * rho * A * v^3); with the swept area
    # treated as constant only rho * v^3 matters.
    df['wind_power_theoretical'] = df['air_density_proxy'] * df['wind_speed_cubed']
    df['wind_power_theoretical_normalized'] = (
        df['wind_power_theoretical'] / (df['wind_power_theoretical'].mean() + 0.001)
    )

    # ========== Rotor & Tip Speed Ratio ==========
    df['rotor_angular_velocity_squared'] = df['rotor_angular_velocity'] ** 2

    # Tip Speed Ratio (TSR) = (omega * R) / v; R is constant, so omega / v is
    # a relative TSR that does not need the rotor radius.
    df['tsr_proxy'] = df['rotor_angular_velocity'] / (wind + 0.001)
    df['tsr_proxy_squared'] = df['tsr_proxy'] ** 2

    # Power coefficient proxy (Cp): actual power over theoretical power.
    # Derived from the target, so it must never be used as a model input.
    if is_training and 'active_power' in df.columns:
        df['cp_proxy'] = df['active_power'] / (df['wind_power_theoretical'] + 0.001)
        df['cp_proxy'] = df['cp_proxy'].clip(0, 0.593)  # Betz limit

    # ========== Direction Alignment Features ==========
    # Wind-nacelle / wind-vane misalignment, folded into [0, 180] degrees
    df['wind_nacelle_diff'] = _circular_abs_diff(df['wind_angle'], df['nacelle_angle'])
    df['wind_vane_diff'] = _circular_abs_diff(df['wind_angle'], df['vane_angle'])

    # Cosine of misalignment (1 = perfect, 0 = perpendicular, -1 = opposite)
    df['nacelle_alignment'] = np.cos(np.radians(df['wind_nacelle_diff']))
    df['vane_alignment'] = np.cos(np.radians(df['wind_vane_diff']))

    # Yaw error indicators
    df['large_yaw_error'] = (df['wind_nacelle_diff'] > 15).astype(int)
    df['moderate_yaw_error'] = (
        (df['wind_nacelle_diff'] > 5) & (df['wind_nacelle_diff'] <= 15)
    ).astype(int)

    # ========== Pitch Angle & Operational States ==========
    df['pitch_angle_squared'] = df['pitch_angle'] ** 2
    df['pitch_angle_cubed'] = df['pitch_angle'] ** 3

    # Operational state indicators
    df['is_shutdown'] = (df['pitch_angle'] > 40).astype(int)
    df['is_feathering'] = ((df['pitch_angle'] > 20) & (df['pitch_angle'] <= 40)).astype(int)
    df['is_optimal_pitch'] = (df['pitch_angle'] <= 5).astype(int)

    # Pitch rate of change between consecutive rows (training frames only)
    if is_training:
        df['pitch_change'] = df['pitch_angle'].diff().fillna(0)
        df['pitch_change_abs'] = np.abs(df['pitch_change'])

    # ========== Temperature Features ==========
    df['temp_diff'] = df['outdoor_temp'] - df['nacelle_temp']
    df['weather_outdoor_temp_diff'] = df['weather_temp'] - df['outdoor_temp']
    df['temp_diff_squared'] = df['temp_diff'] ** 2

    # Temperature categories
    df['is_cold'] = (df['outdoor_temp'] < 0).astype(int)
    df['is_hot'] = (df['outdoor_temp'] > 25).astype(int)

    # ========== Weather Condition Features ==========
    df['weather_wind_diff'] = np.abs(df['weather_wind_speed'] - wind)
    df['has_rain'] = (df['rain_1h'] > 0).astype(int)
    df['has_snow'] = (df['snow_1h'] > 0).astype(int)
    df['has_precipitation'] = ((df['rain_1h'] > 0) | (df['snow_1h'] > 0)).astype(int)

    # Rain/snow intensity
    df['rain_log'] = np.log1p(df['rain_1h'])
    df['snow_log'] = np.log1p(df['snow_1h'])

    # Humidity effects
    df['humidity_high'] = (df['humidity'] > 80).astype(int)
    df['humidity_low'] = (df['humidity'] < 40).astype(int)

    # ========== Advanced Interaction Features ==========
    # Wind power affected by alignment
    df['wind_power_aligned'] = df['wind_power_theoretical'] * df['nacelle_alignment']

    # Wind-temperature interactions (air density effects)
    df['wind_temp_interaction'] = wind * df['outdoor_temp']
    df['wind_pressure_interaction'] = wind * df['pressure']
    df['wind_density_interaction'] = wind * df['air_density_proxy']

    # Rotor-wind interactions
    df['rotor_wind_interaction'] = df['rotor_angular_velocity'] * wind
    df['rotor_wind_squared'] = df['rotor_angular_velocity'] * df['wind_speed_squared']

    # Pitch-wind interactions (control system response)
    df['pitch_wind_interaction'] = df['pitch_angle'] * wind
    df['pitch_rotor_interaction'] = df['pitch_angle'] * df['rotor_angular_velocity']

    # Complex interaction: wind power with pitch control
    df['controlled_power_proxy'] = (
        df['wind_power_theoretical'] * np.cos(np.radians(df['pitch_angle']))
    )

    # ========== Statistical Aggregations (Training only) ==========
    if is_training:
        # Rolling statistics over the last 3 / 6 / 12 rows (short-term trends)
        for window in [3, 6, 12]:
            wind_roll = wind.rolling(window=window, min_periods=1)
            df[f'wind_rolling_mean_{window}'] = wind_roll.mean()
            df[f'wind_rolling_std_{window}'] = wind_roll.std()
            df[f'wind_rolling_max_{window}'] = wind_roll.max()
            df[f'wind_rolling_min_{window}'] = wind_roll.min()

            # Rotor rolling stats
            df[f'rotor_rolling_mean_{window}'] = (
                df['rotor_angular_velocity'].rolling(window=window, min_periods=1).mean()
            )

            # Power rolling stats (if available). These windows include the
            # current row's target value, so they leak the label and must not
            # be fed to a model.
            if 'active_power' in df.columns:
                power_roll = df['active_power'].rolling(window=window, min_periods=1)
                df[f'power_rolling_mean_{window}'] = power_roll.mean()
                df[f'power_rolling_std_{window}'] = power_roll.std()

        # Lag features (previous rows); leading gaps are filled with the column mean
        for lag in [1, 2, 3]:
            df[f'wind_lag_{lag}'] = wind.shift(lag).fillna(wind.mean())
            df[f'pitch_lag_{lag}'] = df['pitch_angle'].shift(lag).fillna(df['pitch_angle'].mean())

    # ========== Manufacturer Power Curve Features ==========
    # Theoretical power curve (simplified Vestas V90-2.0 MW)
    df['theoretical_power_curve'] = _manufacturer_power_curve(wind)

    # Deviation from theoretical curve (training only; derived from the target)
    if is_training and 'active_power' in df.columns:
        df['power_curve_deviation'] = df['active_power'] - df['theoretical_power_curve']
        df['power_curve_ratio'] = df['active_power'] / (df['theoretical_power_curve'] + 0.001)

    # Drop intermediate columns we don't want as features
    if 'time_of_day' in df.columns:
        df = df.drop('time_of_day', axis=1)

    return df

print("Enhanced feature engineering function created")

In [None]:
# Build the engineered frames: the training frame also gets the order- and
# target-dependent extras (is_training=True), the test frame does not.
print("Engineering features...")
train_eng = enhanced_feature_engineering(train_df, is_training=True)
test_eng = enhanced_feature_engineering(test_df, is_training=False)

# Column-count summary for the training frame before/after engineering
print(f"\nOriginal features: {train_df.shape[1]}")
print(f"Engineered features: {train_eng.shape[1]}")
print(f"New features created: {train_eng.shape[1] - train_df.shape[1]}")

In [None]:
# Prepare features
# Columns that are identifiers, targets, or derived directly from the target
# must never reach the model.
exclude_cols = ['timestamp', 'active_power', 'reactive_power', 'cp_proxy', 'power_curve_deviation', 'power_curve_ratio']
train_features = [col for col in train_eng.columns if col not in exclude_cols]
test_features = [col for col in test_eng.columns if col not in ['timestamp']]
# Only columns present in BOTH frames are usable; this also removes the
# training-only rolling / lag / pitch-change features.
common_features = sorted(list(set(train_features) & set(test_features)))

print(f"\nCommon features for modeling: {len(common_features)}")

# Replace +/-inf BEFORE computing the imputation medians. Computing the median
# on the raw frame lets a column dominated by inf produce an inf median, which
# fillna would then write straight back into the matrix.
X = train_eng[common_features].replace([np.inf, -np.inf], np.nan)
train_medians = X.median()
X = X.fillna(train_medians)
y = train_eng['active_power']
# The test frame is imputed with the TRAINING medians so both matrices share
# the same fill values.
X_test = test_eng[common_features].replace([np.inf, -np.inf], np.nan).fillna(train_medians)

print(f"Training samples: {len(X):,}")
print(f"Test samples: {len(X_test):,}")

## 2. Feature Selection

Not all features improve performance. We use mutual information to identify the most predictive features.

In [None]:
# Score every candidate feature against the target with mutual information
print("Computing mutual information scores...")
mi_scores = pd.Series(
    mutual_info_regression(X, y, random_state=RANDOM_STATE, n_neighbors=5),
    index=X.columns,
).sort_values(ascending=False)

print(f"\nTop 30 features by mutual information:")
print(mi_scores.head(30))

# Keep features whose score is strictly above the 20th percentile
# (i.e. drop roughly the bottom 20%; ties at the threshold are dropped too)
threshold = mi_scores.quantile(0.20)
selected_features = mi_scores[mi_scores.gt(threshold)].index.tolist()

print(f"\nFeatures selected: {len(selected_features)} out of {len(common_features)}")
print(f"Features dropped: {len(common_features) - len(selected_features)}")

# Narrow both matrices to the surviving columns.
# NOTE: X and X_test are overwritten here, so re-running this cell alone
# prunes a further 20% each time.
X, X_test = X[selected_features], X_test[selected_features]

In [None]:
# Chronological-order hold-out: first 80% of rows train, last 20% validate
split_idx = int(len(X) * 0.8)
X_train, y_train = X.iloc[:split_idx], y.iloc[:split_idx]
X_val, y_val = X.iloc[split_idx:], y.iloc[split_idx:]

# Fit the scaler on the training rows only, then reuse it everywhere else
scaler = RobustScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

print(f"Train: {X_train_scaled.shape}, Val: {X_val_scaled.shape}, Test: {X_test_scaled.shape}")

## 3. State-Based Modeling

Train separate models for different operational states to capture their unique dynamics.

In [None]:
# Operational-state labelling
def get_operational_state(row):
    """Map one row to 'shutdown', 'cut_in', 'rated' or 'normal'.

    The pitch check takes priority: a pitch angle above 40 is 'shutdown'
    regardless of wind. Otherwise the label follows wind speed: below 4 is
    'cut_in', 12 or more is 'rated', anything else is 'normal'.
    """
    if row['pitch_angle'] > 40:
        return 'shutdown'

    wind = row['wind_speed_avg']
    if wind < 4:
        return 'cut_in'
    return 'rated' if wind >= 12 else 'normal'

# Label every row of BOTH engineered frames with its operational state.
# This adds a 'state' column to train_eng / test_eng in place; the state
# column is used below for per-state masks and ensemble weights.
train_eng['state'] = train_eng.apply(get_operational_state, axis=1)
test_eng['state'] = test_eng.apply(get_operational_state, axis=1)

# Compare how often each state occurs in the two frames
print("Training data state distribution:")
print(train_eng['state'].value_counts())
print(f"\nTest data state distribution:")
print(test_eng['state'].value_counts())

In [None]:
# Train one XGBoost model per operational state on the scaled training rows
# and report its MAE on the validation rows of the same state.
states = ['shutdown', 'cut_in', 'normal', 'rated']
state_models = {}
state_predictions = {}  # kept for later use; nothing is stored in it in this cell

for state in states:
    print(f"\n{'='*60}")
    print(f"Training model for state: {state}")
    print(f"{'='*60}")

    # Boolean masks per frame / split for the current state
    train_state_mask = train_eng['state'] == state
    val_state_mask = train_eng.iloc[split_idx:]['state'] == state
    test_state_mask = test_eng['state'] == state

    # Restrict the full-frame mask to the training split (first split_idx rows)
    train_state_idx = train_state_mask.iloc[:split_idx]

    # The scaled matrices are NumPy arrays, so index them positionally with
    # plain boolean arrays; the targets are Series sharing the mask's index.
    X_train_state = X_train_scaled[train_state_idx.to_numpy()]
    y_train_state = y_train[train_state_idx]
    X_val_state = X_val_scaled[val_state_mask.to_numpy()]
    # y_val already holds only the validation rows. Slicing it again with
    # .iloc[split_idx:] produced an empty Series, which made the MAE call fail
    # with mismatched lengths.
    y_val_state = y_val[val_state_mask]

    print(f"Training samples: {len(X_train_state)}")
    print(f"Validation samples: {len(X_val_state)}")

    if len(X_train_state) < 100:  # Skip if too few samples
        print(f"Skipping {state} - insufficient data")
        continue

    # Model capacity is matched to how complex each regime is expected to be
    if state == 'shutdown':
        # Shutdown state: predict near-zero
        model = xgb.XGBRegressor(n_estimators=50, max_depth=3, learning_rate=0.05, random_state=RANDOM_STATE)
    elif state == 'cut_in':
        # Cut-in: low wind, careful modeling
        model = xgb.XGBRegressor(n_estimators=100, max_depth=5, learning_rate=0.05, random_state=RANDOM_STATE)
    elif state == 'rated':
        # Rated: predict around 2050 kW
        model = xgb.XGBRegressor(n_estimators=100, max_depth=4, learning_rate=0.05, random_state=RANDOM_STATE)
    else:  # normal
        # Normal operation: most complex region
        model = xgb.XGBRegressor(
            n_estimators=200,
            max_depth=8,
            learning_rate=0.05,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=RANDOM_STATE
        )

    model.fit(X_train_state, y_train_state)

    # Evaluate only when the validation split contains rows of this state
    if len(X_val_state) > 0:
        val_pred = model.predict(X_val_state)
        mae = mean_absolute_error(y_val_state, val_pred)
        print(f"Validation MAE: {mae:.4f}")

    state_models[state] = model

print(f"\n{'='*60}")
print(f"State-specific models trained: {len(state_models)}")
print(f"{'='*60}")

## 4. Unified Global Model

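Train global models (XGBoost and LightGBM) on the training split, across all operational states, as the primary predictors. The state-specific models from Section 3 are intended as fallback/ensemble members, but they are not yet wired into the ensemble in the cells below.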

In [None]:
# Global XGBoost regressor fitted on every training row, regardless of state
print("Training global XGBoost model...")
xgb_model = xgb.XGBRegressor(
    # ensemble size and step size
    n_estimators=300, learning_rate=0.05,
    # tree shape and split control
    max_depth=8, min_child_weight=3, gamma=0.1,
    # row / column subsampling
    subsample=0.8, colsample_bytree=0.8,
    # L1 / L2 regularisation
    reg_alpha=0.1, reg_lambda=1.0,
    # reproducibility and parallelism
    random_state=RANDOM_STATE, n_jobs=-1,
)

# The validation set is passed for progress logging (every 50 rounds);
# no early-stopping option is requested here.
xgb_model.fit(X_train_scaled, y_train, eval_set=[(X_val_scaled, y_val)], verbose=50)

# Hold-out score of the global XGBoost model
xgb_pred = xgb_model.predict(X_val_scaled)
xgb_mae = mean_absolute_error(y_val, xgb_pred)
print(f"\nGlobal XGBoost MAE: {xgb_mae:.4f}")

In [None]:
# Train global LightGBM model on every training row, regardless of state
print("\nTraining global LightGBM model...")
lgb_model = lgb.LGBMRegressor(
    n_estimators=300,
    max_depth=8,
    learning_rate=0.05,
    num_leaves=31,
    subsample=0.8,
    # LightGBM only performs row subsampling (bagging) when subsample_freq is
    # non-zero; with the default of 0 the subsample=0.8 setting above is
    # silently ignored. Re-sample every iteration.
    subsample_freq=1,
    colsample_bytree=0.8,
    min_child_samples=20,
    reg_alpha=0.1,
    reg_lambda=1.0,
    random_state=RANDOM_STATE,
    n_jobs=-1,
    verbose=-1
)

# The validation set is passed for progress logging (every 50 rounds);
# no early-stopping callback is registered.
lgb_model.fit(
    X_train_scaled, y_train,
    eval_set=[(X_val_scaled, y_val)],
    callbacks=[lgb.log_evaluation(50)]
)

# Hold-out score of the global LightGBM model
lgb_pred = lgb_model.predict(X_val_scaled)
lgb_mae = mean_absolute_error(y_val, lgb_pred)
print(f"\nGlobal LightGBM MAE: {lgb_mae:.4f}")

## 5. Intelligent Ensemble

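Combine the global models using fixed, hand-chosen weights that depend on each sample's operational state. (No per-sample confidence estimate is used yet; quantile regression in Section 8E is one way to add it.)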

In [None]:
# Create ensemble with state-aware weighting
def smart_ensemble(xgb_pred, lgb_pred, states, base_weights=(0.6, 0.4)):
    """
    Blend XGBoost and LightGBM predictions with weights chosen per operational state.

    Parameters
    ----------
    xgb_pred, lgb_pred : array-like of numbers
        Per-sample predictions of the two models (same length).
    states : array-like of str (e.g. pandas Series)
        Operational state of each sample, matched to the predictions by
        position (any pandas index is ignored).
    base_weights : tuple(float, float), default (0.6, 0.4)
        (xgb weight, lgb weight) used for 'normal' and for any state label
        that has no dedicated weights.

    Returns
    -------
    numpy.ndarray of float64
        Weighted blend, one value per sample.

    Raises
    ------
    ValueError
        If the three inputs do not have the same length.
    """
    # Work in float64 regardless of the input dtype: allocating the output
    # with zeros_like(xgb_pred) would truncate the blend to float32 (or to
    # integers) whenever xgb_pred has that dtype.
    xgb_arr = np.asarray(xgb_pred, dtype=float)
    lgb_arr = np.asarray(lgb_pred, dtype=float)
    state_arr = np.asarray(states, dtype=object)

    if not (len(xgb_arr) == len(lgb_arr) == len(state_arr)):
        raise ValueError(
            "xgb_pred, lgb_pred and states must have the same length "
            f"(got {len(xgb_arr)}, {len(lgb_arr)}, {len(state_arr)})"
        )

    # (xgb weight, lgb weight) overrides per state
    state_weights = {
        'shutdown': (0.8, 0.2),  # heavily weight toward XGBoost (seems more stable)
        'rated': (0.5, 0.5),     # equal weighting in the rated region
        'cut_in': (0.7, 0.3),    # low wind is tricky, favor XGBoost
    }

    # Start from the base weights, then overwrite the rows of each special state
    w_xgb = np.full(xgb_arr.shape, base_weights[0], dtype=float)
    w_lgb = np.full(xgb_arr.shape, base_weights[1], dtype=float)
    for state_name, (state_w_xgb, state_w_lgb) in state_weights.items():
        in_state = state_arr == state_name
        w_xgb[in_state] = state_w_xgb
        w_lgb[in_state] = state_w_lgb

    return w_xgb * xgb_arr + w_lgb * lgb_arr

# Apply smart ensemble
# States of the validation rows (same positional slice used for X_val / y_val)
val_states = train_eng.iloc[split_idx:]['state']
ensemble_pred = smart_ensemble(xgb_pred, lgb_pred, val_states)
ensemble_mae = mean_absolute_error(y_val, ensemble_pred)

# Side-by-side validation MAE of the two global models and their blend
print(f"\n{'='*60}")
print(f"ENSEMBLE PERFORMANCE")
print(f"{'='*60}")
print(f"XGBoost MAE:      {xgb_mae:.4f}")
print(f"LightGBM MAE:     {lgb_mae:.4f}")
print(f"Smart Ensemble:   {ensemble_mae:.4f}")
print(f"{'='*60}")

# Lowest of the three validation MAEs (whichever approach it came from)
best_mae = min(xgb_mae, lgb_mae, ensemble_mae)
print(f"\nBest validation MAE: {best_mae:.4f}")

## 6. Post-Processing & Physics Constraints

Apply domain knowledge to refine predictions.

In [None]:
def apply_physics_constraints(predictions, features_df):
    """
    Apply physics-based constraints to predictions.

    Parameters
    ----------
    predictions : array-like of numbers
        Raw model predictions; not modified (a copy is made).
    features_df : pandas.DataFrame
        Frame with one row per prediction, in the same order, providing the
        'pitch_angle' and 'wind_speed_avg' columns.

    Returns
    -------
    numpy.ndarray
        Constrained predictions, same length and dtype as the input array.

    Notes
    -----
    Rules are applied from weakest to strongest so that a later, stricter rule
    can never be undone by an earlier one:

    1. everything is clipped to [0, 2100];
    2. rated region (wind >= 12, not shutdown, not above cut-out): clipped to
       [1800, 2100];
    3. low wind (3 <= wind < 5): capped at 400;
    4. shutdown (pitch > 40): capped at 50;
    5. below cut-in (wind < 3) or above cut-out (wind > 25): forced to 0.

    Applying the rated-region floor after the shutdown / cut-out rules would
    lift those rows back up to at least 1800, which is why it runs first and
    excludes them.
    """
    constrained = np.array(predictions)  # np.array copies, so the caller's data is untouched

    # Positional NumPy views so any pandas index on features_df is irrelevant
    wind = features_df['wind_speed_avg'].to_numpy()
    pitch = features_df['pitch_angle'].to_numpy()

    shutdown_mask = pitch > 40
    cut_in_mask = wind < 3
    cut_out_mask = wind > 25
    rated_mask = (wind >= 12) & ~cut_out_mask & ~shutdown_mask
    low_wind_mask = (wind >= 3) & (wind < 5)

    # 1. Hard physical limits (this also removes negative predictions)
    constrained = np.clip(constrained, 0, 2100)

    # 2. Rated region: cap at rated power with some variance
    constrained[rated_mask] = np.clip(constrained[rated_mask], 1800, 2100)

    # 3. Low wind region (3-5 m/s): limit max power
    constrained[low_wind_mask] = np.minimum(constrained[low_wind_mask], 400)

    # 4. Shutdown state: force near-zero
    constrained[shutdown_mask] = np.minimum(constrained[shutdown_mask], 50)

    # 5. No power below 3 m/s or above the 25 m/s cut-out
    constrained[cut_in_mask | cut_out_mask] = 0

    return constrained

print("Physics constraints function defined")

## 7. Generate Final Predictions

In [None]:
# Generate test predictions with the state-weighted ensemble of the two global
# models (the ensemble is always used here, whichever model scored best on validation)
print("Generating test predictions...")

# Get predictions from both global models
xgb_test = xgb_model.predict(X_test_scaled)
lgb_test = lgb_model.predict(X_test_scaled)

# Apply smart ensemble, weighting by each test row's operational state
test_states = test_eng['state']
ensemble_test = smart_ensemble(xgb_test, lgb_test, test_states)

# Apply physics constraints (reads pitch_angle / wind_speed_avg from test_eng)
final_predictions = apply_physics_constraints(ensemble_test, test_eng)

# Quick sanity check of the prediction range
print(f"\nPrediction statistics:")
print(f"Min: {final_predictions.min():.2f}")
print(f"Max: {final_predictions.max():.2f}")
print(f"Mean: {final_predictions.mean():.2f}")
print(f"Std: {final_predictions.std():.2f}")

In [None]:
# Create submission
# 'id' is the row position of each prediction (0 .. n-1)
submission = pd.DataFrame({
    'id': range(len(final_predictions)),
    'active_power': final_predictions
})

# Timestamp the file name so repeated runs do not overwrite earlier submissions
timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
filename = f'../results/v5_advanced_submission_{timestamp}.csv'
# NOTE(review): presumably ../results must already exist, since nothing here creates it — confirm
submission.to_csv(filename, index=False)

print(f"\nSubmission saved: {filename}")
# NOTE(review): best_mae is the minimum of the XGBoost / LightGBM / ensemble
# validation MAEs, whereas the submission comes from the ensemble followed by
# physics constraints, so this figure is only a rough estimate.
print(f"\nEstimated validation MAE: {best_mae:.4f}")
print(f"Target: < 9.13 (current best)")

## 8. Additional Strategies to Try

If the above doesn't bring the MAE below 9.13 (the current best), consider:

### A. Pseudo-labeling
- Use test predictions with high confidence to augment training data
- Retrain with combined dataset

### B. Cross-validation ensemble
- Train multiple models on different CV folds
- Average their predictions

### C. Feature engineering v2
- Fourier features for time series patterns
- Distance to nearest training example
- Clustering-based features

### D. Hyperparameter optimization on new features
- Re-run Optuna with enhanced feature set
- May find better parameters

### E. Quantile regression
- Predict uncertainty bounds
- Adjust predictions based on confidence

### F. External data
- If allowed: weather patterns, seasonal trends
- Historical turbine performance data