# 🌬️ Wind Generation Forecasting with ANN (Weekly, NZ Islands)
This notebook uses Artificial Neural Networks to forecast **weekly wind energy generation** separately for the **South** and **North Islands**.

We address:
- **RQ1**: Accuracy of ANN using historical generation (univariate)
- **RQ2**: Improvement with lagged climate features based on correlation

📆 All analysis is done on weekly-aggregated data.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the combined South/North daily wind + climate table and index it by date
df = (
    pd.read_csv('wind_daily_data.csv', parse_dates=['Date'])
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

# Wind-direction columns (names containing 'WD50M') are not used; remove any that exist
wind_direction_cols = [name for name in df.columns if 'WD50M' in name]
df = df.drop(columns=wind_direction_cols, errors='ignore')

In [None]:
# Resample the daily table to weekly values: totals for generation, precipitation
# and evaporation; means for wind speed, temperature, pressure and humidity.
# The list order fixes the column order of weekly_df.
weekly_aggregations = [
    ('GENERATION', 'sum'),
    ('WS50M', 'mean'),
    ('T2M', 'mean'),
    ('PS', 'mean'),
    ('RH2M', 'mean'),
    ('PRECTOTCORR', 'sum'),
    ('EVLAND', 'sum'),
]

weekly_columns = {}
for var, how in weekly_aggregations:
    for island in ('SOUTH', 'NORTH'):
        column = f'{var}_{island}'
        weekly = df[column].resample('W')
        if how == 'sum':
            # min_count=1: with the pandas default (min_count=0) a week whose daily
            # values are all missing sums to 0 instead of NaN, so the dropna below
            # would keep it as a bogus zero-total week.
            weekly_columns[column] = weekly.sum(min_count=1)
        else:
            weekly_columns[column] = weekly.mean()

# Drop weeks with missing values
# NOTE(review): weeks with only some days present (e.g. the first/last week of the
# file) are still kept and their totals cover fewer than 7 days - confirm this is
# acceptable for the analysis.
weekly_df = pd.DataFrame(weekly_columns).dropna()
weekly_df.head()

## 🔍 Correlation Analysis for Feature Selection
We calculate Pearson correlation between weekly wind generation and meteorological variables for each island.
Only features with |correlation| > 0.3 will be used for RQ2 models.

In [None]:
# Correlation: South Island
# Keep the South Island columns and strip the island suffix from their names
south_df = (
    weekly_df
    .filter(like='_SOUTH')
    .rename(columns=lambda name: name.replace('_SOUTH', ''))
)
corr_south = south_df.corr()

fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(corr_south, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('South Island: Correlation Heatmap')
plt.show()

# Keep the variables whose absolute correlation with GENERATION exceeds 0.3
generation_corr_south = corr_south['GENERATION'].drop('GENERATION')
strong_mask_south = (generation_corr_south.abs() > 0.3).to_numpy()
selected_features_south = generation_corr_south.index[strong_mask_south].tolist()
print('Selected features (South):', selected_features_south)

In [None]:
# Correlation: North Island
# Keep the North Island columns and strip the island suffix from their names
north_df = (
    weekly_df
    .filter(like='_NORTH')
    .rename(columns=lambda name: name.replace('_NORTH', ''))
)
corr_north = north_df.corr()

fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(corr_north, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('North Island: Correlation Heatmap')
plt.show()

# Keep the variables whose absolute correlation with GENERATION exceeds 0.3
generation_corr_north = corr_north['GENERATION'].drop('GENERATION')
strong_mask_north = (generation_corr_north.abs() > 0.3).to_numpy()
selected_features_north = generation_corr_north.index[strong_mask_north].tolist()
print('Selected features (North):', selected_features_north)

## 🧠 ANN Modeling for Wind Forecasting (RQ1 & RQ2)
We implement two ANN models per island:
- **RQ1**: Using only the previous week's generation value as input (univariate)
- **RQ2**: Using the selected climate features, lagged by one week, as inputs (features chosen by correlation)

In [None]:
# ANN - South Island
# Build the South Island frame with the island suffix removed from the column names
south_df = (
    weekly_df
    .filter(like='_SOUTH')
    .rename(columns=lambda name: name.replace('_SOUTH', ''))
)

# RQ1 (univariate): the input is the previous row's generation; the first row has
# no predecessor, so it is dropped from both inputs and target
X_rq1 = south_df[['GENERATION']].shift(periods=1).dropna()
y_rq1 = south_df['GENERATION'].iloc[1:]

# RQ2: the inputs are the correlation-selected climate features from the previous
# row; the target is restricted to the rows that survive the lagging
X_rq2 = south_df[selected_features_south].shift(periods=1).dropna()
y_rq2 = south_df['GENERATION'].iloc[1:].loc[X_rq2.index]

# Split and scale
def train_ann_model(X, y, epochs=100, seed=42):
    """Train a small feed-forward network and score it on a chronological hold-out.

    The last 20% of the rows (no shuffling) form the test set. Inputs and target
    are min-max scaled with statistics taken from the training rows only, and
    predictions are mapped back to the original target units before scoring.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix, one row per sample; must have at least one column.
    y : pandas.Series or array-like
        Target values, one per row of ``X``.
    epochs : int, default 100
        Number of training epochs passed to ``model.fit``.
    seed : int or None, default 42
        Seed handed to ``tf.keras.utils.set_random_seed`` before the model is
        built, so repeated runs (and the models being compared) start from the
        same random state. ``None`` leaves the random generators untouched.

    Returns
    -------
    tuple
        ``(mae, rmse, mape)`` on the hold-out rows. ``mape`` is a percentage and
        is computed only over rows whose actual value is non-zero; it is NaN
        when no such row exists.

    Raises
    ------
    ValueError
        If ``X`` has no feature columns or ``X`` and ``y`` differ in length.
    """
    # Fail early with a clear message instead of an opaque scaler/Keras error
    if X.shape[1] == 0:
        raise ValueError("X has no feature columns; at least one input feature is required")
    if len(X) != len(y):
        raise ValueError(f"X and y must have the same number of rows (got {len(X)} and {len(y)})")

    if seed is not None:
        # Seeds Python, NumPy and TensorFlow in one call
        tf.keras.utils.set_random_seed(seed)

    # shuffle=False keeps time order: train on the past, test on the most recent rows
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Fit the scalers on the training rows only so the test rows do not leak into them
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()
    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)
    y_train_scaled = scaler_y.fit_transform(np.asarray(y_train, dtype=float).reshape(-1, 1))

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    model.fit(X_train_scaled, y_train_scaled, epochs=epochs, verbose=0)

    # Predict in scaled space, then convert back to the original target units
    preds_scaled = model.predict(X_test_scaled, verbose=0)
    predicted = scaler_y.inverse_transform(preds_scaled).ravel()
    actual = np.asarray(y_test, dtype=float).ravel()

    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))

    # A zero actual value would make the percentage error infinite, so skip those rows
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])) * 100
    else:
        mape = float('nan')
    return mae, rmse, mape

# Fit and score the univariate (RQ1) and climate-feature (RQ2) networks
rq1_metrics = train_ann_model(X_rq1, y_rq1)
rq2_metrics = train_ann_model(X_rq2, y_rq2)

# Each result is a (mae, rmse, mape) tuple
mae_rq1, rmse_rq1, mape_rq1 = rq1_metrics
mae_rq2, rmse_rq2, mape_rq2 = rq2_metrics

print(f"South Island ANN RQ1 - MAPE: {mape_rq1:.2f}% | MAE: {mae_rq1:.2f} | RMSE: {rmse_rq1:.2f}")
print(f"South Island ANN RQ2 - MAPE: {mape_rq2:.2f}% | MAE: {mae_rq2:.2f} | RMSE: {rmse_rq2:.2f}")

In [None]:
# ANN - North Island
# Build the North Island frame with the island suffix removed from the column names
north_df = (
    weekly_df
    .filter(like='_NORTH')
    .rename(columns=lambda name: name.replace('_NORTH', ''))
)

# RQ1 (univariate): the input is the previous row's generation; the first row has
# no predecessor, so it is dropped from both inputs and target
X_rq1 = north_df[['GENERATION']].shift(periods=1).dropna()
y_rq1 = north_df['GENERATION'].iloc[1:]

# RQ2: the inputs are the correlation-selected climate features from the previous
# row; the target is restricted to the rows that survive the lagging
X_rq2 = north_df[selected_features_north].shift(periods=1).dropna()
y_rq2 = north_df['GENERATION'].iloc[1:].loc[X_rq2.index]

# Fit and score both networks; each result is a (mae, rmse, mape) tuple
rq1_metrics = train_ann_model(X_rq1, y_rq1)
rq2_metrics = train_ann_model(X_rq2, y_rq2)
mae_rq1, rmse_rq1, mape_rq1 = rq1_metrics
mae_rq2, rmse_rq2, mape_rq2 = rq2_metrics

print(f"North Island ANN RQ1 - MAPE: {mape_rq1:.2f}% | MAE: {mae_rq1:.2f} | RMSE: {rmse_rq1:.2f}")
print(f"North Island ANN RQ2 - MAPE: {mape_rq2:.2f}% | MAE: {mae_rq2:.2f} | RMSE: {rmse_rq2:.2f}")

## 📊 ANN Performance Summary (RQ1 vs RQ2)
This table summarizes evaluation metrics for both RQ1 and RQ2 ANN models across South and North Islands.

In [None]:
import pandas as pd

# The South and North modelling cells store their metrics under the same names
# (mae_rq1, rmse_rq1, mape_rq1 and the *_rq2 equivalents), so by the time this cell
# runs only the North Island values are left. Reading those names here would put
# North Island numbers in the rows labelled 'South'. Each island/model pair is
# therefore evaluated here and recorded under its own label.
island_feature_sets = [
    ('South', '_SOUTH', selected_features_south),
    ('North', '_NORTH', selected_features_north),
]

summary_rows = []
for island_name, suffix, climate_features in island_feature_sets:
    island_df = weekly_df[[col for col in weekly_df.columns if suffix in col]].copy()
    island_df.columns = [col.replace(suffix, '') for col in island_df.columns]
    island_target = island_df['GENERATION'].iloc[1:]

    # Same inputs as the per-island cells: previous-row generation (RQ1) and
    # previous-row selected climate features (RQ2)
    model_inputs = {
        'ANN-RQ1': island_df[['GENERATION']].shift(1).dropna(),
        'ANN-RQ2': island_df[climate_features].shift(1).dropna(),
    }
    for model_name, model_X in model_inputs.items():
        row_mae, row_rmse, row_mape = train_ann_model(model_X, island_target.loc[model_X.index])
        summary_rows.append({
            'Island': island_name,
            'Model': model_name,
            'MAPE (%)': row_mape,
            'MAE': row_mae,
            'RMSE': row_rmse,
        })

comparison_df = pd.DataFrame(summary_rows, columns=['Island', 'Model', 'MAPE (%)', 'MAE', 'RMSE'])
display(comparison_df.round(2))