# 🌬️ Wind Forecasting by Island: SARIMA & SARIMAX (NZ)
This notebook forecasts weekly wind generation separately for **South Island** and **North Island** using SARIMA and SARIMAX models.
RQ1 is addressed with a univariate SARIMA model (generation history only), and RQ2 with a SARIMAX model that adds one-week-lagged climate features (excluding wind direction).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the merged weekly wind/climate table; 'Date' is parsed as datetime and
# becomes the index.
df = pd.read_csv(
    'merged_weekly_wind_climate_by_island.csv',
    parse_dates=['Date'],
    index_col='Date',
)

# Wind direction (WD50M) columns are not used; dropping is a no-op when none exist.
wind_direction_cols = [name for name in df.columns if 'WD50M' in name]
df = df.drop(columns=wind_direction_cols, errors='ignore')

# Build one independent frame per island: keep the columns whose name mentions
# the island tag (or 'Date'), then strip the '_<TAG>' suffix so both frames
# share the same column names.
island_frames = {}
for island_tag in ('SOUTH', 'NORTH'):
    island_cols = [name for name in df.columns if island_tag in name or 'Date' in name]
    frame = df[island_cols].copy()
    suffix = '_' + island_tag
    frame.rename(columns=lambda name, suffix=suffix: name.replace(suffix, ''), inplace=True)
    island_frames[island_tag] = frame

south = island_frames['SOUTH']
north = island_frames['NORTH']

print('South Island features:', south.columns.tolist())
print('North Island features:', north.columns.tolist())

## 🧭 South Island Forecasting
### SARIMA (RQ1) + SARIMAX with Lagged Climate Features (RQ2)

In [None]:
# South Island: fit a univariate SARIMA (RQ1) and a SARIMAX with lagged climate
# regressors (RQ2) on the first 80% of rows, forecast the remaining 20%, and
# report each model's MAPE on that hold-out.

# Create lagged climate features for South: each regressor is the previous
# row's value, so a forecast for a given row never uses that row's own climate.
climate_cols_s = ['T2M', 'PS', 'WS50M', 'RH2M', 'PRECTOTCORR']
lag_cols_s = [f'{name}_lag1' for name in climate_cols_s]
for base_name, lag_name in zip(climate_cols_s, lag_cols_s):
    south[lag_name] = south[base_name].shift(1)
# shift(1) leaves the first row without lagged values; drop every incomplete row.
south.dropna(inplace=True)

# SARIMA: Univariate
# Ordered 80/20 split by position (no shuffling), done explicitly with .iloc.
split_s = int(len(south) * 0.8)
train_sarima = south['GENERATION'].iloc[:split_s]
test_sarima = south['GENERATION'].iloc[split_s:]
sarima_model = SARIMAX(train_sarima, order=(1,1,1), seasonal_order=(1,1,1,52))
sarima_res = sarima_model.fit(disp=False)
pred_sarima = sarima_res.forecast(steps=len(test_sarima))
# Compare actuals and forecasts by position, not by index label. The date index
# loaded from CSV carries no frequency; if statsmodels cannot infer one (e.g.
# after rows were dropped), the forecast comes back on an integer index and an
# index-aligned `test - pred` would be all-NaN, silently giving a NaN MAPE.
# NOTE: a hold-out value of exactly 0 still makes the MAPE infinite.
actual_s = test_sarima.to_numpy()
mape_sarima = np.mean(np.abs((actual_s - np.asarray(pred_sarima)) / actual_s)) * 100

# SARIMAX: Lagged exog
endog = south['GENERATION']
exog = south[lag_cols_s]
train_endog, test_endog = endog.iloc[:split_s], endog.iloc[split_s:]
train_exog, test_exog = exog.iloc[:split_s], exog.iloc[split_s:]
sarimax_model = SARIMAX(train_endog, exog=train_exog, order=(1,1,1), seasonal_order=(1,1,1,52))
sarimax_res = sarimax_model.fit(disp=False)
# Out-of-sample forecasting with regressors requires the hold-out exog rows.
pred_sarimax = sarimax_res.forecast(steps=len(test_endog), exog=test_exog)
# Same positional comparison as for the SARIMA model above.
actual_exog_s = test_endog.to_numpy()
mape_sarimax = np.mean(np.abs((actual_exog_s - np.asarray(pred_sarimax)) / actual_exog_s)) * 100

print(f"South Island SARIMA MAPE: {mape_sarima:.2f}%")
print(f"South Island SARIMAX (Lagged) MAPE: {mape_sarimax:.2f}%")

## 🧭 North Island Forecasting
### SARIMA (RQ1) + SARIMAX with Lagged Climate Features (RQ2)

In [None]:
# North Island: fit a univariate SARIMA (RQ1) and a SARIMAX with lagged climate
# regressors (RQ2) on the first 80% of rows, forecast the remaining 20%, and
# report each model's MAPE on that hold-out.

# Create lagged climate features for North: each regressor is the previous
# row's value, so a forecast for a given row never uses that row's own climate.
climate_cols_n = ['T2M', 'PS', 'WS50M', 'RH2M', 'PRECTOTCORR']
lag_cols_n = [f'{name}_lag1' for name in climate_cols_n]
for base_name, lag_name in zip(climate_cols_n, lag_cols_n):
    north[lag_name] = north[base_name].shift(1)
# shift(1) leaves the first row without lagged values; drop every incomplete row.
north.dropna(inplace=True)

# SARIMA: Univariate
# Ordered 80/20 split by position (no shuffling), done explicitly with .iloc.
split_n = int(len(north) * 0.8)
train_sarima_n = north['GENERATION'].iloc[:split_n]
test_sarima_n = north['GENERATION'].iloc[split_n:]
sarima_model_n = SARIMAX(train_sarima_n, order=(1,1,1), seasonal_order=(1,1,1,52))
sarima_res_n = sarima_model_n.fit(disp=False)
pred_sarima_n = sarima_res_n.forecast(steps=len(test_sarima_n))
# Compare actuals and forecasts by position, not by index label. The date index
# loaded from CSV carries no frequency; if statsmodels cannot infer one (e.g.
# after rows were dropped), the forecast comes back on an integer index and an
# index-aligned `test - pred` would be all-NaN, silently giving a NaN MAPE.
# NOTE: a hold-out value of exactly 0 still makes the MAPE infinite.
actual_n = test_sarima_n.to_numpy()
mape_sarima_n = np.mean(np.abs((actual_n - np.asarray(pred_sarima_n)) / actual_n)) * 100

# SARIMAX: Lagged exog
endog_n = north['GENERATION']
exog_n = north[lag_cols_n]
train_endog_n, test_endog_n = endog_n.iloc[:split_n], endog_n.iloc[split_n:]
train_exog_n, test_exog_n = exog_n.iloc[:split_n], exog_n.iloc[split_n:]
sarimax_model_n = SARIMAX(train_endog_n, exog=train_exog_n, order=(1,1,1), seasonal_order=(1,1,1,52))
sarimax_res_n = sarimax_model_n.fit(disp=False)
# Out-of-sample forecasting with regressors requires the hold-out exog rows.
pred_sarimax_n = sarimax_res_n.forecast(steps=len(test_endog_n), exog=test_exog_n)
# Same positional comparison as for the SARIMA model above.
actual_exog_n = test_endog_n.to_numpy()
mape_sarimax_n = np.mean(np.abs((actual_exog_n - np.asarray(pred_sarimax_n)) / actual_exog_n)) * 100

print(f"North Island SARIMA MAPE: {mape_sarima_n:.2f}%")
print(f"North Island SARIMAX (Lagged) MAPE: {mape_sarimax_n:.2f}%")