# In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping

# Load datasets: the generation table plus four weather-factor tables.
wind_data = pd.read_csv('wind_data.csv')
avg_ps = pd.read_csv('Wind_Factor_Daily_AVG_PS.csv')
avg_t2m = pd.read_csv('Wind_Factor_Daily_AVG_T2M.csv')
avg_wd50m = pd.read_csv('Wind_Factor_Daily_AVG_WD50M.csv')
avg_ws50m = pd.read_csv('Wind_Factor_Daily_AVG_WS50M.csv')

# Preprocessing: give the generation table the same key name as the factor
# tables, then parse every DATE column so the joins compare real datetimes
# rather than strings.
wind_data = wind_data.rename(columns={'Trading_date': 'DATE'})
for df in [wind_data, avg_ps, avg_t2m, avg_wd50m, avg_ws50m]:
    df['DATE'] = pd.to_datetime(df['DATE'])

# Merge datasets on DATE. merge() defaults to an inner join, so only dates
# present in every table survive.
data_df = (
    wind_data
    .merge(avg_ps, on="DATE")
    .merge(avg_t2m, on="DATE")
    .merge(avg_wd50m, on="DATE")
    .merge(avg_ws50m, on="DATE")
)

# Keep only the Wind rows, index by date, and drop rows with missing values.
data_df = data_df[data_df["Fuel_Code"] == "Wind"].drop(columns=["Fuel_Code"])
data_df.set_index("DATE", inplace=True)
data_df.dropna(inplace=True)

# The train/test split below is positional (first 80% / last 20%), so the
# rows must be in chronological order for the hold-out set to be the most
# recent period. A stable sort keeps the original order of same-date rows.
data_df.sort_index(kind="mergesort", inplace=True)

# Correlation analysis: rank every numeric column by the absolute value of
# its correlation with the target.
# - select_dtypes: DataFrame.corr() raises on non-numeric columns in
#   pandas >= 2.0, so restrict to numeric columns first.
# - drop("Total_TP"): remove the target by label instead of assuming it sorts
#   first; on a tie at |r| == 1 a positional [1:3] slice could pick the target
#   itself as a feature.
# - dropna: constant columns produce NaN correlations and must not be chosen.
corr = data_df.select_dtypes(include="number").corr()
target_corr = corr["Total_TP"].drop("Total_TP").abs().dropna()
top2 = target_corr.sort_values(ascending=False).head(2).index.tolist()
print("Top 2 correlated variables:", top2)

# Define features and target
X = data_df[top2].values
y = data_df["Total_TP"].values

# Train-test split point: the first 80% of rows train the model, the
# remaining 20% are held out for evaluation.
split_idx = int(len(X) * 0.8)

# Scale features. The scaler learns its min/max from the training rows only;
# fitting on the full array would leak information about the held-out rows
# into training. The fitted scaler is then applied to every row, so test
# values may fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(X[:split_idx])
X_scaled = scaler.transform(X)

# Positional split of the scaled features and the (unscaled) target.
X_train, X_test = X_scaled[:split_idx], X_scaled[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

# ANN Model: Dense(64) -> LeakyReLU -> BatchNorm -> Dropout(0.2)
#            -> Dense(32) -> LeakyReLU -> Dense(1) (no output activation).
n_features = X_train.shape[1]

model = Sequential()
model.add(Dense(64, input_shape=(n_features,)))
model.add(LeakyReLU())
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(32))
model.add(LeakyReLU())
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')

# Stop once validation loss has gone 10 epochs without improving, and roll
# the model back to the weights from its best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# 20% of the training arrays are set aside by Keras as validation data.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
    verbose=0,
)

# Predict on the held-out rows and collapse the model output to 1-D so it
# lines up element-wise with y_test.
raw_output = model.predict(X_test)
predictions = raw_output.flatten()

# Evaluation: mean absolute error and root of the mean squared error.
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of *y_pred* against *y_true*.

    Samples whose true value is exactly zero are skipped: their percentage
    error is undefined, and dividing by them would turn the whole metric
    into ``inf`` or ``nan``.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, broadcastable to ``y_true``.

    Returns:
        The MAPE in percent (a mean relative error of 0.1 gives 10.0), or
        ``nan`` when no sample has a non-zero true value.
    """
    y_true, y_pred = np.broadcast_arrays(
        np.asarray(y_true, dtype=float),
        np.asarray(y_pred, dtype=float),
    )
    nonzero = y_true != 0
    if not nonzero.any():
        return float("nan")
    relative_error = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
    return np.mean(np.abs(relative_error)) * 100

mape = mean_absolute_percentage_error(y_test, predictions)

# Report the three error metrics, one per line.
for report_line in (
    f"✅ ANN MAE: {mae:,.2f}",
    f"✅ ANN RMSE: {rmse:,.2f}",
    f"✅ ANN MAPE: {mape:.2f}%",
):
    print(report_line)

# Plot actual vs predicted on a single axes (x-axis is the sample position).
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(y_test, label="Actual")
ax.plot(predictions, label="Predicted", linestyle="--")
ax.set_title("ANN: Actual vs Predicted Wind Generation")
ax.set_xlabel("Time")
ax.set_ylabel("Generation")
ax.legend()
fig.tight_layout()
plt.show()
