<a href="https://colab.research.google.com/github/mgondeck/wind_curtailment_prediction/blob/main/Shashank_DL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# packages
import os
import numpy as np
import pandas as pd

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import classification_report, confusion_matrix, precision_score, f1_score
from sklearn.model_selection import TimeSeriesSplit, KFold
from matplotlib import pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf

from keras.models import Model
from keras.layers import Input, Conv1D, BatchNormalization, ReLU, GlobalAveragePooling1D, Dense
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.utils import to_categorical


In [None]:
# mount your google drive
# Colab-specific step: the CSV that is read and the model file that is saved later in
# this notebook both live under /content/drive, so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the semicolon-separated feature/target table; the first column becomes the index
df = pd.read_csv(
    '/content/drive/My Drive/ms_wind_curtailment_prediction/lagged_curtailment_target_features.csv',
    sep=';',
    index_col=0,
)

In [None]:
# Parse the index (the first CSV column, loaded with index_col=0) into datetimes so
# that the date-string slicing with .loc further down selects by timestamp.
df.index = pd.to_datetime(df.index)


In [None]:
# Lags (in rows) regarded as significant, i.e. whose autocorrelation lies outside
# the confidence intervals
significant_lags = [48, 96]

# One shifted copy of 'redispatch' and 'level' per lag. The iteration order fixes the
# column order: redispatch/level for the first lag, then redispatch/level for the next.
lagged_columns = {
    f'{column}_lag_{lag}': df[column].shift(lag)
    for lag in significant_lags
    for column in ('redispatch', 'level')
}

# Append the lag columns to a copy of df, then drop every row that contains a NaN
# (which includes the leading rows left empty by the shifting)
df_lagged = df.assign(**lagged_columns).dropna()

In [None]:
# Display the column labels of df_lagged (original columns plus the added lag columns)
df_lagged.columns

Index(['redispatch', 'level', 'wind_speed_m/s', 'wind_speed_m/s_lag1',
       'wind_speed_m/s_lag2', 'wind_speed_m/s_lag3', 'wind_speed_m/s_lag4',
       'wind_speed_m/s_lag5', 'wind_direction_degrees',
       'wind_direction_degrees_lag1', 'wind_direction_degrees_lag2',
       'wind_direction_degrees_lag3', 'wind_direction_degrees_lag4',
       'wind_direction_degrees_lag5', 'radiation_global_J/m2',
       'radiation_global_J/m2_lag1', 'radiation_global_J/m2_lag2',
       'radiation_global_J/m2_lag3', 'radiation_global_J/m2_lag4',
       'radiation_global_J/m2_lag5', 'air_temperature_K',
       'air_temperature_K_lag1', 'air_temperature_K_lag2',
       'air_temperature_K_lag3', 'air_temperature_K_lag4',
       'air_temperature_K_lag5', 'humidity_percent', 'humidity_percent_lag1',
       'humidity_percent_lag2', 'humidity_percent_lag3',
       'humidity_percent_lag4', 'humidity_percent_lag5', 'wind_gust_max_m/s',
       'wind_gust_max_m/s_lag1', 'wind_gust_max_m/s_lag2',
       'wind_g

In [None]:
# Restrict df_lagged to the desired period; .loc label slicing keeps both end points
start_date, end_date = '2022-01-01', '2023-06-30'
df_lagged = df_lagged.loc[slice(start_date, end_date)]

In [None]:
# Scaling pipeline and SMOTE oversampler (for class imbalance).
# The pipeline contains a StandardScaler only; no imputation step is configured.
# NOTE(review): neither `preprocessor` nor `smote` is applied anywhere in the visible
# notebook. If they get wired in, fit/resample on the training part of each fold only,
# otherwise information from the held-out data leaks into training.
preprocessor = Pipeline([
    ('scaler', StandardScaler())
])

smote = SMOTE(random_state=42)

# Define features X and target y from the prepared frame `df_lagged` (lag features
# added, rows with NaNs dropped, restricted to the selected date range). Building them
# from the raw `df` would silently discard all of that preparation.
X = df_lagged.drop(columns=['redispatch', 'level'])
y = df_lagged['redispatch']

In [None]:
# Time series cross-validation configuration
n_splits = 10  # number of splits
gap = 48       # rows left out between train and test sets (12 hour difference)

# Cross-validation strategy: time-ordered splits separated by `gap` rows
tscv = TimeSeriesSplit(gap=gap, n_splits=n_splits)

# Give the 2D feature table a trailing axis of length 1 so the input is 3D:
# (rows, features, 1)
X_reshaped = np.asarray(X)[..., np.newaxis]

# Define model architecture
def make_model(input_shape, num_classes):
    """Build an uncompiled 1D convolutional classifier.

    The network stacks three identical stages (Conv1D with 64 filters, kernel size 3
    and "same" padding, followed by BatchNormalization and ReLU), averages over the
    sequence axis and ends in a softmax Dense layer.

    Args:
        input_shape: Shape of one sample, passed straight to ``Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``Model`` mapping the input layer to the class-probability output.
    """
    inputs = Input(input_shape)

    # Three Conv1D -> BatchNormalization -> ReLU stages applied back to back
    features = inputs
    for _ in range(3):
        features = Conv1D(filters=64, kernel_size=3, padding="same")(features)
        features = BatchNormalization()(features)
        features = ReLU()(features)

    # Collapse the sequence axis before the classification layer
    pooled = GlobalAveragePooling1D()(features)
    class_probabilities = Dense(num_classes, activation="softmax")(pooled)

    return Model(inputs=inputs, outputs=class_probabilities)

# Model parameters
input_shape = X_reshaped.shape[1:]
# Labels as a plain NumPy array: the fold indices produced by the splitter are integer
# positions, so they have to select rows by position and not by index label.
y_array = np.asarray(y)
num_classes = len(np.unique(y_array))


def _make_callbacks():
    """Create a fresh list of training callbacks for a single fold.

    Callback objects keep state between ``fit`` calls (for example the best monitored
    value seen so far by ``ModelCheckpoint``), so sharing one list across folds would
    let earlier folds influence later ones.

    Returns:
        A list with a ModelCheckpoint (writes "best_model.keras"), a ReduceLROnPlateau
        and an EarlyStopping callback, all monitoring "val_loss".
    """
    return [
        ModelCheckpoint("best_model.keras", save_best_only=True, monitor="val_loss"),
        ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001),
        # restore_best_weights: score the best epoch, not the one reached after
        # `patience` epochs without improvement
        EarlyStopping(monitor="val_loss", patience=50, verbose=1, restore_best_weights=True),
    ]


def _macro_scores(y_true, class_probabilities):
    """Return ``(f1, precision)``, macro-averaged, for predicted class probabilities.

    Args:
        y_true: True class labels.
        class_probabilities: 2D array of per-class scores; the predicted class is the
            argmax over axis 1.
    """
    predicted_classes = np.argmax(class_probabilities, axis=1)
    f1 = f1_score(y_true, predicted_classes, average='macro', zero_division=1)
    precision = precision_score(y_true, predicted_classes, average='macro', zero_division=1)
    return f1, precision


# Initialize lists to store evaluation metrics for each fold
f1_scores = []
precision_scores = []
train_f1_scores = []
train_precision_scores = []
test_f1_scores = []
test_precision_scores = []

# Iterate over each fold
for fold, (train_index, test_index) in enumerate(tscv.split(X_reshaped), 1):
    print(f"Training on fold {fold}/{n_splits}")

    # Get the data for this fold (positional selection on NumPy arrays)
    X_train_fold, X_test_fold = X_reshaped[train_index], X_reshaped[test_index]
    y_train_fold, y_test_fold = y_array[train_index], y_array[test_index]

    # Create and compile a new model so no weights carry over between folds
    model = make_model(input_shape, num_classes)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    # Train the model with callbacks that belong to this fold only
    callbacks = _make_callbacks()
    history = model.fit(
        X_train_fold, y_train_fold,
        validation_data=(X_test_fold, y_test_fold),
        epochs=100,
        batch_size=32,
        callbacks=callbacks,
        verbose=1
    )

    # Scores on the held-out fold.
    # NOTE(review): this fold is also the validation data that drives early stopping,
    # learning-rate reduction and checkpointing, so these scores are optimistic.
    f1, precision = _macro_scores(y_test_fold, model.predict(X_test_fold))
    f1_scores.append(f1)
    precision_scores.append(precision)

    # "Validation" and "test" are the same fold, so the scores are identical; reuse
    # them instead of running a second prediction over the same data.
    test_f1, test_precision = f1, precision
    test_f1_scores.append(test_f1)
    test_precision_scores.append(test_precision)

    # Scores on the training part of the fold
    train_f1, train_precision = _macro_scores(y_train_fold, model.predict(X_train_fold))
    train_f1_scores.append(train_f1)
    train_precision_scores.append(train_precision)

# Print average scores across all folds
print("Average F1 score (test):", np.mean(test_f1_scores))
print("Average precision score (test):", np.mean(test_precision_scores))
print("Average F1 score (train):", np.mean(train_f1_scores))
print("Average precision score (train):", np.mean(train_precision_scores))


Training on fold 1/10
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 58: early stopping
Training on fold 2/10
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100


In [None]:
# Save deep learning model
# `model` is whatever was assigned last; in the visible notebook that is the model
# trained in the final cross-validation fold.
model_path = '/content/drive/MyDrive/ms_wind_curtailment_prediction/deep_learning_model.h5'
model.save(filepath=model_path)

Average F1 score (test): 0.3791845523558293
Average precision score (test): 0.8365680605631448
Average F1 score (train): 0.3862333438384388
Average precision score (train): 0.8033983682401675

Average F1 score (test): 0.3670776067594171
Average precision score (test): 0.8090675531144104
Average F1 score (train): 0.37155752353062355
Average precision score (train): 0.8120026256119013