## **Binary Classification with a Deep Learning Model**

In [1]:
# packages
import os
import numpy as np
import pandas as pd

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import classification_report, confusion_matrix, precision_score, f1_score
from sklearn.model_selection import TimeSeriesSplit, KFold
from matplotlib import pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf

from keras.models import Model
from keras.layers import Input, Conv1D, BatchNormalization, ReLU, GlobalAveragePooling1D, Dense
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.utils import to_categorical

In [2]:
# Mount Google Drive at /content/drive so the cells below can read the input CSV
# from it and write the trained model back to it.
# This cell relies on the google.colab package, i.e. it is written for a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Load lagged data**

In [3]:
# searching for files, load data and convert index to datetime type
def search_file(directory, filename):
    """Locate `filename` somewhere below `directory`.

    The tree is traversed with `os.walk`; the first folder whose file list
    contains `filename` wins.

    Args:
        directory: Root folder at which the recursive search starts.
        filename: Exact file name (not a pattern) to look for.

    Returns:
        The joined path of the first match, or None when no folder contains
        a file with that name.
    """
    hits = (
        os.path.join(folder, filename)
        for folder, _subfolders, entries in os.walk(directory)
        if filename in entries
    )
    # next() with a default yields None when the generator is exhausted
    return next(hits, None)

# Locate the lagged feature table on the mounted drive.
search_directory = '/content/drive/My Drive'
file_name = 'lagged_curtailment_target_features_extended.csv'
file_path = search_file(search_directory, file_name)

# search_file returns None when nothing matches; stop here with a clear message
# instead of handing None to pd.read_csv, which fails with an unrelated-looking error.
if file_path is None:
    raise FileNotFoundError(
        f"'{file_name}' was not found anywhere under '{search_directory}'"
    )

# The file is semicolon-separated; the first column becomes the index and is
# converted to datetime so the frame can be sliced by date afterwards.
df_lagged = pd.read_csv(file_path, sep=';', index_col=0)
df_lagged.index = pd.to_datetime(df_lagged.index)

In [4]:
# restrict the frame to the study period via label-based slicing on the datetime index
# (assumes the index is sorted chronologically -- TODO confirm)
start_date = '2022-01-01'
end_date = '2023-06-30'
study_period = slice(start_date, end_date)
df_lagged = df_lagged.loc[study_period]

In [5]:
# preprocessing objects: a pipeline whose only step is a StandardScaler,
# and a seeded SMOTE oversampler (for class imbalance)
smote = SMOTE(random_state=42)
preprocessor = Pipeline(steps=[('scaler', StandardScaler())])

# target is the 'redispatch' column; the features are every column except
# 'redispatch' and 'level'
y = df_lagged['redispatch']
X = df_lagged.drop(columns=['redispatch', 'level'])

In [None]:
# append a trailing axis of length 1 so every sample has the extra dimension
# that the Conv1D layers of the model below are fed with
X_reshaped = np.asarray(X)[..., np.newaxis]

# model architecture
def make_model(input_shape, num_classes, filters=64, kernel_size=3, num_blocks=3):
    """Build an uncompiled 1D convolutional classifier.

    The network stacks `num_blocks` identical blocks of
    Conv1D -> BatchNormalization -> ReLU, applies global average pooling and
    ends in a softmax layer. With the default arguments the architecture is
    three blocks of 64 filters with kernel size 3.

    Args:
        input_shape: Shape of a single sample (without the batch dimension),
            passed unchanged to `Input`.
        num_classes: Number of units of the softmax output layer.
        filters: Number of filters used by every Conv1D layer.
        kernel_size: Kernel width used by every Conv1D layer.
        num_blocks: How many Conv1D/BatchNormalization/ReLU blocks to stack.

    Returns:
        A `Model` mapping the input layer to the softmax output. The caller is
        responsible for compiling it.
    """
    input_layer = Input(input_shape)

    # "same" padding keeps the length of the convolved axis unchanged, so any
    # number of blocks can be chained without shrinking the sequence.
    features = input_layer
    for _ in range(num_blocks):
        features = Conv1D(filters=filters, kernel_size=kernel_size, padding="same")(features)
        features = BatchNormalization()(features)
        features = ReLU()(features)

    pooled = GlobalAveragePooling1D()(features)

    output_layer = Dense(num_classes, activation="softmax")(pooled)

    return Model(inputs=input_layer, outputs=output_layer)

In [7]:
# Per-sample input shape (everything after the batch axis) and number of target classes.
input_shape = X_reshaped.shape[1:]
num_classes = len(np.unique(y))

# Per-fold metrics, one entry appended per cross-validation fold.
train_f1_scores = []
train_precision_scores = []
test_f1_scores = []
test_precision_scores = []

# time series cross-validation
n_splits = 10
gap = 48  # 12 hour difference between train and test sets
tscv = TimeSeriesSplit(n_splits=n_splits, gap=gap)

# NOTE(review): the `preprocessor` and `smote` objects created in an earlier cell
# are not applied in this loop; the folds are taken from X_reshaped as-is.

# iterate over each fold
for fold, (train_index, test_index) in enumerate(tscv.split(X_reshaped), 1):
    print(f"Training on fold {fold}/{n_splits}")

    X_train_fold, X_test_fold = X_reshaped[train_index], X_reshaped[test_index]
    # train_index/test_index are integer positions, while y is a pandas Series
    # carrying the frame's datetime index: select by position explicitly with
    # .iloc instead of relying on the deprecated positional fallback of y[...].
    y_train_fold = y.iloc[train_index].to_numpy()
    y_test_fold = y.iloc[test_index].to_numpy()

    # Callback objects are stateful (ModelCheckpoint remembers the best monitored
    # value it has seen), so every fold gets fresh instances; otherwise the
    # checkpoint decision of a fold would depend on the previous folds' val_loss.
    callbacks = [
        ModelCheckpoint("best_model.keras", save_best_only=True, monitor="val_loss"),
        ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001),
        EarlyStopping(monitor="val_loss", patience=50, verbose=1),
    ]

    # A new, untrained model per fold so no weights leak between folds.
    model = make_model(input_shape, num_classes)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    # NOTE(review): the test fold doubles as validation data, so early stopping
    # and the learning-rate schedule are driven by the data the fold is scored on.
    history = model.fit(
        X_train_fold, y_train_fold,
        validation_data=(X_test_fold, y_test_fold),
        epochs=100,
        batch_size=32,
        callbacks=callbacks,
        verbose=1
    )

    # Scores on the training part of the fold; argmax turns the softmax
    # probabilities into class labels.
    y_pred_train = model.predict(X_train_fold)
    y_pred_train_classes = np.argmax(y_pred_train, axis=1)
    train_f1 = f1_score(y_train_fold, y_pred_train_classes, average='binary', zero_division=1)
    train_precision = precision_score(y_train_fold, y_pred_train_classes, average='binary', zero_division=1)
    train_f1_scores.append(train_f1)
    train_precision_scores.append(train_precision)

    # Same scores on the held-out part of the fold.
    y_pred_test = model.predict(X_test_fold)
    y_pred_test_classes = np.argmax(y_pred_test, axis=1)
    test_f1 = f1_score(y_test_fold, y_pred_test_classes, average='binary', zero_division=1)
    test_precision = precision_score(y_test_fold, y_pred_test_classes, average='binary', zero_division=1)
    test_f1_scores.append(test_f1)
    test_precision_scores.append(test_precision)

print("Average F1 score (test):", np.mean(test_f1_scores))
print("Average precision score (test):", np.mean(test_precision_scores))
print("Average F1 score (train):", np.mean(train_f1_scores))
print("Average precision score (train):", np.mean(train_precision_scores))


Training on fold 1/10
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 52: early stopping
Training on fold 2/10
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100


In [None]:
# make sure the output folder on Google Drive exists, creating it when missing
folder_path = '/content/drive/My Drive/wind_curtailment_prediction'

if os.path.exists(folder_path):
    print("Folder already exists.")
else:
    os.makedirs(folder_path)
    print("Folder created successfully.")

In [None]:
# save deep learning model
# `model` is the network trained in the last cross-validation fold.
# The target is built from `folder_path` so the file lands in the folder whose
# existence was checked above, instead of a second hard-coded spelling of that path.
model.save(os.path.join(folder_path, 'deep_learning_model.h5'))