In [15]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import ExtraTreesClassifier
from imblearn.over_sampling import SMOTE

from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from keras.models import load_model
from sklearn.ensemble import ExtraTreesClassifier
import joblib

from sklearn.metrics import classification_report, confusion_matrix, precision_score, f1_score
from sklearn.model_selection import TimeSeriesSplit

In [2]:
# mount your google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Loading the Deep learning model and the Extra trees classifier model

In [23]:
# Load deep learning model
# deep_learning_model = load_model('deep_learning_model.h5')

# Load XGBoost classifier
xgboost_class = joblib.load('/content/drive/My Drive/ms_wind_curtailment_prediction/xgboost_class.pkl')

# Load Extra Trees classifier
extra_trees_clf = joblib.load('/content/drive/My Drive/ms_wind_curtailment_prediction/extra_trees_classifier.pkl')


In [31]:
df = pd.read_csv('/content/drive/My Drive/ms_wind_curtailment_prediction/lagged_curtailment_target_features.csv', sep = ';', index_col=0)

In [35]:
# get desired df size
start_date = '2021-01-01'
end_date = '2023-06-30'
df = df.loc[start_date:end_date]

In [33]:
# impute, scale pipeline and smote (for class imbalance)
preprocessor = Pipeline([
    ('scaler', StandardScaler())
])

smote = SMOTE(random_state=13)

# define features X and target y
X = df.drop(['redispatch', 'level'], axis = 1)
y = df['redispatch']

Voting use a soft vote

In [36]:
# Create a voting classifier with soft voting
voting_classifier = VotingClassifier(
    estimators=[
        ('xgboost', xgboost_class),
        ('extra_trees', extra_trees_clf)
    ],
    voting='soft'
)

# define features X and target y
X = df.drop(['redispatch', 'level'], axis = 1)
y = df['redispatch']

# cross-validation
n_splits = 70
test_size = 48 #(48 - 12h with 15 min intervalls)
tscv = TimeSeriesSplit(n_splits=n_splits, test_size=test_size, gap = 10)

precision_scores = []
f1_scores = []
conf_matrices = []
precision_train_scores = []
f1_train_scores = []
conf_train_matrices = []

for fold, (train_index, test_index) in enumerate(tscv.split(X), 1):
    print(f"Training on fold {fold}/{n_splits}")

    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # having at least one instance of redispatch 1 and 0 in test
    if y_test.sum() == 0 or y_test.sum() == test_size:
        continue

    # preprocess train and test data
    X_train_scaled = preprocessor.fit_transform(X_train)
    X_train_preprocessed, y_train_preprocessed = smote.fit_resample(X_train_scaled, y_train)
    X_test_preprocessed = preprocessor.transform(X_test)

    # fit the model
    voting_classifier.fit(X_train_preprocessed, y_train_preprocessed)

    # Make predictions using the voting classifier
    threshold = 0.7
    y_prob_voting = voting_classifier.predict_proba(X_test_preprocessed)
    y_pred_voting = (y_prob_voting[:, 1] >= threshold).astype(int)
    y_train_prob_voting = voting_classifier.predict_proba(X_train_preprocessed)
    y_pred_train_voting = (y_train_prob_voting[:, 1] >= threshold).astype(int)

    # evaluate
    print("Calculating evaluation metrics for test set...")
    precision_scores.append(precision_score(y_test, y_pred_voting))
    f1_scores.append(f1_score(y_test, y_pred_voting))
    conf_matrices.append(confusion_matrix(y_test, y_pred_voting))

    print("Calculating evaluation metrics for train set...")
    precision_train_scores.append(precision_score(y_train_preprocessed, y_pred_train_voting))
    f1_train_scores.append(f1_score(y_train_preprocessed, y_pred_train_voting))
    conf_train_matrices.append(confusion_matrix(y_train_preprocessed, y_pred_train_voting))

# print evaluation results
print("Average Scores:")
print("Precision:", np.array(precision_scores).mean())
print("F1-Scores:", np.array(f1_scores).mean())
print("Confusion Matrix:", sum(conf_matrices)/len(conf_matrices))
print("Precision (Train):", np.array(precision_train_scores).mean())
print("F1-Scores (Train):", np.array(f1_train_scores).mean())
print("Confusion Matrix (Train):", sum(conf_train_matrices)/len(conf_train_matrices))

Training on fold 1/70
Training on fold 2/70
Training on fold 3/70
Training on fold 4/70
Training on fold 5/70
Training on fold 6/70
Training on fold 7/70
Training on fold 8/70
Training on fold 9/70
Training on fold 10/70
Training on fold 11/70
Training on fold 12/70
Training on fold 13/70
Training on fold 14/70
Calculating evaluation metrics for test set...
Calculating evaluation metrics for train set...
Training on fold 15/70
Training on fold 16/70
Training on fold 17/70
Training on fold 18/70
Training on fold 19/70
Training on fold 20/70
Training on fold 21/70
Training on fold 22/70
Training on fold 23/70
Training on fold 24/70
Training on fold 25/70
Training on fold 26/70
Training on fold 27/70
Training on fold 28/70
Training on fold 29/70
Training on fold 30/70
Training on fold 31/70
Training on fold 32/70
Calculating evaluation metrics for test set...
Calculating evaluation metrics for train set...
Training on fold 33/70
Training on fold 34/70
Training on fold 35/70
Training on fo