## **Soft Voting Classifier**

In [5]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import ExtraTreesClassifier, VotingClassifier
from imblearn.over_sampling import SMOTE
from sklearn.base import is_classifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, f1_score, recall_score

from keras.models import load_model
import joblib
from sklearn.model_selection import TimeSeriesSplit

In [2]:
# Mount Google Drive at /content/drive so later cells can read files from it.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime -- confirm before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [32]:
# searching for files, load data and convert index to datetime type
def search_file(directory, filename):
    """Find the first file called ``filename`` anywhere below ``directory``.

    The tree is walked with ``os.walk``; the first folder whose file list
    contains ``filename`` wins.

    Args:
        directory: Root folder at which the recursive search starts.
        filename: Exact file name (not a pattern) to look for.

    Returns:
        The joined path ``folder/filename`` of the first match, or ``None``
        when no folder in the tree contains such a file.
    """
    # Lazy generator: the walk stops as soon as the first hit is consumed.
    hits = (
        os.path.join(folder, filename)
        for folder, _subfolders, names in os.walk(directory)
        if filename in names
    )
    return next(hits, None)

# Locate the lagged feature table somewhere below the mounted Drive folder.
search_directory = '/content/drive/My Drive'
file_name = 'lagged_curtailment_target_features.csv'
file_path = search_file(search_directory, file_name)

# search_file returns None when nothing matches; stop here with a clear
# message instead of handing None to pd.read_csv.
if file_path is None:
    raise FileNotFoundError(
        f"'{file_name}' was not found under '{search_directory}'"
    )

# The CSV is semicolon-separated; its first column becomes the index and is
# then converted to datetimes.
df_lagged = pd.read_csv(file_path, sep=';', index_col=0)
df_lagged.index = pd.to_datetime(df_lagged.index)

In [8]:
# load XGBoost classifier
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts that come from a trusted source.
xgboost_class = joblib.load('/content/drive/My Drive/wind_curtailment_prediction/xgboost_class.pkl')

# load Extra Trees classifier
# NOTE(review): both objects are later passed to VotingClassifier, whose fit()
# trains clones of its estimators; presumably only the stored hyperparameters
# (not any fitted state) carry over -- confirm this is intended.
extra_trees_clf = joblib.load('/content/drive/My Drive/wind_curtailment_prediction/extra_trees_classifier.pkl')

In [33]:
# Preprocessing pipeline: a single standard-scaling step (no imputation step
# is configured here).
preprocessor = Pipeline(steps=[('scaler', StandardScaler())])

# SMOTE oversampler used against class imbalance; synthetic samples are
# interpolated towards one nearest neighbour, with a fixed seed.
smote = SMOTE(random_state=42, k_neighbors=1)

# Soft-voting ensemble: the two loaded classifiers contribute with equal
# weight.
voting_classifier = VotingClassifier(
    voting='soft',
    weights=[1, 1],
    estimators=[
        ('xgboost', xgboost_class),
        ('extra_trees', extra_trees_clf),
    ],
)

In [34]:
# Restrict the frame to the study window (label-based slice on the index).
# NOTE(review): assumes the datetime index is sorted chronologically -- confirm.
start_date = '2022-01-01'
end_date = '2023-06-30'
df_lagged = df_lagged.loc[start_date:end_date]

# Target is the 'redispatch' column; the features are every remaining column
# except 'level'.
y = df_lagged['redispatch']
X = df_lagged.drop(columns=['redispatch', 'level'])

In [35]:
# Time-series cross-validation of the soft-voting ensemble.
# TimeSeriesSplit yields chronologically ordered folds: each fold trains on
# earlier rows and tests on later ones, skipping `gap` rows in between.
n_splits = 10
gap = 48

tscv = TimeSeriesSplit(n_splits=n_splits, gap=gap)
train_f1_scores = []
train_precision_scores = []
test_f1_scores = []
test_precision_scores = []

# Shared metric settings. zero_division=0 scores an undefined metric (e.g. a
# fold in which the model predicts no positives) as 0; scoring it as 1 would
# report a perfect result for that fold and inflate the averages below.
metric_kwargs = dict(average='binary', zero_division=0)

# Iterate over each fold
for fold, (train_index, test_index) in enumerate(tscv.split(X), 1):
    print(f"Training on fold {fold}/{n_splits}")

    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fit the preprocessing on the training rows only, then reuse it unchanged
    # on the test rows so no test statistics leak into training.
    X_train_preprocessed = preprocessor.fit_transform(X_train)
    X_test_preprocessed = preprocessor.transform(X_test)

    # Oversample the training rows only; the test rows keep their original
    # class balance.
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train_preprocessed, y_train)

    voting_classifier.fit(X_train_resampled, y_train_resampled)

    # Train metrics are computed on the original (non-resampled) training rows.
    y_train_pred = voting_classifier.predict(X_train_preprocessed)
    y_test_pred = voting_classifier.predict(X_test_preprocessed)

    # evaluate
    train_f1_scores.append(f1_score(y_train, y_train_pred, **metric_kwargs))
    train_precision_scores.append(precision_score(y_train, y_train_pred, **metric_kwargs))
    test_f1_scores.append(f1_score(y_test, y_test_pred, **metric_kwargs))
    test_precision_scores.append(precision_score(y_test, y_test_pred, **metric_kwargs))

# Unweighted mean of the per-fold scores.
avg_train_f1 = np.mean(train_f1_scores)
avg_train_precision = np.mean(train_precision_scores)
avg_test_f1 = np.mean(test_f1_scores)
avg_test_precision = np.mean(test_precision_scores)

# Print the results
print("Average Train F1 Score:", avg_train_f1)
print("Average Train Precision:", avg_train_precision)
print("\nAverage Test F1 Score:", avg_test_f1)
print("Average Test Precision:", avg_test_precision)

Training on fold 1/10
Training on fold 2/10
Training on fold 3/10
Training on fold 4/10
Training on fold 5/10
Training on fold 6/10
Training on fold 7/10
Training on fold 8/10
Training on fold 9/10
Training on fold 10/10
Average Train F1 Score: 0.7008136203835795
Average Train Precision: 0.5690254824963213

Average Test F1 Score: 0.3635277784308423
Average Test Precision: 0.43367400359549013
