In [4]:
# packages
import os
import numpy as np
import pandas as pd

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

from xgboost import XGBClassifier

from sklearn.metrics import classification_report

from matplotlib import pyplot as plt

In [2]:
# Make Google Drive reachable from this Colab runtime; the dataset is read
# from a path below this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
# Load the feature/target table from Drive. The file is ';'-separated and its
# first column becomes the row index (read as-is, no date parsing requested).
csv_path = '/content/drive/My Drive/ms_wind_curtailment_prediction/lagged_curtailment_target_features.csv'
df = pd.read_csv(csv_path, sep=';', index_col=0)

In [5]:
# Chronological hold-out: rows whose index label sorts before `cutoff_time`
# form the training set, all remaining rows (>= cutoff) form the test set.
# NOTE(review): the comparison is done on the index labels against a string;
# presumably the labels are ISO-formatted timestamps so that ordering matches
# time order -- confirm against the CSV.
cutoff_time = "2023-01-01"

# Columns that must not be fed to the model: the target itself ('redispatch')
# and 'level' (presumably intended as sample weights, see TODO below).
non_feature_cols = ['redispatch', 'level']

# --- training portion ---
train = df.loc[df.index < cutoff_time]
X_train = train.drop(columns=non_feature_cols)
y_train = train['redispatch']

# --- test portion ---
test = df.loc[df.index >= cutoff_time]
X_test = test.drop(columns=non_feature_cols)
y_test = test['redispatch']

# TODO: derive sample weights from df['level'] (not implemented yet).

In [6]:
# Feature preprocessing: missing values are replaced by the constant 0, then
# every feature is standardised. The pipeline is fitted on the training
# features only and merely applied to the test features.
zero_imputer = SimpleImputer(strategy='constant', fill_value=0)
preprocessor = Pipeline(steps=[
    ('imputer', zero_imputer),
    ('scaler', StandardScaler()),
])

# Oversampler used to rebalance the training classes (seeded for repeatability).
smote = SMOTE(random_state=13)

# Fit on train, then reuse the fitted statistics for test.
X_train_scaled = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)

# Resample the training set only; the test set keeps its original class balance.
X_train_preprocessed, y_train_preprocessed = smote.fit_resample(X_train_scaled, y_train)

In [9]:
# Binary classifier: a very small boosted ensemble (2 trees of depth 2) with
# learning rate 1 and a logistic objective; seeded for repeatability.
xgb_params = dict(
    n_estimators=2,
    max_depth=2,
    learning_rate=1,
    objective='binary:logistic',
    random_state=9,
)
xgboost_class = XGBClassifier(**xgb_params)

# Train on the preprocessed, resampled training data.
xgboost_class.fit(X_train_preprocessed, y_train_preprocessed)

# Predict class labels for the preprocessed hold-out features.
y_pred = xgboost_class.predict(X_test_preprocessed)

In [10]:
# Per-class precision / recall / F1 of the predictions on the hold-out set.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     32458
         1.0       0.96      0.96      0.96      2484

    accuracy                           0.99     34942
   macro avg       0.98      0.98      0.98     34942
weighted avg       0.99      0.99      0.99     34942

