## Baseline Binary Classification Model

In [1]:
# packages
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import confusion_matrix, precision_score, f1_score

In [2]:
# mount your google drive
# Mounts Google Drive at /content/drive; the data-loading cell further down
# searches '/content/drive/My Drive' for the input CSV.
# NOTE(review): google.colab is presumably only available inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# searching for files, load data and convert index to datetime type
def search_file(directory, filename):
    """Return the path of the first file called ``filename`` below ``directory``.

    The directory tree is traversed with ``os.walk``; the first visited folder
    whose file list contains ``filename`` determines the result.

    Args:
        directory: Root folder at which the search starts.
        filename: Exact file name (not a pattern) to look for.

    Returns:
        The joined path ``<folder>/<filename>`` of the first match, or ``None``
        when no folder below ``directory`` contains such a file.
    """
    matches = (
        os.path.join(folder, filename)
        for folder, _subdirs, names in os.walk(directory)
        if filename in names
    )
    # next() with a default stops at the first hit and yields None on a miss.
    return next(matches, None)

# Locate the dataset on the mounted Drive, load it and use timestamps as index.
search_directory = '/content/drive/My Drive'
file_name = 'curtailment_target_features.csv'
file_path = search_file(search_directory, file_name)

# search_file returns None when nothing matches; fail here with a clear message
# instead of handing None to pd.read_csv.
if file_path is None:
    raise FileNotFoundError(
        f"'{file_name}' was not found under '{search_directory}'"
    )

# The file is semicolon-separated; its first column becomes the index and is
# then converted to datetime so the frame can be sliced by date.
df = pd.read_csv(file_path, sep=';', index_col=0)
df.index = pd.to_datetime(df.index)

**Baseline Model**

Assign a positive redispatch status whenever the maximum wind gust (`wind_gust_max_m/s`) exceeds a fixed threshold (9 m/s in the evaluation below).

In [6]:
# selects only the target and the feature 'wind gust max' & replace NaN values with mean
# .copy() gives an independent two-column frame, so the assignment below writes
# to a real frame rather than to a selection of the original one.
df = df[['redispatch', 'wind_gust_max_m/s']].copy()
# Assign the filled column back instead of calling fillna(inplace=True) on a
# column selection: that chained in-place form is deprecated in pandas and does
# not modify the frame when Copy-on-Write is enabled.
df['wind_gust_max_m/s'] = df['wind_gust_max_m/s'].fillna(df['wind_gust_max_m/s'].mean())

In [7]:
# summary statistics of the wind gust feature, grouped by redispatch status
# (no prediction happens here; the evaluation cell further down labels rows
# with a gust above 9 m/s as redispatch)
df.groupby('redispatch').describe()

Unnamed: 0_level_0,wind_gust_max_m/s,wind_gust_max_m/s,wind_gust_max_m/s,wind_gust_max_m/s,wind_gust_max_m/s,wind_gust_max_m/s,wind_gust_max_m/s,wind_gust_max_m/s
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
redispatch,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
0.0,131810.0,6.583379,3.878469,0.0,3.6,6.0,8.8,30.2
1.0,8398.0,9.233684,3.969521,0.3,6.35,9.2,11.65,26.1


In [13]:
# get desired df size (label-based slice on the datetime index, both ends inclusive)
start_date = '2022-01-01'
end_date = '2023-06-30'
df = df.loc[start_date:end_date]

# features X and target y
X = df['wind_gust_max_m/s']
y = df['redispatch']

# gust speed (m/s) above which the baseline predicts a redispatch
wind_gust_threshold = 9

# The rule is evaluated once on the whole date range, so each list ends up with
# a single entry and the "average" printed below is that single value.
precision_scores = []
f1_scores = []
conf_matrices = []

# make predictions: vectorised strict comparison, converted to a plain list of
# 0/1 ints (a NaN gust compares False and therefore maps to 0)
y_pred = (X > wind_gust_threshold).astype(int).tolist()
precision_scores.append(precision_score(y, y_pred, average='binary', zero_division=1))
f1_scores.append(f1_score(y, y_pred, average='binary', zero_division=1))
conf_matrices.append(confusion_matrix(y, y_pred))

# print evaluation
print("Average Scores:")
print("Precision (Test):", np.mean(precision_scores))
print("F1-Scores (Test):", np.mean(f1_scores))

# set to True to additionally print the (rounded) mean confusion matrix
confusion_matrix_test = False
if confusion_matrix_test:
    average_conf_matrix = np.round(sum(conf_matrices) / len(conf_matrices)).astype(int)
    print("Average Confusion Matrix:")
    print(f"{'True Negative':<20} {'False Positive':<20}")
    print(f"{average_conf_matrix[0][0]:<20} {average_conf_matrix[0][1]:<20}")
    print(f"{'False Negative':<20} {'True Positive':<20}")
    print(f"{average_conf_matrix[1][0]:<20} {average_conf_matrix[1][1]:<20}")

Average Scores:
Precision (Test): 0.255848469856527
F1-Scores (Test): 0.33960317088043535
