**Map EEG-keys with algorithm**

In [None]:
# packages
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense

import seaborn as sns

In [None]:
# Train a classifier that maps the numeric components of an EEG plant key
# to the plant type.

# Keep only the EEG key (feature source) and the plant type (target), and
# discard rows where either is missing. The explicit copy makes sure the
# column assignments below write to an independent frame and not to a slice
# of df_eeg.
columns_to_select = ['plant_key_eeg', 'plant_type']
df_eeg_analyse = df_eeg[columns_to_select].dropna().copy()

# Slice the key into its components.
feature_columns = ['operation_number', 'network_number', 'plant_number']
df_eeg_analyse['operation_number'] = df_eeg_analyse['plant_key_eeg'].str[2:6]  # operator number of the grid operator, assigned by the BNetzA
df_eeg_analyse['network_number'] = df_eeg_analyse['plant_key_eeg'].str[6:8]  # network number of the grid, assigned by the BNetzA
# Operator-specific alphanumeric plant identifier. Original author's note:
# nominally 8 characters, but that span then contains a letter, hence the
# last 10 characters are used -- TODO confirm against the key specification.
df_eeg_analyse['plant_number'] = df_eeg_analyse['plant_key_eeg'].str[-10:]

# Convert the sliced strings to numbers the same way the prediction step
# does for the redispatch keys, so training and inference see identically
# prepared features. Key parts that are not purely numeric become NaN and
# the affected rows are removed instead of failing inside fit().
for feature_column in feature_columns:
    df_eeg_analyse[feature_column] = pd.to_numeric(df_eeg_analyse[feature_column], errors='coerce')
df_eeg_analyse = df_eeg_analyse.dropna(subset=feature_columns)

# Apply label encoding to the 'plant_type' column. This is done after the
# cleaning above so the encoder only knows classes that are still present.
label_encoder = LabelEncoder()
df_eeg_analyse['plant_type_encoded'] = label_encoder.fit_transform(df_eeg_analyse['plant_type'])

# Print the mapping between original categories and encoded labels
print("Original Categories:", label_encoder.classes_)
print("\nEncoded Labels:", label_encoder.transform(label_encoder.classes_))

# Train a random forest to map the key features to the plant type.
X = df_eeg_analyse[feature_columns]
y = df_eeg_analyse['plant_type_encoded']

# Stratified split keeps the class proportions equal in train and test.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

eeg_plant_model = RandomForestClassifier(n_estimators=100, random_state=7)
eeg_plant_model.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = eeg_plant_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

# Predict the plant type of every redispatch plant from its key, using the
# random forest trained on the EEG keys.

# Slice the redispatch key into the same three components the model was
# trained on.
redispatch_feature_columns = ['operation_number', 'network_number', 'plant_number']
df_redispatch['operation_number'] = df_redispatch['plant_key'].str[2:6]
df_redispatch['network_number'] = df_redispatch['plant_key'].str[6:8]
df_redispatch['plant_number'] = df_redispatch['plant_key'].str[-10:]

# Key parts that are not purely numeric become NaN.
for feature_column in redispatch_feature_columns:
    df_redispatch[feature_column] = pd.to_numeric(df_redispatch[feature_column], errors='coerce')

# Remove only the rows whose key could not be parsed. Restricting the drop
# to the feature columns keeps rows that merely have gaps in unrelated
# columns of df_redispatch.
df_redispatch.dropna(subset=redispatch_feature_columns, inplace=True)

redispatch_features = df_redispatch[redispatch_feature_columns]

# Build the code -> name mapping from the fitted encoder instead of
# hard-coding it, so it cannot drift from the categories seen in training.
# LabelEncoder encodes each category as its index in classes_.
encoded_to_plant_type = dict(enumerate(label_encoder.classes_))

predicted_plant_type_encoded = eeg_plant_model.predict(redispatch_features)
predicted_plant_type = [encoded_to_plant_type[label] for label in predicted_plant_type_encoded]
df_redispatch['predicted_plant_type'] = predicted_plant_type

predicted_plant_probabilities = eeg_plant_model.predict_proba(redispatch_features)

# The columns of predict_proba follow the order of eeg_plant_model.classes_,
# so each probability column is named after the class it belongs to
# (e.g. prob_biomass, prob_solar, prob_wind).
for column_index, class_code in enumerate(eeg_plant_model.classes_):
    df_redispatch[f"prob_{encoded_to_plant_type[class_code]}"] = predicted_plant_probabilities[:, column_index]