## Imports

In [None]:
# Import the libraries needed for data processing, machine learning and visualisation
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.utils import Sequence, to_categorical
from tensorflow.keras.layers import GRU, Dropout, Dense, BatchNormalization, Bidirectional
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from sklearn.metrics import classification_report, multilabel_confusion_matrix
import seaborn as sns
import os

## Datenvorbereitung

In [None]:
# Map each device name (the prefix of the CSV file names) to its integer
# class label; the label is simply the position of the name in this tuple.
device_mapping = {
    device: class_index
    for class_index, device in enumerate(
        ("washing_machine", "dishwasher", "freezer", "fridge", "micro_wave_oven")
    )
}

# The number of classes follows directly from the mapping.
num_classes = len(device_mapping)

In [None]:
# CSV files to load, described as (device name, numeric ids from the file
# names). The resulting order matches the original hand-written list exactly.
_file_ids_by_device = (
    ("washing_machine", (343,)),
    ("dishwasher", (53,)),
    ("freezer", (249,)),
    ("fridge", (98, 207, 284, 317)),
    ("micro_wave_oven", (147, 314)),
    ("washing_machine", (32, 52, 135, 157, 218)),
)
selected_files = [
    f"{device}_{file_id}_minimal_length.csv"
    for device, file_ids in _file_ids_by_device
    for file_id in file_ids
]

In [None]:
# Load one device CSV, tag it with its class label and shift its peak ids.
def load_device_data(file_path, label, peak_offset, scaler=None):
    """Read one device CSV and prepare it for merging with other devices.

    Args:
        file_path: Path or file-like object accepted by ``pandas.read_csv``.
            The data must contain a ``peak_number`` column, and a ``power``
            column when ``scaler`` is given.
        label: Class label written to a new ``label`` column on every row.
        peak_offset: Value added to ``peak_number`` so that ids from this
            file do not collide with those of previously loaded files.
        scaler: Optional object exposing ``transform``; when given, the
            ``power`` column is replaced by its transformed values.

    Returns:
        A tuple ``(df, next_offset)``. ``next_offset`` is one past the
        largest shifted peak number, or ``peak_offset`` unchanged when the
        file contains no rows.
    """
    df = pd.read_csv(file_path)
    df['label'] = label

    if df.empty:
        # max() of an empty column is NaN; returning it would turn the
        # offset of every subsequently loaded file into NaN as well.
        return df, peak_offset

    df['peak_number'] += peak_offset

    # Compare against None explicitly so the decision does not depend on
    # how a scaler object happens to evaluate in a boolean context.
    if scaler is not None:
        df['power'] = scaler.transform(df[['power']])

    return df, df['peak_number'].max() + 1

In [None]:
# Create the StandardScaler used to normalise the power readings.
# It is only instantiated here; it is fitted in a later cell.
scaler = StandardScaler()

In [None]:
# Load every selected file once, unscaled, so that the scaler can later be
# fitted on the combined power readings of all devices.
directory = "."
peak_offset = 0
raw_frames = []

for file_name in selected_files:
    # Dropping the last three "_"-separated parts ("<id>", "minimal",
    # "length.csv") leaves the device name used as key in device_mapping.
    device_name = "_".join(file_name.split("_")[:-3])
    label = device_mapping.get(device_name)
    # Files whose device name is not in the mapping are skipped.
    if label is not None:
        file_path = os.path.join(directory, file_name)
        device_data, peak_offset = load_device_data(file_path, label, peak_offset)
        raw_frames.append(device_data)

# Concatenate once at the end: growing a DataFrame with pd.concat inside the
# loop re-copies all previous rows each time and starts from an empty frame.
all_data = pd.concat(raw_frames) if raw_frames else pd.DataFrame()

In [None]:
# Fit the scaler on the power column of all loaded files combined.
# NOTE(review): this runs before the train/test split, so samples that end
# up in the test set also contribute to the scaling statistics — confirm
# that this is acceptable.
scaler.fit(all_data[['power']])

In [None]:
# Second pass over the files: reload each one, this time applying the fitted
# scaler to the power column, and collect the resulting frames in `data`.
data = []
peak_offset = 0

for file_name in selected_files:
    name_parts = file_name.split("_")
    device_name = "_".join(name_parts[:-3])
    label = device_mapping.get(device_name)
    if label is None:
        # Unknown device name: nothing to load for this file.
        continue
    file_path = os.path.join(directory, file_name)
    device_data, peak_offset = load_device_data(
        file_path,
        label,
        peak_offset,
        scaler=scaler,
    )
    data.append(device_data)

In [None]:
# Combine the per-file frames into one DataFrame; ignore_index=True discards
# the per-file row indices and assigns a fresh consecutive index.
df_all = pd.concat(data, ignore_index=True)

In [None]:
# Split the combined frame into one sub-frame per peak number, then derive
# the power series and the class label (taken from the first row) of each peak.
grouped = df_all.groupby('peak_number')
peak_frames = [frame for _, frame in grouped]
time_series = [frame['power'].values for frame in peak_frames]
labels = [frame['label'].iloc[0] for frame in peak_frames]

In [None]:
# One-hot encode the integer class labels in a single call: every peak gets
# a vector of length num_classes with a 1 at the index of its class.
labels = to_categorical(labels, num_classes=num_classes)

In [None]:
# Length of the longest series; shorter series are padded up to this length.
max_length = max(map(len, time_series))

## Datenaufbereitung für das Modell

In [None]:
# Data generator that feeds zero-padded batches to Keras during training.
class DataGenerator(Sequence):
    """Keras ``Sequence`` serving fixed-length batches of time series.

    Args:
        time_series: Indexable collection of series; each one is padded at
            its end with zeros (assumed to be 1-D arrays — TODO confirm).
        labels: Indexable collection of targets aligned with ``time_series``.
        batch_size: Number of samples per batch; the last batch may be smaller.
        max_length: Length every series is padded to.
        **kwargs: Forwarded unchanged to the Keras base-class constructor.
    """

    def __init__(self, time_series, labels, batch_size, max_length, **kwargs):
        # The Keras base class has its own constructor; call it so that any
        # state it sets up exists before Keras uses this object.
        super().__init__(**kwargs)
        self.time_series = time_series
        self.labels = labels
        self.batch_size = batch_size
        self.max_length = max_length

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.time_series) / self.batch_size))

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(padded_series, labels)`` arrays.

        Each series is right-padded with zeros to ``max_length`` and a
        trailing axis of size 1 is appended to the stacked result.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.time_series[start:stop]
        batch_y = self.labels[start:stop]

        # Pad only at the end of each series; np.pad rejects a negative pad
        # width, so a series longer than max_length raises here.
        padded = np.array([
            np.pad(ts, (0, self.max_length - len(ts)), 'constant')
            for ts in batch_x
        ])

        return np.expand_dims(padded, axis=-1), np.array(batch_y)

In [None]:
# Split the per-peak series and their one-hot labels into a training set and
# a test set (test_size=0.2); the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(time_series, labels, test_size=0.2, random_state=42)

In [None]:
# Compute balanced class weights to counter the class imbalance in the
# training split.
y_train_flat = np.argmax(y_train, axis=1)  # one-hot rows -> class indices
present_classes = np.unique(y_train_flat)
class_weights = compute_class_weight(class_weight='balanced', classes=present_classes, y=y_train_flat)
# Key each weight by its actual class index. Enumerating the weights would
# shift them onto the wrong classes whenever a class is absent from y_train.
class_weight_dict = {
    int(class_index): float(weight)
    for class_index, weight in zip(present_classes, class_weights)
}

In [None]:
# Batch size shared by the training and the test generator.
batch_size = 32
# Wrap each split in a DataGenerator that pads its series to max_length.
train_gen, test_gen = (
    DataGenerator(series, targets, batch_size, max_length)
    for series, targets in ((x_train, y_train), (x_test, y_test))
)

## Modelldefinition und Training

In [None]:
# CNN-RNN classifier for the padded time series, declared as a single list
# of layers in the order they are applied.
model = models.Sequential([
    # Convolutional front end for local feature extraction
    layers.Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=(max_length, 1)),
    BatchNormalization(),
    layers.MaxPooling1D(pool_size=2),
    Dropout(0.3),

    # Two stacked bidirectional GRU layers; only the first returns sequences
    Bidirectional(GRU(256, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(GRU(256)),
    BatchNormalization(),

    # Regularised dense head with one sigmoid output unit per class
    Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.4),
    Dense(num_classes, activation='sigmoid'),
])

In [None]:
# Compile the model with the Adam optimizer (learning rate 0.001) and binary
# cross-entropy as loss, tracking accuracy as the metric.
# NOTE(review): the targets are one-hot vectors built with to_categorical,
# i.e. exactly one class per sample — confirm that a sigmoid / binary
# cross-entropy setup is intended rather than softmax / categorical
# cross-entropy.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Halve the learning rate after 2 epochs without val_loss improvement,
# never going below 1e-5.
lr_scheduler = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=1e-5,
)
# Stop after 5 epochs without val_loss improvement and restore the best weights.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Train for at most 100 epochs with class weighting, learning-rate
# scheduling and early stopping.
# NOTE(review): the test generator is also used as validation data here, so
# both callbacks are driven by the same samples that are evaluated afterwards.
history = model.fit(
    train_gen,
    validation_data=test_gen,
    epochs=100,
    class_weight=class_weight_dict,
    callbacks=[lr_scheduler, early_stopping],
)

## Modellbewertung und Visualisierung

In [None]:
# Evaluate the model on the test generator and print the resulting accuracy.
# NOTE(review): the model is compiled with binary cross-entropy, so the
# reported "accuracy" may be computed per output unit rather than per
# sample — confirm which metric Keras resolves it to.
test_loss, test_acc = model.evaluate(test_gen)
print(f"Test accuracy: {test_acc}")

In [None]:
# Plot the training and validation loss per epoch in one figure.
plt.figure()
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
# The explicit list passed here determines the legend entries shown.
plt.legend(['Train', 'Val'], loc='best')
plt.show()

In [None]:
# Save the trained model to a single .keras file in the working directory.
model.save("appliance_classification_model.keras")

In [None]:
# Load the model back from the .keras file; this rebinds `model` to the
# loaded instance.
model = load_model('appliance_classification_model.keras')

In [None]:
# Gather every test batch from the generator, stack inputs and targets into
# single arrays, and predict on the whole test set in one call.
test_batches = [test_gen[batch_index] for batch_index in range(len(test_gen))]
x_test_all = np.vstack([features for features, _ in test_batches])
y_test_all = np.vstack([targets for _, targets in test_batches])

y_pred_all = model.predict(x_test_all)
# Round each sigmoid output to 0 or 1 to obtain a binary indicator matrix.
y_pred_rounded = np.round(y_pred_all)

In [None]:
# Build the per-class classification report, labelled with the device names.
class_names = list(device_mapping.keys())
report = classification_report(
    y_test_all,
    y_pred_rounded,
    target_names=class_names,
)
print("Classification Report:\n", report)

# One 2x2 confusion matrix per class (that class versus all others).
conf_matrix = multilabel_confusion_matrix(y_test_all, y_pred_rounded)

In [None]:
# Draw one heatmap per device from its 2x2 confusion matrix.
for label, matrix in zip(device_mapping.keys(), conf_matrix):
    # Same tick labels on both axes: negative class first, then the device.
    tick_labels = ["Not " + label, label]
    plt.figure(figsize=(6, 4))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=tick_labels,
        yticklabels=tick_labels,
    )
    plt.title(f'Confusion Matrix for {label}')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()