In [None]:
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve, auc
import matplotlib.pyplot as plt

# PICKLE_DIR_SMALL = "./"  # Update this to the folder where your pickle files are stored
# NOTE(review): PICKLE_DIR_SMALL is passed to train_and_evaluate_autoencoder in the
# last cell, but the assignment above is commented out. Unless the name is defined
# somewhere outside this view, that call raises NameError on a fresh kernel.
# Directory in which train_and_evaluate_autoencoder writes the trained
# autoencoder / encoder / decoder ".h5" files.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# configurable before sharing the notebook.
save_path = 'D:/9999_OneDrive_ZHAW/OneDrive - ZHAW/BA_ZHAW_RTO/models/mimii/'
# Load the "_norm" and "_abnorm" data sets that belong to one key
def load_data_with_key(key, pickle_dir):
    """Fetch the two data arrays stored under ``key``.

    ``load_data_sub`` is called twice, once with ``key + "_norm"`` and once with
    ``key + "_abnorm"``. Each call must return exactly four values; only the
    fourth one is kept, the first three are discarded.

    Args:
        key: Base name of the data set; the suffixes are appended to it.
        pickle_dir: Forwarded unchanged to ``load_data_sub``.

    Returns:
        A tuple ``(data_n_norm, data_n_abnorm)`` holding the fourth return value
        of the "_norm" call and of the "_abnorm" call, in that order.
    """
    _, _, _, norm_data_n = load_data_sub(key + "_norm", pickle_dir)
    _, _, _, abnorm_data_n = load_data_sub(key + "_abnorm", pickle_dir)
    return norm_data_n, abnorm_data_n

# Build one labelled data set from both inputs and split it into train / eval parts
def split_data(data_n_norm, data_n_abnorm, train_ratio=0.6):
    """Concatenate both inputs, label them and split them with ``train_test_split``.

    Samples from ``data_n_norm`` are labelled 0.0 and samples from
    ``data_n_abnorm`` are labelled 1.0. The split is stratified on these labels
    and uses the fixed ``random_state=42``, so repeated calls on the same data
    give the same partition.

    Args:
        data_n_norm: Array-like of samples that receive label 0.
        data_n_abnorm: Array-like of samples that receive label 1.
        train_ratio: Passed to ``train_test_split`` as ``train_size``.

    Returns:
        A tuple ``(X_train, X_eval, y_train, y_eval)``.
    """
    samples = np.concatenate((data_n_norm, data_n_abnorm))

    # Labels follow the concatenation order: zeros first, then ones.
    n_norm = len(data_n_norm)
    labels = np.zeros(n_norm + len(data_n_abnorm))
    labels[n_norm:] = 1

    X_train, X_eval, y_train, y_eval = train_test_split(
        samples,
        labels,
        train_size=train_ratio,
        random_state=42,
        stratify=labels,
    )
    return X_train, X_eval, y_train, y_eval

# Helper: per-sample mean squared error between inputs and their reconstructions
def _reconstruction_error(original, reconstructed):
    """Return one mean squared error value per sample.

    The squared difference is averaged over every axis except the first one, so
    the result is always one-dimensional with one score per sample, regardless
    of how many feature axes the inputs have. For 2-D inputs this equals
    ``np.mean(..., axis=1)``.

    Args:
        original: Array whose first axis indexes the samples.
        reconstructed: Array of the same shape as ``original``.

    Returns:
        1-D array of length ``len(original)``.
    """
    squared_diff = (np.asarray(original) - np.asarray(reconstructed)) ** 2
    feature_axes = tuple(range(1, squared_diff.ndim))
    return np.mean(squared_diff, axis=feature_axes)


# Helper: draw the training / validation loss curves on a dedicated figure
def _plot_loss_history(history):
    """Plot ``history.history['loss']`` and ``history.history['val_loss']``.

    A fresh figure is created for every call and closed afterwards, so curves of
    different runs never end up on the same axes and figures do not pile up in
    memory when several keys are processed.
    """
    fig, ax = plt.subplots()
    ax.plot(history.history['loss'])
    ax.plot(history.history['val_loss'])
    ax.set_title('Model loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper right')
    plt.show()
    plt.close(fig)


# Function to create, train, visualize and evaluate autoencoder for each dataset
def train_and_evaluate_autoencoder(keys, pickle_dir, config):
    """Train, plot, score and save one autoencoder per data set key.

    For every key the function
      1. loads the data via ``load_data_with_key`` and splits it via ``split_data``,
      2. builds the models with ``create_autoencoder`` and fits the autoencoder
         to reconstruct ``X_train``, using ``X_eval`` as validation data,
      3. plots the training and validation loss,
      4. computes the per-sample reconstruction error on ``X_eval`` and the area
         under the precision-recall curve against ``y_eval``,
      5. saves autoencoder, encoder and decoder as ".h5" files under the
         module-level ``save_path``.

    Args:
        keys: Iterable of data set keys to process.
        pickle_dir: Forwarded to ``load_data_with_key``.
        config: Mapping; this function reads ``config['epochs']`` and
            ``config['batch_size']`` and passes the whole mapping on to
            ``create_autoencoder``.

    Returns:
        None. Results are printed and the models are written to disk.

    Notes:
        * ``split_data`` stratifies over both classes, so ``X_train`` contains
          samples labelled abnormal as well.
        * ``X_eval`` serves both as validation data during ``fit`` and as the set
          the precision-recall AUC is computed on.
        * ``config['patience']`` is not read by this function.
    """
    # Create the output directory up front so a missing folder is detected
    # before the training runs rather than after it.
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    for key in keys:
        print(f"Processing {key}")
        data_n_norm, data_n_abnorm = load_data_with_key(key, pickle_dir)
        X_train, X_eval, y_train, y_eval = split_data(data_n_norm, data_n_abnorm)

        # The autoencoder reconstructs its input, so both shapes are identical.
        input_shape = X_train.shape[1:]
        output_shape = X_train.shape[1:]

        autoencoder, encoder, decoder = create_autoencoder(input_shape, output_shape, config)

        # Training the autoencoder
        history = autoencoder.fit(
            X_train,
            X_train,
            epochs=config['epochs'],
            batch_size=config['batch_size'],
            validation_data=(X_eval, X_eval),
        )

        # Visualizing the loss
        _plot_loss_history(history)

        # Evaluating the autoencoder
        X_eval_encoded = encoder.predict(X_eval)
        X_eval_decoded = decoder.predict(X_eval_encoded)
        reconstruction_error = _reconstruction_error(X_eval, X_eval_decoded)

        # Precision-Recall curve and AUC (the reconstruction error is the anomaly score)
        precision, recall, _ = precision_recall_curve(y_eval, reconstruction_error)
        pr_auc = auc(recall, precision)

        print(f"Reconstruction error: {reconstruction_error}")
        print(f"Precision-Recall AUC: {pr_auc}")

        # Saving the model
        autoencoder.save(os.path.join(save_path, f"{key}_autoencoder.h5"))
        encoder.save(os.path.join(save_path, f"{key}_encoder.h5"))
        decoder.save(os.path.join(save_path, f"{key}_decoder.h5"))


In [None]:
# Settings handed to train_and_evaluate_autoencoder.
# 'epochs' and 'batch_size' are read by its fit() call; the whole mapping is also
# passed on to create_autoencoder.
# NOTE(review): the remaining keys are not read by any code visible here; they are
# presumably consumed by create_autoencoder — confirm against its definition.
config = dict(
    n_ls_a=4,
    n_cl_a=100,
    n_hl_a=2,
    activ='relu',
    seed=42,
    epochs=50,
    batch_size=128,
    patience=15,
)


In [None]:
# Run the full train / plot / evaluate / save pipeline for a single data set key.
# PICKLE_DIR_SMALL and config must already exist in the kernel when this cell runs.
train_and_evaluate_autoencoder(
    keys=['id00_6dB'],
    pickle_dir=PICKLE_DIR_SMALL,
    config=config,
)