In [None]:
# %pip install pydot pydot-ng graphviz
# %pip install ann_visualizer

In [None]:
import os
import time
import shap
import gc
import zipfile
import pathlib
from pathlib import Path
import librosa
from collections import Counter
from pydub import AudioSegment
from pydub.silence import detect_silence
import matplotlib
matplotlib.use("Agg")  # Kein Speicherverbrauch für Plots
import matplotlib.pyplot as plt
import librosa.display
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from types import SimpleNamespace
import traceback
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
from concurrent.futures import ThreadPoolExecutor
import concurrent.futures
from tensorflow.keras.applications import ResNet50, ResNet101, ResNet152
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorboard.plugins.hparams import api as hp
import pandas as pd

import visualkeras
from tensorflow.keras.utils import plot_model
# from ann_visualizer.visualize import ann_viz

import json
from PIL import Image
import shutil
import csv
from IPython.display import clear_output


# Default model input size. preprocess_data() overwrites both globals with the
# size of the first image found in the training directory.
img_height, img_width = 128, 128
# Batch size used when the image datasets are built in preprocess_data().
BATCH_SIZE = 16
# Number of epochs passed to model.fit().
epochs = 20
# When True, train_and_result() uses an EarlyStopping patience of epochs // 4;
# otherwise the patience equals `epochs`.
stop = False

# Model selection in train_and_result(): ResNet50 when True, otherwise the
# siamese model (if use_siamese) or the small CNN from build_model().
use_resnet = True
# When True (and use_resnet is True), run the grid search in hparam_tuning()
# before building the final ResNet model.
model_optimization = False

# Also switches preprocess_data() to create_siamese_dataset().
use_siamese = False

# NOTE(review): not referenced in the visible part of the notebook —
# presumably enables the SHAP helpers further down; confirm.
use_shap_values = False

# NOTE(review): not referenced in the visible part of the notebook — confirm usage.
check_image = False
# Cache for the longest training clip (as returned by len(AudioSegment));
# 0 means "not determined yet", see determine_max_length().
max_length = 0
# Selects the spectrogram render height in
# generate_mel_spectrograms_with_structure(): 333 px when True, 2000 px otherwise.
downsize = True


# Hyperparameter domains registered for TensorBoard's HParams plugin. The grid
# actually searched is hard-coded in hparam_tuning().
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.0, 0.5))
HP_LEARNING = hp.HParam('learning', hp.RealInterval(0.00005, 0.0005))
# HP_ACTIVATION = hp.HParam('activation', hp.Discrete(['relu', 'tanh', 'sigmoid']))
HP_ACTIVATION = hp.HParam('activation', hp.Discrete(['relu']))
# Log Directory for HParams
LOG_DIR = 'logs/hparam_tuning'



# All data paths are resolved relative to the parent of the working directory.
ROOT_DIR = Path('../').resolve() 
ZIP_DIR = ROOT_DIR / 'data'  
UNZIP_DIR = ROOT_DIR / 'Unzipped_Data_Picture' 
TEST_DIR = UNZIP_DIR / 'new_test_ds'

# # Small Dataset
TRAIN_DIR = UNZIP_DIR / 'small_train_ds'
VAL_DIR = UNZIP_DIR / 'small_val_ds'
# TEST_DIR = UNZIP_DIR / 'small_test_ds'

# # Small 3 Labels Dataset
# TRAIN_DIR = UNZIP_DIR / '3_small_train_ds'
# VAL_DIR = UNZIP_DIR / '3_small_val_ds'
# TEST_DIR = UNZIP_DIR / '3_small_test_ds'

# Medium Dataset
# TRAIN_DIR = UNZIP_DIR / 'medium_train_ds'
# VAL_DIR = UNZIP_DIR / 'medium_val_ds'
# TEST_DIR = UNZIP_DIR / 'medium_test_ds'

# # Large Dataset
# TRAIN_DIR = UNZIP_DIR / 'large_train_ds'
# VAL_DIR = UNZIP_DIR / 'large_val_ds'
# TEST_DIR = UNZIP_DIR / 'medium_test_ds'

# # Large 3 Labels Dataset
# TRAIN_DIR = UNZIP_DIR / '3_large_train_ds'
# VAL_DIR = UNZIP_DIR / '3_large_val_ds'
# TEST_DIR = UNZIP_DIR / '3_large_test_ds'

# # No_mod Dataset
# TRAIN_DIR = UNZIP_DIR / 'no_mod_train_ds'
# VAL_DIR = UNZIP_DIR / 'no_mod_val_ds'
# TEST_DIR = UNZIP_DIR / 'no_mod_test_ds'

# New_Dataset
# TRAIN_DIR = UNZIP_DIR / 'new_large_train_ds'
# VAL_DIR = UNZIP_DIR / 'new_large_val_ds'

# extract


In [None]:
def extract_zip(zip_path, extract_to):
    """Unpack a zip archive into ``extract_to`` unless that was already done.

    Args:
        zip_path: Path to the archive; a missing ``.zip`` suffix is appended.
        extract_to: Directory the archive contents are extracted into.

    The archive counts as already extracted when ``extract_to`` contains a
    folder named after the archive (its file name without ``.zip``). A missing
    archive is reported on stdout and otherwise ignored. Returns ``None``.
    """
    archive_name = str(zip_path)
    if not archive_name.endswith('.zip'):
        archive_name = archive_name + '.zip'
    archive = pathlib.Path(archive_name)

    # Marker for "already extracted": <extract_to>/<archive name without suffix>.
    destination = pathlib.Path(extract_to) / archive.stem
    if destination.exists():
        print(f"Das Verzeichnis {destination} existiert bereits. Überspringe das Extrahieren.")
        return

    if not archive.exists():
        print(f"Die Zip-Datei {archive} existiert nicht.")
        return

    print(f"Extrahiere die Zip-Datei {archive} nach {extract_to}.")
    with zipfile.ZipFile(archive, 'r') as bundle:
        bundle.extractall(extract_to)
    print(f"Zip-Datei {archive} erfolgreich extrahiert.")

def rename_audio_files(root_path):
    """Prefix every ``.wav``/``.mp3`` file below ``root_path`` with its folder name.

    A file ``<folder>/<name>`` becomes ``<folder>/<folder>_<name>``. Files that
    already start with ``<folder>_`` are left untouched, so running the function
    again does not add a second prefix. One progress line is printed per
    visited directory.
    """
    audio_suffixes = ('.wav', '.mp3')
    for current_dir, _subdirs, filenames in os.walk(root_path):
        folder = os.path.basename(current_dir)
        prefix = f"{folder}_"
        pending = [
            name for name in filenames
            if name.endswith(audio_suffixes) and not name.startswith(prefix)
        ]
        for name in pending:
            os.rename(
                os.path.join(current_dir, name),
                os.path.join(current_dir, f"{folder}_{name}"),
            )
        print(f"renaming of {root_path}/{folder} complete")


# Resize Audio Files

In [None]:
def determine_max_length():
    """Find the longest ``.wav`` clip below ``TRAIN_DIR``.

    The result (``len()`` of the pydub ``AudioSegment``; callers divide it by
    1000 to print seconds) is stored in the module-level ``max_length``. The
    global is only written after the whole tree has been scanned and stays 0
    when no ``.wav`` file is found.
    """
    global max_length
    longest = 0

    for folder, _, filenames in os.walk(TRAIN_DIR):
        for filename in filenames:
            if not filename.endswith(".wav"):
                continue
            clip = AudioSegment.from_file(Path(folder) / filename)
            longest = max(longest, len(clip))

    max_length = longest

def normalize_audio_length(input_dir):
    """Pad or trim every ``.wav`` below ``input_dir`` to the global ``max_length``.

    Results are written to a sibling directory ``<input_dir>_normalized`` that
    mirrors the sub-folder layout of ``input_dir``. If that directory already
    exists, nothing is processed and its path is returned immediately.

    Args:
        input_dir: Directory with the source audio (``str`` or ``Path``).

    Returns:
        Path of the ``_normalized`` output directory.

    Side effects:
        Calls ``determine_max_length()`` when the global ``max_length`` is
        still 0, which scans ``TRAIN_DIR`` and updates that global.
    """
    global max_length
    # Accept plain strings as well; the code below needs Path attributes.
    input_dir = Path(input_dir)
    output_dir = input_dir.parent / f"{input_dir.name}_normalized"
    if output_dir.exists():
        print(f"Überspringe Verarbeitung, da {output_dir} bereits existiert.")
        return output_dir
    os.makedirs(output_dir, exist_ok=True)

    if max_length == 0:
        determine_max_length()

    print(f"Maximale Länge: {max_length / 1000} Sekunden")

    for subdir, _, files in os.walk(input_dir):
        for file in files:
            if not file.endswith(".wav"):
                continue

            input_path = Path(subdir) / file
            audio = AudioSegment.from_file(input_path)
            if len(audio) > max_length:
                # Clips longer than the reference length are cut, not skipped.
                print(f"⚠️ {input_path.name} ist länger als {max_length / 1000} Sekunden. Kürze Datei!")
                audio = audio[:max_length]

            # Append silence so that every exported clip has the same length.
            padded_audio = audio + AudioSegment.silent(duration=max(0, max_length - len(audio)))

            relative_path = input_path.parent.relative_to(input_dir)
            target_dir = output_dir / relative_path
            os.makedirs(target_dir, exist_ok=True)
            output_path = target_dir / input_path.name

            padded_audio.export(output_path, format="wav")
            print(f"Processed {input_path.name}: expanded to {max_length / 1000} seconds")

    print(f"Processing complete. Normalized files saved in {output_dir}")
    return output_dir

# Mel Spectrogram

In [None]:
def process_audio_file(audio_file, input_dir, output_dir, n_mels, fmin, fmax):
    """Load one audio file and compute its mel spectrogram in decibels.

    The audio is loaded at a fixed 44100 Hz sample rate. The folder that will
    hold the rendered image (``output_dir`` plus the file's folder relative to
    ``input_dir``) is created here as a side effect.

    Returns:
        Tuple ``(relative_path, mel_spectrogram_db, sample_rate, target_dir, stem)``
        where ``relative_path`` is ``audio_file`` relative to ``input_dir`` and
        ``stem`` is the file name without its extension.
    """
    rel_path = audio_file.relative_to(input_dir)

    destination = output_dir / rel_path.parent
    destination.mkdir(parents=True, exist_ok=True)

    samples, sample_rate = librosa.load(audio_file, sr=44100)
    mel_power = librosa.feature.melspectrogram(
        y=samples, sr=sample_rate, n_mels=n_mels, fmin=fmin, fmax=fmax
    )
    # Power values converted to dB relative to the loudest bin of this clip.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    return rel_path, mel_db, sample_rate, destination, audio_file.stem

def generate_mel_spectrograms_with_structure(input_dir, output_dir, n_mels=256, fmin=20, fmax=44100, batch_size=25, square = False):
    """Render a mel-spectrogram PNG for every ``.wav`` file below ``input_dir``.

    The folder layout of ``input_dir`` is mirrored below ``output_dir``; each
    image is named ``<audio stem>_mel_spectrogram.png``.

    Processing strategy:
      1. Spectrograms of one batch are computed in parallel worker threads.
      2. Plotting happens sequentially in the calling thread.
      3. Files are handled in batches of ``batch_size`` to bound memory use.

    Args:
        input_dir: Root directory that is searched recursively for ``.wav`` files.
        output_dir: Root directory for the PNG files.
        n_mels, fmin, fmax: Forwarded to the mel-spectrogram computation and
            to the plot. NOTE(review): the default ``fmax`` equals the fixed
            44100 Hz load rate, i.e. it is above half the sample rate —
            confirm this is intended.
        batch_size: Number of audio files per batch.
        square: If True render a small 2x2 inch image at 300 dpi; otherwise a
            wide strip whose width is 30x its height (the height depends on the
            global ``downsize`` flag).

    Returns:
        None. Nothing is done when ``output_dir`` already contains any PNG or
        when no audio files are found.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)

    if output_dir.exists() and any(output_dir.rglob("*.png")):
        print(f"Überspringe Verarbeitung, da {output_dir} bereits Mel-Spektrogramme enthält.")
        return

    audio_files = list(input_dir.rglob("*.wav"))

    if not audio_files:
        print("Keine Audiodateien gefunden.")
        return

    total_files = len(audio_files)
    # Ceiling division: an exact multiple of batch_size must not report an
    # extra, empty batch.
    total_batches = (total_files + batch_size - 1) // batch_size
    print(f"{total_files} Audiodateien gefunden. Verarbeitung startet.")

    for batch_start in range(0, total_files, batch_size):
        batch_number = batch_start // batch_size + 1
        batch_files = audio_files[batch_start:batch_start + batch_size]

        # Drop any figures left over from the previous batch before continuing.
        plt.close('all')
        gc.collect()
        print(f"Verarbeite Batch {batch_number} von {total_batches}")

        results = []
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(process_audio_file, audio_file, input_dir, output_dir, n_mels, fmin, fmax)
                for audio_file in batch_files
            ]
            for future in concurrent.futures.as_completed(futures):
                results.append(future.result())

        for relative_path, mel_spectrogram_db, sr, target_dir, audio_file_stem in results:
            mel_spectrogram_path = target_dir / f"{audio_file_stem}_mel_spectrogram.png"

            if mel_spectrogram_path.exists():
                print(f"Spektrogramm {mel_spectrogram_path} existiert bereits. Überspringen.")
                continue

            # Track the figure explicitly so it can be closed on every path,
            # including a failure inside specshow/savefig.
            fig = None
            try:
                if square:
                    fig = plt.figure(figsize=(2, 2))
                    librosa.display.specshow(mel_spectrogram_db, x_axis='time', y_axis='mel', sr=sr, cmap='magma', fmin=fmin, fmax=fmax)
                    plt.axis('off')

                    plt.savefig(mel_spectrogram_path, bbox_inches='tight', pad_inches=0, dpi=300)
                else:
                    height = 333 if downsize else 2000
                    width = height * 30
                    dpi = 100
                    figsize = (width / dpi, height / dpi)

                    fig, ax = plt.subplots(figsize=figsize, dpi=dpi, frameon=False)

                    librosa.display.specshow(mel_spectrogram_db, x_axis='time', y_axis='mel', sr=sr, cmap='magma', fmin=fmin, fmax=fmax, ax=ax)
                    ax.set_axis_off()

                    plt.savefig(mel_spectrogram_path, bbox_inches='tight', pad_inches=0, dpi=dpi)

                print(f"{batch_number} von {total_batches}__Mel-Spektrogramm für {audio_file_stem} gespeichert in {mel_spectrogram_path}")

            except Exception as e:
                # Best effort: a single failing plot must not stop the run.
                print(f"Fehler beim Plotten von {audio_file_stem}: {e}")
            finally:
                if fig is not None:
                    plt.close(fig)

    print(f"Alle Mel-Spektrogramme gespeichert in {output_dir}")

In [None]:
def split_spectrogram(image_path, output_dir):
    """Cut a spectrogram image into square tiles and save them as PNG files.

    The tile edge length equals the image height. The first tile (index 0) and
    everything from index ``width // height - 1`` onwards are never written —
    NOTE(review): presumably to drop the image borders; confirm.

    Tiles are named ``<image stem>_part<i>.png``. A tile goes to the trash
    directory instead of ``output_dir`` when at least 70 % of its array values
    are below 10 (treated as silence) or, for RGBA images, more than 20 % of
    its pixels are fully transparent. The trash directory is
    ``<grandparent of output_dir>/<parent folder name>_trash``.

    Args:
        image_path: Path to the spectrogram PNG.
        output_dir: Directory for the kept tiles (``str`` or ``Path``); created
            if necessary.
    """
    output_dir = Path(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    trash_dir = output_dir.parent.parent / f"{output_dir.parent.name}_trash"
    os.makedirs(trash_dir, exist_ok=True)

    image_stem = Path(image_path).stem
    silence_threshold = 10

    # The context manager releases the file handle even if a tile fails.
    with Image.open(image_path) as img:
        width, height = img.size
        segment_size = height

        num_segments = width // segment_size - 1
        segment_indices = range(1, num_segments)
        for i in segment_indices:
            left = i * segment_size
            right = left + segment_size
            segment = img.crop((left, 0, right, segment_size))
            segment_array = np.array(segment)

            # Fraction of all array entries (every channel) below the threshold.
            silence_ratio = np.mean(segment_array < silence_threshold)

            transparency_ratio = 0
            if segment.mode == 'RGBA':
                alpha_channel = segment_array[:, :, 3]
                transparency_ratio = np.mean(alpha_channel == 0)

            output_path = output_dir / f"{image_stem}_part{i}.png"
            if silence_ratio >= 0.7 or transparency_ratio > 0.2:
                output_path = trash_dir / f"{image_stem}_part{i}.png"
                print(f"Segment {i} enthält {silence_ratio * 100:.2f}% Stille und wird in den Trash {output_path} verschoben.")

            try:
                segment.save(output_path)
            except IOError:
                print(f"Fehler beim Speichern des Segments: {output_path}")

    # Report the number of tiles actually handled, not the raw quotient.
    print(f"Spektrogramm in {len(segment_indices)} Segmente geschnitten und gespeichert in {output_dir}")

def process_spectrograms(input_dir):
    """Split every spectrogram PNG in the direct sub-folders of ``input_dir``.

    Only one folder level is scanned: for each immediate sub-directory, every
    ``*.png`` directly inside it is validated and passed to
    ``split_spectrogram``. The tiles are written to
    ``<input_dir>_splits/<sub-folder name>``.

    Args:
        input_dir: Directory whose sub-folders contain spectrogram images.

    Returns:
        Path of the ``_splits`` directory. If it already exists, nothing is
        processed. It is only created once at least one image is split, so the
        returned path may not exist for an input without images.
    """
    input_dir = Path(input_dir)
    output_dir = input_dir.parent / f"{input_dir.name}_splits"
    if output_dir.exists():
        print(f"Output directory already exists: {output_dir}. Skipping splitting.")
        return output_dir

    for subdir in input_dir.iterdir():
        if not subdir.is_dir():
            continue

        target_dir = output_dir / subdir.relative_to(input_dir)
        for image_path in subdir.glob("*.png"):
            try:
                # verify() only checks file integrity; close the handle right
                # away — split_spectrogram opens the file again itself.
                with Image.open(image_path) as img:
                    img.verify()
                split_spectrogram(image_path, target_dir)
            except (IOError, SyntaxError):
                print(f"Fehler: Beschädigtes oder ungültiges Bild übersprungen: {image_path}")
                continue
    return output_dir

# Build

In [None]:
def load_image(image_path):
    """Read a PNG file and return it as a 256x256 RGB tensor scaled by 1/255."""
    raw_bytes = tf.io.read_file(image_path)
    pixels = tf.image.decode_png(raw_bytes, channels=3)
    resized = tf.image.resize(pixels, (256, 256))
    return resized / 255.0  # normalisation

def create_siamese_dataset(directory, batch_size=32):
    """Build a TensorFlow dataset of (original, upscaled) image pairs.

    PNG files directly inside ``directory`` are grouped by their stem with the
    variant marker (``orig-16-44-mono_`` or ``upscale-from-mp3-128_``) removed.
    Only groups that contain both variants become a pair; each image is loaded
    through ``load_image``.

    Args:
        directory: Folder that is searched (non-recursively) for ``*.png``.
        batch_size: Currently unused — the returned dataset is not batched.
            NOTE(review): confirm whether batching was intended here.

    Returns:
        ``tf.data.Dataset`` created with ``from_tensor_slices`` from the list
        of loaded pairs.
    """
    # Imported locally: the notebook's import cell does not import `re`, so
    # the original call to re.sub raised NameError.
    import re

    variant_marker = re.compile(r'(upscale-from-mp3-128|orig-16-44-mono)_')

    paired_data = {}
    for path in Path(directory).glob("*.png"):
        base_name = variant_marker.sub('', path.stem)
        entry = paired_data.setdefault(base_name, {"original": None, "upscaled": None})

        if "orig-16-44-mono" in path.stem:
            entry["original"] = str(path)
        elif "upscale-from-mp3-128" in path.stem:
            entry["upscaled"] = str(path)

    pairs = [
        (load_image(entry["original"]), load_image(entry["upscaled"]))
        for entry in paired_data.values()
        if entry["original"] and entry["upscaled"]
    ]

    return tf.data.Dataset.from_tensor_slices(pairs)

def build_siamese_model(input_shape=(128, 128, 3)):
    """Create an (uncompiled) siamese CNN that scores a pair of images.

    Both inputs pass through the same encoder (Conv2D -> BatchNormalization ->
    MaxPooling2D -> Flatten -> Dense(128)). The element-wise absolute
    difference of the two encodings feeds a single sigmoid unit.

    Args:
        input_shape: Shape of each of the two image inputs.

    Returns:
        Keras ``Model`` with two inputs and one sigmoid output.
    """
    print("use siamese model")
    # Instantiated but never added to the model (kept from the original code).
    _unused_norm = layers.Normalization()

    shared_encoder = models.Sequential([
        layers.Conv2D(64, 3, activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(128, activation='relu')
    ])
    print(shared_encoder)

    left_input = layers.Input(shape=input_shape)
    right_input = layers.Input(shape=input_shape)

    # The same encoder instance is applied twice, so the weights are shared.
    left_code = shared_encoder(left_input)
    right_code = shared_encoder(right_input)

    abs_difference = layers.Lambda(
        lambda pair: K.abs(pair[0] - pair[1])
    )([left_code, right_code])
    similarity = layers.Dense(1, activation="sigmoid")(abs_difference)

    return models.Model([left_input, right_input], similarity)


# Contrastive Loss
def contrastive_loss(y_true, y_pred, margin=1.0):
    """Contrastive loss.

    Computes ``mean(y_true * y_pred**2 + (1 - y_true) * max(margin - y_pred, 0)**2)``.
    """
    hinge = K.maximum(margin - y_pred, 0)
    positive_term = y_true * K.square(y_pred)
    negative_term = (1 - y_true) * K.square(hinge)
    return K.mean(positive_term + negative_term)



def build_resnet_model(input_shape=(128, 128, 3), num_classes=2, dropout_rate=0.2, learning_rate=0.00005, activation='relu', fine_tune_at=None):
    """Build and compile a ResNet50-based classifier.

    Side effect: switches the global Keras mixed-precision policy to
    ``mixed_float16`` before the network is created.

    Args:
        input_shape: Shape of the input images.
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Rate of both dropout layers in the classification head.
        learning_rate: Learning rate of the Adam optimizer.
        activation: Activation of the 512-unit dense layer.
        fine_tune_at: If given, the first ``fine_tune_at`` backbone layers are
            frozen; all other layers stay trainable.

    Returns:
        Model compiled with sparse categorical cross-entropy and accuracy.
    """
    print("use Resnet_Model")
    tf.keras.mixed_precision.set_global_policy("mixed_float16")

    backbone = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
    backbone.trainable = True

    # Optionally freeze the lower part of the backbone.
    if fine_tune_at is not None:
        for frozen_layer in backbone.layers[:fine_tune_at]:
            frozen_layer.trainable = False

    # Classification head on top of the backbone features.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dropout(dropout_rate)(features)
    features = Dense(512, activation=activation)(features)
    features = Dropout(dropout_rate)(features)
    # The output layer is pinned to float32 although the policy is mixed_float16.
    class_probs = Dense(num_classes, activation='softmax',  dtype='float32')(features)

    classifier = Model(inputs=backbone.input, outputs=class_probs)
    classifier.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

def calculate_shap_values(model, test_ds, num_samples=1, num_images_to_explain=1):
    """Compute Shapley values for a number of images from the test dataset.

    Args:
        model: The model to explain.
        test_ds: The test dataset, iterated as ``(images, labels)`` batches.
        num_samples: Number of batches taken from the test dataset.
        num_images_to_explain: Number of images to compute Shapley values for;
            capped at the number of images contained in the taken batches.

    Returns:
        shap_values_list: Shapley values, one entry per explained image.
        test_images_list: The explained images, each kept as a batch of one.
    """
    image_batches = [images for images, _ in test_ds.take(num_samples)]
    test_images = np.concatenate(image_batches, axis=0)

    available = test_images.shape[0]
    if num_images_to_explain > available:
        print(f"Nur {available} Bilder verfügbar, anstatt {num_images_to_explain}.")
        num_images_to_explain = available

    shap_values_list, test_images_list = [], []

    for index in range(num_images_to_explain):
        # Slice instead of index so the batch dimension is preserved.
        single_image = test_images[index:index + 1]

        # Each image is explained with itself as the explainer's background data.
        explainer = shap.GradientExplainer(model, single_image)
        image_shap_values = explainer.shap_values(single_image)

        shap_values_list.append(image_shap_values)
        test_images_list.append(single_image)
        print("Shapley-Werte Form:", image_shap_values[0].shape)
        print("Bild Form:", single_image.shape)

    return shap_values_list, test_images_list

def plot_shap_values(shap_values_list, test_images_list):
    """Visualise the Shapley values of several images.

    Args:
        shap_values_list: Shapley values, one entry per image.
        test_images_list: The matching images; each is divided by 255 before
            plotting.
    """
    paired = zip(shap_values_list, test_images_list)
    for number, (image_shap_values, image) in enumerate(paired, start=1):
        print(f"Shapley-Werte für Bild {number}:")
        shap.image_plot(image_shap_values, image / 255.0)

In [None]:
def model_train_results(session):
    """Plot the loss and accuracy curves of a finished training session.

    Reads from ``session``:
      * ``history``: Keras History object (``history.history`` / ``history.epoch``).
      * ``callbacks[0]``: callback exposing ``best_epoch`` (drawn as a dashed line).
      * ``model_values``: indexed as 0 = val loss, 1 = val accuracy,
        2 = train loss, 3 = train accuracy (used in the legend labels).
      * ``model_batch_size`` and ``best_model_values`` (dict keyed by objects
        with a ``name`` attribute, or ``None``) for the footer text.

    Returns:
        The ``matplotlib.pyplot`` module with the figure as current figure, so
        callers can use ``savefig``/``show``/``close`` on the return value.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    history = session.history
    curves = history.history
    best_epoch = session.callbacks[0].best_epoch
    batch_size = session.model_batch_size
    epoch_axis = np.array(history.epoch)

    def mark_best_epoch():
        # Vertical marker at the epoch whose weights were kept.
        if best_epoch is not None:
            plt.axvline(
                x=best_epoch,
                color="green",
                linestyle="--",
                label=f"Best Weights Epoch {best_epoch}"
            )

    plt.figure(figsize=(16, 6))

    # --- left panel: loss ---
    plt.subplot(1, 2, 1)
    plt.plot(epoch_axis, curves["loss"], label=f"Train Loss {session.model_values[2]:.3f}")
    plt.plot(epoch_axis, curves["val_loss"], label=f"Val Loss {session.model_values[0]:.3f}")
    mark_best_epoch()

    print(session.best_model_values)

    # Collect the tuned hyperparameters by name; 'N/A' when none were tuned.
    chosen = {"dropout": 'N/A', "learning": 'N/A', "activation": 'N/A'}
    if session.best_model_values is not None:
        for param, value in session.best_model_values.items():
            if param.name in chosen:
                chosen[param.name] = value

    hparam_text = f"dropout: {chosen['dropout']}, learning: {chosen['learning']}, activation: {chosen['activation']}"
    print(hparam_text)

    footer = 'Default' if session.best_model_values is None else hparam_text
    plt.subplots_adjust(bottom=0.65)
    plt.figtext(
        0.5, 0.01,
        f"Batch Size: {batch_size}\nHParams: {footer}",
        fontsize=8, ha="center", va="bottom", color="black"
    )

    plt.legend()
    plt.ylim([0, max(plt.ylim())])
    plt.xlabel("Epoch")
    plt.ylabel("Loss [CrossEntropy]")

    # --- right panel: accuracy in percent ---
    plt.subplot(1, 2, 2)
    plt.plot(epoch_axis, 100 * np.array(curves["accuracy"]), label=f"Train Accuracy {session.model_values[3]:.3f}")
    plt.plot(epoch_axis, 100 * np.array(curves["val_accuracy"]), label=f"Val Accuracy {session.model_values[1]:.3f}")
    mark_best_epoch()

    plt.legend()
    plt.ylim([0, 100])
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy [%]")

    plt.tight_layout()

    return plt


In [None]:
def train_with_hparams(train_ds, val_ds, hparams):
    """Train one ResNet model for a hyperparameter combination and score it.

    Args:
        train_ds: Training dataset passed to ``model.fit``.
        val_ds: Validation dataset passed to ``model.fit``.
        hparams: Dict mapping ``HP_DROPOUT``, ``HP_LEARNING`` and
            ``HP_ACTIVATION`` to the values to try.

    Returns:
        Validation accuracy of the last epoch. The caller ranks the runs by
        this value and reports it as the best validation accuracy, so the score
        must come from the validation set rather than the training set.

    Side effects:
        Writes TensorBoard and HParams logs to a run-specific sub-folder of
        ``LOG_DIR``.
    """
    model = build_resnet_model(
        input_shape=(img_height, img_width, 3),
        num_classes=2,
        fine_tune_at=100,
        dropout_rate=hparams[HP_DROPOUT],
        learning_rate=hparams[HP_LEARNING],
        activation=hparams[HP_ACTIVATION],
    )

    # TensorBoard Logging
    log_dir = f"{LOG_DIR}/run-{hparams[HP_DROPOUT]}-{hparams[HP_LEARNING]}-{hparams[HP_ACTIVATION]}"
    tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    hparams_cb = hp.KerasCallback(log_dir, hparams)

    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs,
        callbacks=[tensorboard_cb, hparams_cb],
    )
    print(history.history)
    # Selecting on training accuracy would favour over-fitted configurations.
    return history.history['val_accuracy'][-1]

def hparam_tuning(train_ds, val_ds):
    """Grid-search dropout, learning rate and activation for the ResNet model.

    Every combination of the activations in ``HP_ACTIVATION``, three dropout
    rates and three learning rates is trained via ``train_with_hparams``. The
    notebook output is cleared after each run.

    Returns:
        List of ``(hparams, score)`` tuples in the order the runs were executed.
    """
    grid = [
        {
            HP_DROPOUT: dropout_rate,
            HP_LEARNING: learning_rate,
            HP_ACTIVATION: activation,
        }
        for activation in HP_ACTIVATION.domain.values
        for dropout_rate in [0.2, 0.3, 0.4]
        for learning_rate in [0.0001, 0.00025, 0.0005]
    ]

    session_results = []
    for hparams in grid:
        print(f"Testing HParams: {hparams}")
        score = train_with_hparams(train_ds, val_ds, hparams)
        session_results.append((hparams, score))
        clear_output(wait=True)

    print("HParam tuning completed.")
    return session_results

In [None]:
def save_session_as_zip(session, train_ds, zip_dir="model_results"):
    """Bundle the trained model, its plots and its metadata into one zip file.

    The archive is written to ``zip_dir`` and contains ``model.h5``, the
    training-history plot, two architecture images (``model.png``,
    ``model_2.png``) and ``session_data.json``. The intermediate files are
    created in ``zip_dir`` and removed afterwards, also when a step fails.

    Args:
        session: Object providing ``model``, ``model_values`` (val loss,
            val accuracy, train loss, train accuracy), ``model_batch_size``,
            ``best_model_values``, ``history`` and ``softmax_values``.
        train_ds: Training dataset; only its number of batches is used, for
            the archive name.
        zip_dir: Output directory, created if necessary.

    Returns:
        Path of the created zip file.
    """
    def _json_default(obj):
        # Convert NumPy scalars/arrays; fail loudly for anything else instead
        # of handing the same object back to the encoder (which ends in a
        # misleading "Circular reference detected" error).
        if isinstance(obj, np.generic):
            return obj.item()
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

    os.makedirs(zip_dir, exist_ok=True)

    # Number of batches times batch size: an upper bound when the last batch
    # is only partially filled.
    num_files = sum(1 for _ in train_ds) * session.model_batch_size

    val_loss, val_acc, train_loss, train_acc = session.model_values
    val_loss, val_acc = round(val_loss, 3), round(val_acc, 3)
    train_loss, train_acc = round(train_loss, 3), round(train_acc, 3)

    name_to_save = f"({num_files}_{img_height}px-{'resnet_model'if use_resnet else 'own_model'})_loss_{train_loss}_acc_{train_acc}_val_loss_{val_loss}_val_acc_{val_acc}"

    zip_filename = f"model_{name_to_save}.zip"
    zip_path = os.path.join(zip_dir, zip_filename)

    model_path = os.path.join(zip_dir, "model.h5")
    plot_path = os.path.join(zip_dir, f"history_{name_to_save}.png")
    model_image_path = f"{zip_dir}/model.png"
    model_image_path_2 = f"{zip_dir}/model_2.png"
    session_path = os.path.join(zip_dir, "session_data.json")
    temporary_files = [model_path, plot_path, session_path, model_image_path, model_image_path_2]

    try:
        session.model.save(model_path)

        plot = model_train_results(session)
        plot.savefig(plot_path)
        visualkeras.layered_view(session.model, legend=True, show_dimension=True, to_file=model_image_path)
        plot_model(session.model, to_file=model_image_path_2, show_shapes=True, show_layer_names=True, expand_nested=True, dpi=96)

        plot.show()
        plot.close()

        print("History wurde als JSON und CSV gespeichert.")

        # Longest training clip, recorded next to the model. Kept in a local
        # variable so the module-level max_length is not modified here.
        longest_clip = 0
        for subdir, _, files in os.walk(TRAIN_DIR):
            for file in files:
                if file.endswith(".wav"):
                    audio = AudioSegment.from_file(Path(subdir) / file)
                    longest_clip = max(longest_clip, len(audio))

        session_data = {
            "model_values": session.model_values,
            "model_batch_size": session.model_batch_size,
            "best_model_values": {str(k): v for k, v in session.best_model_values.items()} if session.best_model_values else None,
            "history": session.history.history,
            "softmax_values": [[[float(v) for v in sample] for sample in epoch] for epoch in session.softmax_values],
            "max_length": longest_clip,
            "img_height": img_height
        }
        print("save")
        print(session.best_model_values)
        print(session_data["best_model_values"])
        with open(session_path, "w") as f:
            json.dump(session_data, f, indent=4, default=_json_default)

        with zipfile.ZipFile(zip_path, "w") as zipf:
            zipf.write(model_path, arcname="model.h5")
            zipf.write(plot_path, arcname=f"history_{name_to_save}.png")
            zipf.write(session_path, arcname="session_data.json")
            zipf.write(model_image_path, arcname="model.png")
            zipf.write(model_image_path_2, arcname="model_2.png")
    finally:
        # Remove whatever intermediate files were created, success or not.
        for temporary_file in temporary_files:
            if os.path.exists(temporary_file):
                os.remove(temporary_file)

    print(f"Session-Daten erfolgreich in {zip_path} gespeichert.")
    return zip_path

In [None]:
class SoftmaxLogger(tf.keras.callbacks.Callback):
    """Keras callback that records the model's output probabilities per epoch.

    After every epoch the model predicts the first ``num_samples`` images of
    the first batch of ``dataset``. The predictions are printed together with
    their labels and appended to ``softmax_history`` (one list per epoch).
    """

    def __init__(self, dataset, num_samples=5):
        """
        Args:
            dataset: Dataset yielding ``(images, labels)`` batches.
            num_samples: Maximum number of images logged per epoch.
        """
        super().__init__()
        self.dataset = dataset
        self.num_samples = num_samples
        # One entry per epoch; each entry is a list of per-sample output vectors.
        self.softmax_history = []

    def on_epoch_end(self, epoch, logs=None):
        """Predict the sample images and store/print the resulting probabilities."""
        softmax_values_epoch = []
        print(f"\nSoftmax-Werte nach Epoche {epoch+1}:")
        for images, labels in self.dataset.take(1):
            # The classifiers built in this notebook already end in a softmax
            # layer, so the predictions are logged as-is; applying softmax a
            # second time would distort the recorded probabilities.
            probabilities = self.model.predict(images[:self.num_samples], verbose=0)
            # Iterate over what was actually predicted: the batch may contain
            # fewer than num_samples images.
            for i, sample_probabilities in enumerate(probabilities):
                softmax_values_epoch.append(sample_probabilities)
                print(f"Sample {i+1}: {sample_probabilities} (Label: {labels[i].numpy()})")
        self.softmax_history.append(softmax_values_epoch)

In [None]:
def get_image_size_from_dir(directory):
    """Return ``(height, width)`` of the first image found below ``directory``.

    The tree is walked with ``os.walk``; the first file whose lower-cased name
    ends in ``png``, ``jpg`` or ``jpeg`` determines the result.

    Raises:
        ValueError: If no such file exists anywhere below ``directory``.
    """
    image_endings = ('png', 'jpg', 'jpeg')
    for folder, _, filenames in os.walk(directory):
        first_image = next(
            (name for name in filenames if name.lower().endswith(image_endings)),
            None,
        )
        if first_image is None:
            continue
        with Image.open(os.path.join(folder, first_image)) as picture:
            # PIL reports (width, height); callers expect (height, width).
            width, height = picture.size
            return height, width
    raise ValueError("Keine Bilder im Verzeichnis gefunden!")

def preprocess_data(train_dir, val_dir, test_dir, dropout_rate=0.2, regularization_rate=0.0001, activation='relu'):
    """Create the training, validation and test datasets.

    Side effect: the globals ``img_height`` and ``img_width`` are overwritten
    with the size of the first image found in ``train_dir``.

    Depending on the global ``use_siamese`` flag the datasets come from
    ``create_siamese_dataset`` or from Keras' ``image_dataset_from_directory``
    (batch size ``BATCH_SIZE``). All three datasets are prefetched, and the
    pixel value range of the first training batch is printed.

    Args:
        train_dir, val_dir, test_dir: Directories of the three splits.
        dropout_rate, regularization_rate, activation: Accepted but not used
            by this function.

    Returns:
        Tuple ``(train_ds, val_ds, test_ds)``.
    """
    global img_height, img_width
    img_height, img_width = get_image_size_from_dir(train_dir)
    print(f"Ermittelte Bildgröße: {img_height} x {img_width}")

    split_dirs = (train_dir, val_dir, test_dir)
    if use_siamese:
        raw_splits = [
            create_siamese_dataset(split_dir, batch_size=BATCH_SIZE)
            for split_dir in split_dirs
        ]
        print("siamese DS Created")
    else:
        raw_splits = [
            tf.keras.utils.image_dataset_from_directory(
                split_dir,
                image_size=(img_height, img_width),
                batch_size=BATCH_SIZE
            )
            for split_dir in split_dirs
        ]

    # Preprocessing: overlap input loading with training.
    train_ds, val_ds, test_ds = [
        split.prefetch(tf.data.AUTOTUNE) for split in raw_splits
    ]

    for images, labels in train_ds.take(1):
        print(images.numpy().min(), images.numpy().max())

    return train_ds, val_ds, test_ds

def build_model(input_shape=(128, 128, 3), num_classes=2, dropout_rate=0.2, regularization_rate=0.0001, activation='relu'):
    """Build and compile the small custom CNN classifier.

    Architecture: a Normalization layer, four Conv2D/BatchNormalization/
    MaxPooling2D stages with 16, 32, 64 and 128 filters, then
    Flatten -> Dense(128) -> Dropout -> softmax output. All convolutional
    layers and the hidden dense layer use L2 kernel regularisation.

    Args:
        input_shape: Shape of the input images.
        num_classes: Number of output classes.
        dropout_rate: Dropout rate in front of the output layer.
        regularization_rate: L2 regularisation factor.
        activation: Activation of the convolutional and hidden dense layers.

    Returns:
        Model compiled with the Adam optimizer, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    summary_lines = (
        f"num_labels: {num_classes}",
        "",
        "",
        "Build Model with:",
        f"dropout_rate = {dropout_rate}",
        f"regularization_rate = {regularization_rate}",
        f"activation = {activation}",
        f"input_shape: {input_shape}",
        "",
    )
    for line in summary_lines:
        print(line)

    def l2_penalty():
        # A fresh regulariser instance for every layer.
        return tf.keras.regularizers.l2(regularization_rate)

    norm_layer = layers.Normalization()
    stack = [layers.Input(shape=input_shape), norm_layer]

    for filters in (16, 32, 64, 128):
        stack.append(layers.Conv2D(filters, 3, activation=activation, kernel_regularizer=l2_penalty()))
        stack.append(layers.BatchNormalization())
        stack.append(layers.MaxPooling2D())

    stack.append(layers.Flatten())
    stack.append(layers.Dense(128, activation=activation, kernel_regularizer=l2_penalty()))
    stack.append(layers.Dropout(dropout_rate))
    stack.append(layers.Dense(num_classes, activation='softmax'))

    model = models.Sequential(stack)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

def train_and_result():
    """Train the configured model, save the session and return the artefacts.

    The behavior is driven by module-level settings: ``use_resnet``,
    ``model_optimization``, ``use_siamese``, ``use_shap_values``, ``downsize``,
    ``epochs``, ``stop``, ``img_height`` and ``img_width``. The datasets are
    built from the module-level ``train_mel_dir``, ``val_mel_dir`` and
    ``test_mel_dir``.

    Returns:
        tuple: ``(path, session, test_ds)`` where ``path`` is the value
        returned by ``save_session_as_zip``, ``session`` is a SimpleNamespace
        holding the model, the training history and the metrics of the best
        epoch, and ``test_ds`` is the test dataset from ``preprocess_data``.
    """
    train_ds, val_ds, test_ds = preprocess_data(train_mel_dir, val_mel_dir, test_mel_dir)

    # Bound up front: these are only assigned in the ResNet + tuning branch but
    # are read again after training. Without the defaults the combination
    # model_optimization=True / use_resnet=False raised a NameError after the
    # whole training run had already finished.
    best_hparams = None
    best_val_accuracy = None

    input_shape = (img_height, img_width, 3)

    if use_resnet:
        if model_optimization:
            session_results = hparam_tuning(train_ds, val_ds)
            # Entries are (hparams, val_accuracy) pairs; max() returns the first
            # best entry, exactly like a stable descending sort followed by [0].
            best_hparams, best_val_accuracy = max(session_results, key=lambda x: x[1])
            clear_output(wait=True)
            print(f"Results: {session_results}")
            print(f"Best Params: {best_hparams}")
            print(f"Best val_acc: {best_val_accuracy}")

            model = build_resnet_model(
                input_shape=input_shape,
                num_classes=2,
                fine_tune_at=100,
                dropout_rate=best_hparams[HP_DROPOUT],
                learning_rate=best_hparams[HP_LEARNING],
                activation=best_hparams[HP_ACTIVATION],
            )
        else:
            model = build_resnet_model(input_shape=input_shape, num_classes=2, fine_tune_at=100)
    else:
        if use_siamese:
            model = build_siamese_model(input_shape=input_shape)
            print("builded siamese model")
            model.compile(loss=contrastive_loss, optimizer='adam', metrics=['accuracy'])
            print("compiled")
        else:
            model = build_model(input_shape=input_shape, num_classes=2)

    # With stop=False the patience equals the epoch budget, so early stopping
    # never interrupts the run but still restores the best weights at the end.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=(epochs // 4) if stop else epochs,
        restore_best_weights=True,
    )
    softmax_logger = SoftmaxLogger(val_ds, num_samples=5)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_accuracy',
        factor=0.5,
        patience=3,
        min_lr=0.00001,
    )

    model.summary()
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs,
        callbacks=[early_stopping, softmax_logger, reduce_lr],
    )

    trained_epochs = len(history.epoch)
    print(f"Das Training wurde nach {trained_epochs} Epochen gestoppt.")
    best_epoch = early_stopping.best_epoch
    print(f"Das beste Modell wurde in Epoche {best_epoch} gefunden.")

    # Metrics of the epoch whose weights were restored.
    val_loss = history.history['val_loss'][best_epoch]
    val_accuracy = history.history['val_accuracy'][best_epoch]
    train_loss = history.history['loss'][best_epoch]
    train_accuracy = history.history['accuracy'][best_epoch]

    # Read the effective batch size from the first batch; stays None for an
    # empty dataset.
    batch_size = None
    for element in train_ds.take(1):
        batch_size = element[0].shape[0]

    session = SimpleNamespace(
        model=model,
        history=history,
        epochs=epochs,
        callbacks=[early_stopping],
        model_values=[val_loss, val_accuracy, train_loss, train_accuracy],
        model_batch_size=batch_size,
        best_model_values=None,
        softmax_values=softmax_logger.softmax_history,
        img_height=img_height,
    )
    if model_optimization and best_hparams is not None:
        session.best_model_values = best_hparams
        print(f"Best HParams for training: {session.best_model_values} with validation accuracy: {best_val_accuracy}")

    path = save_session_as_zip(session, train_ds)
    print(f"Evaluated model with best weights: val_loss={val_loss}, val_accuracy={val_accuracy}")

    if use_shap_values:
        print("calculate_shap_values")
        # NOTE(review): `downsize` is a bool in the config cell, so this guard
        # only fires when it is explicitly set to None. The message suggests
        # `not downsize` may have been intended - confirm before changing.
        if downsize is None:
            print("pictures > 256px => abbord")
            return path, session, test_ds
        shap_values_list, test_images_list = calculate_shap_values(model, test_ds, num_samples=1, num_images_to_explain=3)
        plot_shap_values(shap_values_list, test_images_list)

    return path, session, test_ds

In [None]:
def check_images(directory):
    """Verify every PNG below ``directory`` and quarantine unreadable ones.

    Files whose name ends in ``.png`` (case-insensitive) are opened with PIL
    and verified. A file that raises IOError or SyntaxError is moved to a
    ``trash`` folder located next to ``directory``; its path relative to
    ``directory`` is recreated inside that folder.

    Args:
        directory: Root folder that is walked recursively.
    """
    print(f"check images in: {directory}")

    quarantine_root = os.path.join(directory, '..', 'trash')
    os.makedirs(quarantine_root, exist_ok=True)

    for current_dir, _subdirs, names in os.walk(directory):
        for name in names:
            if not name.lower().endswith('.png'):
                continue

            source = os.path.join(current_dir, name)
            try:
                with Image.open(source) as picture:
                    picture.verify()
            except (IOError, SyntaxError):
                print(f"Fehler bei {name}, verschiebe nach trash")
                # Mirror the position below `directory` inside the trash folder.
                relative_location = os.path.relpath(source, directory)
                destination = os.path.join(quarantine_root, relative_location)
                os.makedirs(os.path.dirname(destination), exist_ok=True)
                shutil.move(source, destination)

# Compare

In [None]:
def compare_segments(output_dir):
    """Plot mel spectrograms of two label folders side by side.

    The label sub-folders of ``output_dir`` are sorted and the first two are
    compared. From each of them every 30th PNG (in sorted order) is taken and
    the samples are paired by position; at most five pairs are drawn. Every
    comparison is written to ``./compare/<n>_compare.png`` and then shown.

    Args:
        output_dir: Directory containing the label sub-folders with the
            mel-spectrogram PNG files.

    Raises:
        ValueError: If fewer than two label sub-folders exist.
    """
    output_dir = Path(output_dir)
    label_dirs = sorted(d for d in output_dir.iterdir() if d.is_dir())

    if len(label_dirs) < 2:
        raise ValueError("Es müssen mindestens zwei Label-Unterordner vorhanden sein.")

    # Only two panels are drawn, so restrict the comparison to the first two
    # label folders; with three or more folders the pair unpacking used to fail.
    first_samples, second_samples = (
        sorted(label_dir.glob("*.png"))[::30] for label_dir in label_dirs[:2]
    )
    pairs = list(zip(first_samples, second_samples))[:5]

    compare_dir = Path("./compare")
    if pairs:
        # savefig does not create missing directories.
        compare_dir.mkdir(parents=True, exist_ok=True)

    for idx, (file1, file2) in enumerate(pairs, start=1):
        fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 5))

        for ax, image_file, label_no in ((ax_left, file1, 1), (ax_right, file2, 2)):
            ax.imshow(plt.imread(image_file))
            ax.set_title(f"Label {label_no}: {image_file.stem[:80]}...", fontsize=6)
            ax.axis('off')

        fig.suptitle(f"Vergleich {idx}")

        # Save before showing: on interactive/inline backends plt.show()
        # releases the current figure and a later plt.savefig writes a blank one.
        fig.savefig(compare_dir / f"{idx}_compare.png")
        plt.show()
        # Release the figure so repeated calls do not accumulate memory.
        plt.close(fig)

# Predict

In [None]:
def collect_predictions(model, test_ds, max_images=None):
    """Run the model over ``test_ds`` and collect true/predicted class names.

    Args:
        model: Object with a ``predict(images)`` method returning one score
            vector per image.
        test_ds: Iterable of ``(images, labels)`` batches with integer class
            indices as labels. When it has a ``class_names`` attribute the
            indices are translated through it, otherwise the index itself is
            used as the name (as a string).
        max_images: Optional upper bound on the number of collected results.
            When given, the dataset is shuffled first via
            ``test_ds.shuffle(buffer_size=1000)``.

    Returns:
        list[dict]: One ``{'True Class': ..., 'Predicted Class': ...}`` entry
        per evaluated image.
    """
    # The previous fallback built the name list from len(test_ds), which is the
    # number of batches rather than the number of classes, and failed with an
    # IndexError whenever a label index was >= the batch count.
    class_names = getattr(test_ds, "class_names", None)

    def label_name(index):
        index = int(index)
        return class_names[index] if class_names is not None else str(index)

    if max_images is not None:
        test_ds = test_ds.shuffle(buffer_size=1000)

    def limit_reached():
        return max_images is not None and len(results) >= max_images

    results = []
    for images, labels in test_ds:
        predicted_indices = np.argmax(model.predict(images), axis=1)
        # np.asarray accepts eager tensors as well as plain NumPy label arrays.
        true_indices = np.asarray(labels)

        for true_index, predicted_index in zip(true_indices, predicted_indices):
            if limit_reached():
                break
            results.append({
                'True Class': label_name(true_index),
                'Predicted Class': label_name(predicted_index)
            })

        if limit_reached():
            break

    return results

def generate_summary(results):
    """Summarize overall and per-class accuracy of collected predictions.

    Args:
        results: List of dicts with the keys ``'True Class'`` and
            ``'Predicted Class'``.

    Returns:
        dict: ``'Total Correct'`` holds a string of the form
        ``"<correct> / <total> (<accuracy>%)"``; ``'Classwise Accuracy'`` maps
        each true class to its accuracy formatted as ``"<accuracy>%"``.
        An empty ``results`` list yields ``"0 / 0 (0.00%)"`` and an empty
        class mapping instead of a ZeroDivisionError.
    """
    # One pass over the results instead of re-scanning them for every class.
    total_per_class = Counter()
    correct_per_class = Counter()
    for result in results:
        true_label = result['True Class']
        total_per_class[true_label] += 1
        if true_label == result['Predicted Class']:
            correct_per_class[true_label] += 1

    total_predictions = len(results)
    correct_predictions = sum(correct_per_class.values())
    # Guard the division: an empty evaluation has no defined accuracy.
    accuracy = correct_predictions / total_predictions * 100 if total_predictions else 0.0

    classwise_accuracy = {
        label: f"{correct_per_class[label] / class_total * 100:.2f}%"
        for label, class_total in total_per_class.items()
    }

    return {
        'Total Correct': f"{correct_predictions} / {total_predictions} ({accuracy:.2f}%)",
        'Classwise Accuracy': classwise_accuracy
    }

def display_summary(summary, save_path="model_results"):
    """Render the accuracy summary as a table figure and save it as PNG.

    Args:
        summary: Dict with the keys ``'Total Correct'`` and
            ``'Classwise Accuracy'`` (a mapping from label to accuracy text).
        save_path: Directory that receives ``summary_table.png``. It is created
            when it does not exist yet.

    Returns:
        The matplotlib figure containing the table.
    """
    rows = [["Total Correct", summary["Total Correct"]]]
    rows.extend(
        [f"{label} Accuracy", accuracy]
        for label, accuracy in summary['Classwise Accuracy'].items()
    )
    summary_df = pd.DataFrame(rows, columns=["Metric", "Value"])

    fig, ax = plt.subplots(figsize=(8, 2))
    ax.axis('tight')
    ax.axis('off')
    ax.table(cellText=summary_df.values, colLabels=summary_df.columns, cellLoc="center", loc="center", colWidths=[0.5, 0.5])

    # savefig does not create missing folders; an empty save_path means the
    # current working directory and needs no creation.
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    table_path = os.path.join(save_path, "summary_table.png")

    # Write the file before showing so the result never depends on what the
    # active backend does with the figure in plt.show().
    fig.savefig(table_path, bbox_inches='tight', pad_inches=0.1)
    print(f"Table saved as {table_path}")
    plt.show()

    return fig

def visualize_and_summarize_predictions(model, test_ds, max_images=10):
    """Collect predictions, summarize them and display the summary table.

    Args:
        model: Model handed to ``collect_predictions``.
        test_ds: Dataset handed to ``collect_predictions``.
        max_images: Upper bound on the number of evaluated images.
    """
    display_summary(
        generate_summary(
            collect_predictions(model, test_ds, max_images=max_images)
        )
    )

# Run

In [None]:
# Pipeline driver: prepares the train/val/test data, turns it into mel
# spectrogram images, trains the model and finally runs the saved model
# through the `use_model_picture` notebook.
if __name__ == "__main__":
    # Unpack the three data sets into UNZIP_DIR, then rename the audio files.
    # NOTE(review): extract_zip / rename_audio_files are defined outside this
    # section - confirm their exact behavior there.
    extract_zip(TRAIN_DIR, UNZIP_DIR)
    extract_zip(TEST_DIR, UNZIP_DIR)
    extract_zip(VAL_DIR, UNZIP_DIR)
    rename_audio_files(UNZIP_DIR)

    # The *_DIR globals are rebound to whatever normalize_audio_length returns;
    # everything below works on those returned directories.
    TRAIN_DIR=normalize_audio_length(TRAIN_DIR)
    VAL_DIR=normalize_audio_length(VAL_DIR)
    TEST_DIR=normalize_audio_length(TEST_DIR)


    # train_mel_dir / val_mel_dir / test_mel_dir are module-level names that
    # train_and_result() reads, so they must be assigned before it is called.
    train_mel_dir = Path(f"{TRAIN_DIR.stem}_mel_spectrograms" + ("_downsize" if downsize else ""))  # target folder for the mel spectrograms
    # NOTE(review): only the training directory is resolved to an absolute path
    # here; VAL_DIR and TEST_DIR are passed as-is - confirm this is intended.
    generate_mel_spectrograms_with_structure(TRAIN_DIR.resolve(), train_mel_dir)
    train_mel_dir = process_spectrograms(train_mel_dir)

    val_mel_dir = Path(f"{VAL_DIR.stem}_mel_spectrograms" + ("_downsize" if downsize else ""))  # target folder for the mel spectrograms
    generate_mel_spectrograms_with_structure(VAL_DIR, val_mel_dir)
    val_mel_dir = process_spectrograms(val_mel_dir)

    test_mel_dir = Path(f"{TEST_DIR.stem}_mel_spectrograms" + ("_downsize" if downsize else ""))  # target folder for the mel spectrograms
    generate_mel_spectrograms_with_structure(TEST_DIR, test_mel_dir)
    test_mel_dir = process_spectrograms(test_mel_dir)

    # Optional integrity check: moves unreadable PNGs into a trash folder.
    if check_image:
        check_images(train_mel_dir)
        check_images(val_mel_dir)
        check_images(test_mel_dir)


    # import ipynb.fs.defs.Programm_audioToPicture as old_Programm

    # wav_dir = Path("../picture_model/compare/orig_vs_split")
    # wav_files = list(wav_dir.glob("*.wav"))

    # for wav_file in wav_files:
    #     splits_dir = wav_dir / f"{wav_file.stem}_splits"
    #     print(f"wav_file: {wav_file}")
    #     old_Programm.process_and_compare_spectrograms(wav_file, splits_dir, input_file=Path(wav_file))


    # old_Programm.process_and_compare_spectrograms("../picture_model/compare/orig_vs_split/orig-16-44-mono_Burglar Bob.wav", "../picture_model/compare/orig_vs_split/splits/")
    # compare_segments(train_mel_dir)

    # Train the model and store the session; `path` is the saved session archive.
    path, session, test_ds = train_and_result()

    # Run the function
    # visualize_and_summarize_predictions(session.model, test_ds)

    # Load the definitions of the sibling notebook and evaluate the saved model.
    import ipynb.fs.defs.use_model_picture as use_model
    use_model.run(path, downsize)
    