In [None]:
import os
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from IPython import get_ipython
import librosa
import zipfile
import shutil
import visualkeras

import ipynb.fs.defs.build_spectogram_ds as wave_to_spec
from tensorboard.plugins.hparams import api as hp


# Shared module state. These names are assigned by run() before any of the
# helper functions below are called. A `global` statement at module level has
# no runtime effect; the declarations only document which names the helpers
# expect to find in the module namespace.
global history, model, session
global test_ds, val_ds, train_ds
global label_names, TEST_DIR, SECONDS

# Hyperparameter handles from the TensorBoard HParams plugin.
HP_DROPOUT = hp.HParam("dropout")
HP_REGULARIZATION = hp.HParam("regularization")


Let's plot the training and validation loss curves to check how your model has improved during training:


In [None]:
def adjust_zip_file_path(base_name="model", decimals=3):
    """Build a descriptive file stem from the training-set size and the run metrics.

    Reads the module-level ``train_ds``, ``SECONDS`` and ``session`` objects.

    Args:
        base_name: Prefix of the generated name.
        decimals: Number of decimal places used for every metric.

    Returns:
        A string of the form
        ``<base_name>_(<n_samples>-<SECONDS>)_loss_<x>_acc_<x>_val_loss_<x>_val_acc_<x>``
        (no file extension).
    """
    # Count the individual (unbatched) training examples by iterating the dataset.
    sample_count = 0
    for _ in train_ds.unbatch():
        sample_count += 1

    # session.model_values is unpacked as (val_loss, val_acc, train_loss, train_acc).
    val_loss, val_accuracy, train_loss, train_accuracy = session.model_values

    name_parts = [f"{base_name}_({sample_count}-{SECONDS})"]
    tagged_metrics = (
        ("loss", train_loss),
        ("acc", train_accuracy),
        ("val_loss", val_loss),
        ("val_acc", val_accuracy),
    )
    for tag, metric in tagged_metrics:
        # Round first, then format with a fixed number of decimals.
        name_parts.append(f"{tag}_{round(metric, decimals):.{decimals}f}")

    return "_".join(name_parts)


In [None]:
def model_train_results():
    """Plot the loss and accuracy curves of the training run and save them as PNG.

    All inputs come from module-level state: ``session`` (its ``history``,
    ``callbacks``, ``model_values``, ``best_model_values`` and
    ``model_batch_size`` attributes) and the ``mel_spectogram`` flag.

    The figure is written to ``./saved`` and ``./saved_models`` under the name
    returned by ``adjust_zip_file_path(base_name='history')``. Both directories
    are created if missing. The figure is neither shown nor closed here.
    """
    # Function-local imports kept so the function stays self-contained.
    import matplotlib.pyplot as plt
    import numpy as np

    history = session.history
    metrics = history.history
    epochs = np.array(history.epoch)
    batch_size = session.model_batch_size

    # Same ordering that adjust_zip_file_path unpacks.
    val_loss, val_accuracy, train_loss, train_accuracy = session.model_values

    # NOTE(review): assumes the EarlyStopping callback sits at index 1 of
    # session.callbacks -- confirm against the code that builds the session.
    early_stopping = session.callbacks[1]
    best_epoch = early_stopping.best_epoch

    def mark_best_epoch():
        """Draw a dashed vertical line at the best-weights epoch on the current axes."""
        # best_epoch is plotted on the same x-axis as history.epoch.
        # NOTE(review): Keras appears to report both as 0-based indices, so the
        # number in the legend would be 0-based as well -- confirm this is intended.
        if best_epoch is not None:
            plt.axvline(
                x=best_epoch,
                color="green",
                linestyle="--",
                label=f"Best Weights Epoch {best_epoch}",
            )

    plt.figure(figsize=(16, 6))

    # --- Loss panel ---
    plt.subplot(1, 2, 1)
    plt.plot(epochs, metrics["loss"], label=f"Train Loss {train_loss:.3f}")
    plt.plot(epochs, metrics["val_loss"], label=f"Val Loss {val_loss:.3f}")
    mark_best_epoch()

    best_values = session.best_model_values
    print(best_values)

    # Pull the tuned dropout / regularization values out of the HParam dict;
    # both stay 'N/A' when no tuning result is available.
    dropout_value = 'N/A'
    regularization_value = 'N/A'
    if best_values is not None:
        for param, value in best_values.items():
            if param.name == 'dropout':
                dropout_value = value
            elif param.name == 'regularization':
                regularization_value = value

    print(f"dropout: {dropout_value}, regularization: {regularization_value}")

    if best_values is None:
        hparams_text = "Default"
    else:
        hparams_text = f"dropout: {dropout_value}, regularization: {regularization_value}"

    # Footer line with the run configuration, centred below both panels.
    plt.figtext(
        0.5, 0.025,
        f"Batch Size: {batch_size} || "
        f"Training with: {'Mel_Spectogram' if mel_spectogram else 'Spectogram'} || "
        f"HParams: {hparams_text}",
        fontsize=10, ha="center", va="bottom", color="black"
    )

    plt.legend()
    plt.ylim([0, max(plt.ylim())])
    plt.xlabel("Epoch")
    plt.ylabel("Loss [CrossEntropy]")

    # --- Accuracy panel (history values scaled to percent) ---
    plt.subplot(1, 2, 2)
    plt.plot(epochs, 100 * np.array(metrics["accuracy"]), label=f"Train Accuracy {train_accuracy:.3f}")
    plt.plot(epochs, 100 * np.array(metrics["val_accuracy"]), label=f"Val Accuracy {val_accuracy:.3f}")
    mark_best_epoch()

    plt.legend()
    plt.ylim([0, 100])
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy [%]")

    plt.tight_layout()

    # Build the file name once: adjust_zip_file_path iterates the whole
    # training dataset to count samples, so calling it per target is wasteful.
    file_stem = adjust_zip_file_path(base_name='history')
    for out_dir in ("./saved", "./saved_models"):
        # savefig does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)
        plt.savefig(f"{out_dir}/{file_stem}.png")
    # plt.show()


In [None]:
@tf.autograph.experimental.do_not_convert
def confusion_matrix():
    """Draw the confusion matrix of ``model`` on ``test_ds`` as an annotated heatmap.

    Uses the module-level ``model``, ``test_ds`` and ``label_names``. The
    figure is created but neither shown nor closed here.
    """
    # Predicted class id = index of the largest model output per example.
    predicted_ids = tf.argmax(model.predict(test_ds), axis=1)

    # Second pass over the dataset that keeps only the labels.
    # NOTE(review): assumes test_ds yields its elements in the same order on
    # both passes -- verify it is not reshuffled on each iteration.
    label_batches = list(test_ds.map(lambda _features, labels: labels))
    true_ids = tf.concat(label_batches, axis=0)

    matrix = tf.math.confusion_matrix(true_ids, predicted_ids)

    heatmap_options = dict(
        xticklabels=label_names,
        yticklabels=label_names,
        annot=True,
        fmt="g",
    )
    plt.figure(figsize=(10, 8))
    sns.heatmap(matrix, **heatmap_options)
    plt.ylabel("Label")
    plt.xlabel("Prediction")
    # plt.show()


## Run inference on an audio file

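Finally, verify the model's prediction output on individual audio files: `visualize_audio` cuts a one-second segment from the middle of a file, runs the model on its spectrogram, and plots the predicted class probabilities. How well does your model perform?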


In [None]:
@tf.autograph.experimental.do_not_convert
def visualize_audio(file_path):
    """Classify the middle second of an audio file and plot the class probabilities.

    The file is loaded with librosa at its native sample rate (``sr=None``).
    A one-second window centred on the middle of the signal is converted to a
    spectrogram and passed to the module-level ``model``. The softmax of the
    output is shown as a bar chart over ``label_names``, followed by an audio
    player for the analysed segment.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        None. If the file cannot be loaded, a message is printed and the
        function returns early.
    """
    try:
        audio, sample_rate = librosa.load(file_path, sr=None)
    except Exception as e:
        # Best effort: report the unreadable file and let the caller continue.
        print(f"Could not process file '{file_path}': {e}")
        return

    # One second of audio corresponds to `sample_rate` samples.
    one_second_samples = sample_rate

    # Cut one second out of the middle of the signal. For signals shorter
    # than one second the slice simply returns what is available.
    mid_point = len(audio) // 2
    start = max(0, mid_point - one_second_samples // 2)
    end = start + one_second_samples
    audio_segment = audio[start:end]

    # Drop singleton dimensions if the segment is multi-dimensional.
    if audio_segment.ndim > 1:
        waveform = tf.squeeze(audio_segment)
    else:
        waveform = audio_segment

    print(f"Form des Audiosignals: {waveform.shape}")
    print(f"Sample Rate: {sample_rate}")

    spectrogram = wave_to_spec.get_spectrogram(waveform)

    # Add the leading batch dimension for the model call.
    input_tensor = spectrogram[tf.newaxis, ...]

    prediction = model(input_tensor)

    x_labels = label_names

    plt.bar(x_labels, tf.nn.softmax(prediction[0]))
    plt.title(f"Vorhersage fuer {os.path.basename(file_path)}")
    plt.show()

    display.display(display.Audio(waveform, rate=sample_rate))


@tf.autograph.experimental.do_not_convert
def process_directory_for_visualization(directory_path):
    """Run ``visualize_audio`` on every WAV file below ``directory_path``.

    The directory tree is walked recursively. Files whose name does not end in
    ``.wav`` (case-insensitive) are skipped. If no WAV file is found at all, a
    message is printed once after the walk.

    Args:
        directory_path: Root directory to search.

    Returns:
        None.
    """
    found_wav = False
    for root, _dirs, files in os.walk(directory_path):
        for file in files:
            # Skip unrelated files instead of aborting the whole walk.
            if not file.lower().endswith(".wav"):
                continue
            found_wav = True
            file_path = os.path.join(root, file)
            print(f"Verarbeite Datei: {file_path}")
            visualize_audio(file_path)

    if not found_wav:
        print("Keine WAV-Dateien im Verzeichnis gefunden.")


# extract_zip(TEST_DIR, DATA_DIR)
# rename_audio_files(DATA_DIR)


In [None]:
@tf.autograph.experimental.do_not_convert
def save_Model():
    """Export the trained model as a SavedModel, bundle it into a zip and return its path.

    Steps:
      1. Wrap the module-level ``model`` in an ``ExportModel`` and save it to
         the temporary directory ``saved``.
      2. Call ``model_train_results()`` to write the training-curve PNGs.
      3. Render the architecture with visualkeras to ``saved/model.png``.
      4. Zip the contents of ``saved`` into
         ``saved_models/<adjust_zip_file_path()>.zip``.
      5. Remove the temporary ``saved`` directory.

    Returns:
        The path of the created zip archive.
    """
    class ExportModel(tf.Module):
        """Inference wrapper that takes a WAV file name or a batch of waveforms."""

        def __init__(self, model, label_names, history):
            self.model = model
            self.label_names = label_names
            self.history = history

            # Trace both supported signatures up front: a scalar string
            # (file name) and a float batch of 44100-sample waveforms.
            # Additional signatures (single waveform, ragged batch) could be
            # added the same way.
            self.__call__.get_concrete_function(
                x=tf.TensorSpec(shape=(), dtype=tf.string)
            )
            self.__call__.get_concrete_function(
                x=tf.TensorSpec(shape=[None, 44100], dtype=tf.float32)
            )

        @tf.function
        def __call__(self, x):
            # A string input is treated as a file name: read and decode it
            # to a single-channel waveform of 44100 samples, then add the
            # batch dimension.
            if x.dtype == tf.string:
                x = tf.io.read_file(x)
                x, _ = tf.audio.decode_wav(
                    x,
                    desired_channels=1,
                    desired_samples=44100,
                )
                x = tf.squeeze(x, axis=-1)
                x = x[tf.newaxis, :]

            # NOTE(review): the export always uses get_spectrogram, even when
            # the run was trained with mel_spectogram set -- confirm whether
            # wave_to_spec.get_mel_spectrogram should be used in that case.
            x = wave_to_spec.get_spectrogram(x)
            result = self.model(x, training=False)

            class_ids = tf.argmax(result, axis=-1)
            class_names = tf.gather(self.label_names, class_ids)
            return {
                "predictions": result,
                "class_ids": class_ids,
                "class_names": class_names,
            }

    export = ExportModel(model, label_names, history)

    model_save_path = "saved"
    archive_dir = "saved_models"

    # model_train_results() and the zip step both write into archive_dir;
    # neither savefig nor ZipFile creates a missing directory.
    os.makedirs(archive_dir, exist_ok=True)

    tf.saved_model.save(export, model_save_path)
    model_train_results()
    visualkeras.layered_view(model, legend=True, show_dimension=True, to_file=f'{model_save_path}/model.png')

    zip_file_path = f"{archive_dir}/{adjust_zip_file_path()}.zip"
    with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(model_save_path):
            for file in files:
                file_path = os.path.join(root, file)
                # Store entries relative to the export directory.
                zipf.write(file_path, os.path.relpath(file_path, model_save_path))

    # The zip is the deliverable; drop the unpacked export.
    shutil.rmtree(model_save_path)

    # To use the export again (after unpacking the archive):
    # imported = tf.saved_model.load("saved")
    # imported(waveform[tf.newaxis, :])
    return zip_file_path


In [None]:
@tf.autograph.experimental.do_not_convert
def run(
    _test_dir, _test_ds, _val_ds, _train_ds, _session, _label_names, _SECONDS, _mel_spectogram
):
    """Publish the run state as module globals, evaluate the model and export it.

    Args:
        _test_dir: Stored as ``TEST_DIR``.
        _test_ds: Test dataset, used for evaluation and the confusion matrix.
        _val_ds: Validation dataset, stored as ``val_ds``.
        _train_ds: Training dataset, stored as ``train_ds``.
        _session: Object providing at least ``history`` and ``model``.
        _label_names: Class labels, stored as ``label_names``.
        _SECONDS: Stored as ``SECONDS`` (used in the generated file names).
        _mel_spectogram: Flag stored as ``mel_spectogram``.

    Returns:
        A tuple ``(model, path)`` where ``path`` is the zip archive returned by
        ``save_Model``.
    """
    global TEST_DIR, SECONDS, label_names, mel_spectogram
    global session, history, model
    global test_ds, val_ds, train_ds

    # Session and the objects derived from it.
    TEST_DIR = _test_dir
    session = _session
    history, model = _session.history, _session.model

    # Datasets.
    test_ds, val_ds, train_ds = _test_ds, _val_ds, _train_ds

    # Remaining run configuration.
    label_names, SECONDS = _label_names, _SECONDS
    mel_spectogram = _mel_spectogram

    # model_train_results()

    # Evaluate on the test set; the returned metrics dict is not used here.
    model.evaluate(test_ds, return_dict=True)

    confusion_matrix()
    # process_directory_for_visualization(TEST_DIR)
    archive_path = save_Model()

    return model, archive_path
