## Transform Dataset to Numpy Arrays

Dieser Code transformiert den Datensatz in NumPy-Arrays. Das spart Rechenzeit beim Training. Hauptsächlich wurde er jedoch erstellt, um daraus C-Arrays zu erzeugen, die als Header-Dateien in das C-Projekt eingefügt werden können.

Dieser Code wurde mit ChatGPT erstellt!

Prompt: 
Lade Daten als Audio aus dem Datensatz, führe die MFCC Berechnung durch und speicher die Ergebnisse als NumpyArrays in einem neuen Ordner.

In [1]:
import os
import numpy as np
import torchaudio
import tensorflow as tf

def compute_tf_mfcc(waveform, sample_rate, window_length, hop_length, num_mel_bins=40, num_mfcc=40):
    """Compute MFCC features for an audio signal with TensorFlow.

    Pipeline: Hann-windowed STFT -> magnitude -> mel filterbank -> log -> MFCC.

    Args:
        waveform: Audio samples as a tensor accepted by ``tf.signal.stft``
            (the caller in this file passes a float32 tensor).
        sample_rate: Sampling rate in Hz; the mel filterbank spans
            0 Hz .. ``sample_rate / 2``.
        window_length: STFT frame length in samples; also used as FFT length.
        hop_length: STFT frame step in samples.
        num_mel_bins: Number of mel bands in the filterbank.
        num_mfcc: Number of leading coefficients kept along the last axis.

    Returns:
        Tensor of MFCCs whose last axis is truncated to ``num_mfcc`` entries.
    """
    # Magnitude spectrogram; no padding at the end, so a trailing partial
    # frame is dropped.
    magnitude = tf.abs(
        tf.signal.stft(
            waveform,
            frame_length=window_length,
            frame_step=hop_length,
            fft_length=window_length,
            window_fn=tf.signal.hann_window,
            pad_end=False,
        )
    )

    # Filterbank that maps the linear-frequency bins onto the mel scale.
    mel_filterbank = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_mel_bins,
        num_spectrogram_bins=magnitude.shape[-1],
        sample_rate=sample_rate,
        lower_edge_hertz=0,
        upper_edge_hertz=sample_rate / 2,
    )

    # Project every frame onto the mel bands (contracts the frequency axis).
    mel_energy = tf.tensordot(magnitude, mel_filterbank, axes=1)
    # tensordot can lose static shape information; restore it explicitly.
    static_shape = magnitude.shape[:-1].concatenate(mel_filterbank.shape[-1:])
    mel_energy.set_shape(static_shape)

    # Small offset keeps log() finite for silent (all-zero) bands.
    log_mel_energy = tf.math.log(mel_energy + 1e-6)

    all_coefficients = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_energy)
    return all_coefficients[..., :num_mfcc]

def precompute_mfcc(input_dir, output_dir, sample_rate=16000, window_length=1024, hop_length=512):
    """Precompute MFCCs for every ``.wav`` file below ``input_dir``.

    The directory tree is walked recursively. Each audio file is mixed down
    to mono, resampled to ``sample_rate`` if necessary, converted to MFCCs
    via ``compute_tf_mfcc`` and stored as a ``.npy`` file under
    ``output_dir``, mirroring the relative folder structure of the input.

    Args:
        input_dir: Root directory that is searched for ``.wav`` files.
        output_dir: Root directory for the generated ``.npy`` files;
            sub-directories are created on demand.
        sample_rate: Target sampling rate in Hz.
        window_length: STFT frame length in samples. The default of 1024
            corresponds to 64 ms at 16 kHz.
        hop_length: STFT frame step in samples. The default of 512
            corresponds to 32 ms at 16 kHz.

    Returns:
        None. Results are written to disk; each processed path is printed.
    """
    # Building a Resample transform is comparatively costly, so keep one per
    # source sampling rate instead of creating a new one for every file.
    resamplers = {}

    for root, _dirs, files in os.walk(input_dir):
        for file in files:
            if not file.endswith('.wav'):
                continue

            file_path = os.path.join(root, file)
            print("Processing:", file_path)

            # Load the audio and average over the channel axis (mono mix-down).
            waveform, orig_sr = torchaudio.load(file_path)
            waveform = waveform.mean(dim=0)

            # Resample only when the file's rate differs from the target rate.
            if orig_sr != sample_rate:
                resampler = resamplers.get(orig_sr)
                if resampler is None:
                    resampler = torchaudio.transforms.Resample(
                        orig_freq=orig_sr, new_freq=sample_rate
                    )
                    resamplers[orig_sr] = resampler
                waveform = resampler(waveform)

            # torch tensor -> NumPy array -> float32 TensorFlow tensor.
            waveform_np = waveform.cpu().numpy().squeeze()
            waveform_tf = tf.convert_to_tensor(waveform_np, dtype=tf.float32)

            mfccs_tf = compute_tf_mfcc(waveform_tf, sample_rate, window_length, hop_length)
            # Transpose so the coefficient axis comes first, i.e.
            # (num_mfcc, frames) for a 1-D input waveform.
            mfccs_np = mfccs_tf.numpy().transpose()

            # Mirror the input folder layout below output_dir.
            relative_path = os.path.relpath(file_path, input_dir)
            output_file = os.path.join(output_dir, os.path.splitext(relative_path)[0] + ".npy")
            os.makedirs(os.path.dirname(output_file), exist_ok=True)
            np.save(output_file, mfccs_np)

# Input directories of the 16 kHz dataset, one per split.
train_dataset_path = "/home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Training"
val_dataset_path = "/home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Validation"
test_dataset_path = "/home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Testing"

# Output directories that receive the precomputed MFCC arrays (.npy).
output_train = "/home/student42/Documents/Datset/numpy_arrays_final/train_dataset"
output_val = "/home/student42/Documents/Datset/numpy_arrays_final/val_dataset"
output_test = "/home/student42/Documents/Datset/numpy_arrays_final/test_dataset"

# Convert the splits in the order training, validation, testing.
for _split_input, _split_output in (
    (train_dataset_path, output_train),
    (val_dataset_path, output_val),
    (test_dataset_path, output_test),
):
    precompute_mfcc(_split_input, _split_output)


2025-02-19 14:39:36.420190: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-19 14:39:36.444041: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Processing: /home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Training/forest_path/a066_40_50.wav


2025-02-19 14:39:37.254242: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-19 14:39:37.282530: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-19 14:39:37.283889: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Processing: /home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Training/forest_path/a069_40_50.wav
Processing: /home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Training/forest_path/a097_60_70.wav
Processing: /home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Training/forest_path/a066_10_20.wav
Processing: /home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Training/forest_path/a058_80_90.wav
Processing: /home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Training/forest_path/a067_170_180.wav
Processing: /home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Training/forest_path/a067_100_110.wav
Processing: /home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Training/forest_path/a059_100_110.wav
Processing: /home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Training/forest_path/a097_170_180.wav
Processing: /home/student42/Documents/Datset/KerasTUT2017_16khz_FULL/Training/forest_path/a056_170_180.wav
Processing: /home/student42/Documents/Datset/