In [1]:

import numpy as np
import tensorflow as tf
import pathlib
from pathlib import Path
import librosa
import os
from collections import Counter


# Shared notebook state. Everything starts out empty and is filled in by the
# pipeline functions defined further down (build, drop, split, extract).
file_list = train_ds = val_ds = test_ds = None
label_names = None
example_audio = example_labels = example_filenames = None

# Dataset locations; assigned by run() or test() before the pipeline executes.
TRAIN_DIR = TEST_DIR = DATA_DIR = VAL_DIR = None


Divided into directories this way, you can easily load the data using `keras.utils.audio_dataset_from_directory`.

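The audio clips are 1 second or less at 16 kHz. The `output_sequence_length=16000` pads the short ones to exactly 1 second (and would trim longer ones) so that they can be easily batched. Note that the `build_DS` function below does not use these settings: it loads audio with librosa at 44.1 kHz and produces 1-second samples of 44100 values each.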


In [3]:
def _cut_segments(audio_data, output_length, split_audio):
    """Return the fixed-length pieces that one decoded audio array contributes."""
    if split_audio:
        # Only complete segments are kept; a trailing remainder (and any file
        # shorter than one segment) contributes nothing.
        num_splits = len(audio_data) // output_length
        return [
            audio_data[i * output_length:(i + 1) * output_length]
            for i in range(num_splits)
        ]

    if len(audio_data) < output_length:
        # Zero-pad at the end when the file is shorter than one segment.
        audio_data = np.pad(audio_data, (0, output_length - len(audio_data)), mode='constant')
    # Slicing is a no-op for exact-length input and trims anything longer.
    return [audio_data[:output_length]]


def build_DS(DIR, split_audio=False, sample_rate=44100, seconds=1, batch_size=64):
    """Load a directory of labelled audio files into a batched ``tf.data.Dataset``.

    Every sub-directory of ``DIR`` is treated as one label and every regular
    file inside it is decoded with ``librosa.load`` at ``sample_rate``. Files
    that fail to load are reported on stdout and skipped.

    Args:
        DIR: Directory containing one sub-directory per label.
        split_audio: If True, cut each file into consecutive, non-overlapping
            segments of ``seconds`` length and drop any incomplete remainder.
            If False, each file yields one sample that is zero-padded or
            trimmed to ``seconds``.
        sample_rate: Sampling rate passed to ``librosa.load``.
        seconds: Length of one sample in seconds.
        batch_size: Upper bound for the batch size; the number of samples is
            used instead when it is smaller.

    Returns:
        A tuple ``(dataset, label_names)``. ``dataset`` is shuffled and batched
        and yields ``(audio, label)`` pairs where ``audio`` carries a trailing
        axis of size 1 and ``label`` is an int64 index. ``label_names`` is a
        sorted NumPy array in which position ``i`` is the name of label ``i``.

    Raises:
        ValueError: If the sample length is not positive or if no sample could
            be loaded from ``DIR``.
    """
    output_length = int(sample_rate * seconds)
    if output_length <= 0:
        raise ValueError(
            f"sample_rate * seconds must be positive, got {sample_rate} * {seconds}"
        )

    # Sort the label directories so that the integer index assigned here always
    # matches the position in the returned (sorted) label_names array, and is
    # the same for every directory that contains the same label folders.
    labels = sorted(
        entry for entry in os.listdir(DIR)
        if os.path.isdir(os.path.join(DIR, entry))
    )
    label_to_index = {label: idx for idx, label in enumerate(labels)}

    audio_files = []
    audio_labels = []

    for label in labels:
        label_dir = os.path.join(DIR, label)

        if not os.path.exists(label_dir):
            print(f"Ordner {label_dir} existiert nicht oder keine Zugriffsrechte.")
            continue

        for filename in os.listdir(label_dir):
            file_path = os.path.join(label_dir, filename)
            if not os.path.isfile(file_path):
                continue

            try:
                # Decode the file with librosa, resampling to sample_rate.
                audio_data, _ = librosa.load(file_path, sr=sample_rate)
                segments = _cut_segments(audio_data, output_length, split_audio)
            except Exception as e:
                # Best effort: report the broken file and carry on with the rest.
                print(f"Fehler beim Laden der Datei {file_path}: {e}")
                continue

            audio_files.extend(segments)
            audio_labels.extend([label_to_index[label]] * len(segments))

    num_samples = len(audio_files)
    if num_samples == 0:
        # Without this guard the pipeline below fails with an opaque TensorFlow
        # error (batch size / shuffle buffer of 0).
        raise ValueError(f"No usable audio samples found in {DIR}")

    # Stack into one array first; converting a Python list of arrays directly
    # is considerably slower. The trailing axis is the (single) channel.
    audio_tensor = tf.convert_to_tensor(np.stack(audio_files))
    audio_tensor = tf.expand_dims(audio_tensor, axis=-1)
    label_tensor = tf.convert_to_tensor(audio_labels, dtype=tf.int64)

    dataset = tf.data.Dataset.from_tensor_slices((audio_tensor, label_tensor))

    print(f"Anzahl samples fuer {DIR}: {num_samples}")
    # Shuffle across the whole dataset, then batch.
    dataset = dataset.shuffle(buffer_size=num_samples).batch(min(batch_size, num_samples))

    label_names = np.array(labels)

    print(f"Manuell gesetzte label names fuer {DIR}: ", label_names)

    return dataset, label_names

In [4]:
def build():
    """Create the train/validation/test datasets and the list of training files.

    Reads the module-level TRAIN_DIR, VAL_DIR and TEST_DIR and stores its
    results in the globals file_list, train_ds, val_ds, test_ds and
    label_names. label_names is taken from the training directory only.
    """
    global file_list
    global train_ds
    global val_ds
    global test_ds
    global label_names

    # TRAIN_DIR may be a plain string (run() forwards whatever it is given);
    # wrapping it keeps the "/" join working for both str and Path values.
    wav_pattern = str(pathlib.Path(TRAIN_DIR) / '**/*.wav')
    file_list = tf.data.Dataset.list_files(wav_pattern, shuffle=False)

    split = True
    train_ds, label_names = build_DS(TRAIN_DIR, split)
    val_ds, _ = build_DS(VAL_DIR, split)
    test_ds, _ = build_DS(TEST_DIR, split)

    # An earlier variant loaded the three directories with
    # tf.keras.utils.audio_dataset_from_directory(directory=..., batch_size=64,
    # seed=0, output_sequence_length=44100 * 40) and took label_names from
    # train_ds.class_names; build_DS replaces that.

    # The dataset now contains batches of audio clips and integer labels.
    # The audio clips have a shape of `(batch, samples, channels)`.
    # Inside a function a bare expression is discarded, so print it explicitly.
    print("Element spec:", train_ds.element_spec)

The dataset now contains batches of audio clips and integer labels. The audio clips have a shape of `(batch, samples, channels)`.


This dataset only contains single channel audio, so use the `tf.squeeze` function to drop the extra axis:


In [5]:
def drop():
    """Strip the trailing channel axis from the audio in all three datasets.

    Replaces the globals train_ds, val_ds and test_ds with mapped versions in
    which every audio tensor has been squeezed along its last axis; labels are
    passed through unchanged.
    """
    global train_ds, val_ds, test_ds

    # Autograph conversion is switched off for this small mapping function.
    @tf.autograph.experimental.do_not_convert
    def _drop_channel_axis(waveform, label):
        return tf.squeeze(waveform, axis=-1), label

    train_ds = train_ds.map(_drop_channel_axis, num_parallel_calls=tf.data.AUTOTUNE)
    val_ds = val_ds.map(_drop_channel_axis, num_parallel_calls=tf.data.AUTOTUNE)
    test_ds = test_ds.map(_drop_channel_axis, num_parallel_calls=tf.data.AUTOTUNE)

The `utils.audio_dataset_from_directory` function only returns up to two splits. It's a good idea to keep a test set separate from your validation set.
Ideally you'd keep it in a separate directory, but in this case you can use `Dataset.shard` to split the validation set into two halves. Note that iterating over **any** shard will load **all** the data, and only keep its fraction.


In [6]:
def split():
    """Divide the current validation set into a test half and a validation half.

    Shard 0 of val_ds becomes the new global test_ds (whatever test_ds held
    before is replaced) and shard 1 becomes the new global val_ds.
    """
    global test_ds, val_ds

    # Keep a handle on the unsplit set so both shards derive from the same source.
    unsplit_val = val_ds
    test_ds = unsplit_val.shard(num_shards=2, index=0)
    val_ds = unsplit_val.shard(num_shards=2, index=1)

In [7]:
def extract():
    """Take one batch from train_ds, print a label summary and collect file names.

    Stores the batch in the globals example_audio and example_labels, prints
    the batch shapes and a per-label count, and fills the global
    example_filenames with the base names of the first ``len(example_audio)``
    entries of file_list.

    Raises:
        ValueError: If train_ds yields no batch at all.
    """
    global example_audio
    global example_labels
    global example_filenames

    first_batch = next(iter(train_ds.take(1)), None)
    if first_batch is None:
        # Previously this surfaced later as an UnboundLocalError on batch_labels.
        raise ValueError("train_ds yielded no batch; cannot extract examples")
    example_audio, example_labels = first_batch

    print("Audio Shape:", example_audio.shape)
    print("Labels Shape:", example_labels.shape)
    batch_labels = example_labels.numpy()
    print("Batch Labels:", batch_labels)

    # Count how often each label occurs in this batch.
    label_counts = Counter(batch_labels)

    print("Anzahl der Labels im Batch:")
    for label, count in label_counts.items():
        print(f"Label {label}: {count}")

    # Total number of labels in the batch.
    total_labels = len(batch_labels)
    print(f"Gesamtanzahl der Labels im Batch: {total_labels}")

    # NOTE(review): these are simply the first entries of file_list. file_list
    # is listed unshuffled while train_ds is shuffled (and may hold several
    # segments per file), so the names are not necessarily the files the batch
    # samples came from - confirm before relying on the pairing.
    example_filenames = [
        pathlib.Path(filepath.numpy().decode('utf-8')).name
        for filepath in file_list.take(len(example_audio))
    ]

In [None]:
def countLabels():
    """Print how often each label occurs across the whole of train_ds.

    The dataset is unbatched so that every single label is visited once; the
    per-label counts and the overall total are written to stdout.
    """
    # Gather every label value into one NumPy array.
    collected = np.array(
        [label_tensor.numpy() for _, label_tensor in train_ds.unbatch()]
    )

    # Tally the occurrences of each label.
    histogram = Counter(collected)

    print("Anzahl der Labels im gesamten Dataset:")
    for label_id, occurrences in histogram.items():
        print(f"Label {label_id}: {occurrences}")

    # Overall number of labels seen.
    print(f"Gesamtanzahl der Labels im Dataset: {len(collected)}")

In [8]:
def compare():
    """Print filename-derived labels next to the labels of the example batch.

    Walks TRAIN_DIR, takes the part of every ``.wav`` file name before the
    first underscore, maps it to an integer via a fixed table (-1 for unknown
    prefixes) and prints it alongside the entries of example_labels, position
    by position, up to the shorter of the two sequences.
    """
    # Prefix (text before the first "_") of every .wav file below TRAIN_DIR.
    file_labels = [
        wav_name.split('_')[0]
        for _root, _subdirs, names in os.walk(TRAIN_DIR)
        for wav_name in names
        if wav_name.endswith('.wav')
    ]

    # Fixed prefix -> integer label table.
    label_mapping = {
        'orig-16-44-mono': 1,  # example: positive
        'upscale-from-mp3-128': 0,  # example: negative
    }

    mapped_labels = [label_mapping.get(prefix, -1) for prefix in file_labels]
    print("Gemappte Labels aus Dateinamen:", mapped_labels)

    # Pair the batch labels with the mapped labels by position.
    batch_labels = example_labels.numpy()
    for i, (batch_label, mapped_label) in enumerate(zip(batch_labels, mapped_labels)):
        print(f"Index: {i}, Batch Label: {batch_label}, Gemapptes Label: {mapped_label}")

In [9]:
def run(_TRAIN_DIR, _TEST_DIR, _DATA_DIR, _VAL_DIR):
    """Configure the dataset directories and execute the loading pipeline.

    Stores the four arguments in the module-level TRAIN_DIR, TEST_DIR,
    DATA_DIR and VAL_DIR, then calls build(), drop(), countLabels() and
    extract() in that order.

    Returns:
        A 10-tuple of the resulting globals: (file_list, example_labels,
        example_audio, train_ds, val_ds, test_ds, label_names, example_audio,
        example_labels, example_filenames). example_audio and example_labels
        each appear twice.
    """
    global TRAIN_DIR, TEST_DIR, DATA_DIR, VAL_DIR

    TRAIN_DIR, TEST_DIR, DATA_DIR, VAL_DIR = _TRAIN_DIR, _TEST_DIR, _DATA_DIR, _VAL_DIR

    # split() and compare() are left out of this pipeline.
    for stage in (build, drop, countLabels, extract):
        stage()

    results = (
        file_list, example_labels, example_audio,
        train_ds, val_ds, test_ds, label_names,
        example_audio, example_labels, example_filenames,
    )
    return results

In [10]:
import numpy as np
import tensorflow as tf
import pathlib
from pathlib import Path
import os
import librosa
def test():
    """Run the pipeline against the small sample datasets below ``data/``.

    Points the module-level directory globals at the ``small_*`` folders and
    then calls build(), drop(), split() and extract() in that order. Because
    split() runs here, test_ds ends up as a shard of val_ds rather than the
    dataset that build() loaded from TEST_DIR.
    """
    global TRAIN_DIR, TEST_DIR, DATA_DIR, VAL_DIR

    TRAIN_DIR, TEST_DIR, DATA_DIR, VAL_DIR = (
        pathlib.Path(location)
        for location in (
            'data/small_train_ds',
            'data/small_test_ds',
            'data',
            'data/small_val_ds',
        )
    )

    # compare() is left out of this pipeline.
    for stage in (build, drop, split, extract):
        stage()
    

# test()