In [None]:

import numpy as np
import tensorflow as tf
import pathlib
from pathlib import Path
import librosa
import os
from collections import Counter
from pydub import AudioSegment


# Shared notebook state. The dataset globals are filled in by build() /
# build_my(); the example_* globals are filled in by extract().
file_list = None  # read by extract(); not assigned in the visible cells
train_ds = val_ds = test_ds = None
label_names = None
example_audio = example_labels = example_filenames = None

# Run configuration. run() overwrites all of these; test() overwrites the
# four directory settings.
TRAIN_DIR = VAL_DIR = TEST_DIR = DATA_DIR = None
SECONDS = None      # segment length in seconds; None means "not configured"
BATCH_SIZE = None


Divided into directories this way, you can easily load the data using `keras.utils.audio_dataset_from_directory`.

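With 16 kHz clips of 1 second or less, passing `output_sequence_length=16000` pads the short ones to exactly 1 second (and would trim longer ones) so that they can be easily batched. Note that `build()` in this notebook passes `output_sequence_length=None` instead; when `SECONDS` is set, it first splits every file into segments of that length and loads the split files.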


In [2]:
def shortestTrack(DIR):
    """Return the duration (in seconds) of the shortest loadable track in DIR.

    DIR is scanned one level deep: every sub-directory is treated as a label
    folder and every regular file directly inside it is loaded with librosa
    at the file's own sample rate. Files that fail to load are reported and
    skipped.

    Returns:
        The shortest duration as a float, or None when no file could be
        loaded.
    """
    best = float('inf')  # sentinel meaning "no track measured yet"

    for entry in os.listdir(DIR):
        label_path = os.path.join(DIR, entry)
        if not os.path.isdir(label_path):
            continue  # only label sub-directories are scanned

        for name in os.listdir(label_path):
            file_path = os.path.join(label_path, name)
            if not os.path.isfile(file_path):
                continue

            try:
                # sr=None keeps the original sample rate (no resampling)
                samples, rate = librosa.load(file_path, sr=None)
                seconds = len(samples) / rate
            except Exception as e:
                print(f"Fehler beim Laden der Datei {file_path}: {e}")
                continue

            if seconds < best:
                best = seconds

    if best == float('inf'):
        print("Kein Audio-Track gefunden.")
        return None

    print(f"Die Dauer des kuerzesten Tracks ist: {best:.2f} Sekunden.")
    return best

In [3]:
def build_DS(DIR):
    """Build a shuffled, batched tf.data.Dataset of fixed-length audio segments.

    Every sub-directory of DIR is treated as one class label. Each file in a
    label directory is loaded with librosa (resampled to 44100 Hz) and cut
    into consecutive, non-overlapping segments. A trailing remainder shorter
    than one segment is dropped, and files shorter than one segment are
    skipped entirely.

    The segment length comes from the module-level SECONDS; when SECONDS is
    None, the duration of the shortest track (see shortestTrack) is used.
    The batch size is the module-level BATCH_SIZE, capped at the number of
    segments.

    Returns:
        (dataset, label_names): ``dataset`` yields (audio, label) batches in
        which audio carries a trailing channel axis of size 1 and label is an
        int64 class index. ``label_names`` is a sorted numpy array with
        ``label_names[i]`` being the class name for index ``i``.
        (None, None) when no usable audio segment was found.
    """
    # Sort the label directories so the class indices are deterministic and
    # agree with the sorted label_names returned below. os.listdir gives no
    # ordering guarantee, so unsorted indices could disagree with label_names
    # and differ between the train/val/test directories.
    labels = sorted(
        label for label in os.listdir(DIR)
        if os.path.isdir(os.path.join(DIR, label))
    )
    label_to_index = {label: idx for idx, label in enumerate(labels)}

    audio_files = []
    audio_labels = []

    sample_rate = 44100

    # Without a configured SECONDS, fall back to the shortest track's length.
    if SECONDS is None:
        seconds = shortestTrack(DIR)
        if seconds is None:
            print("Kein Track im Verzeichnis gefunden.")
            return None, None
        print(f"No SECONDS specified. Using shortest track length: {seconds} seconds")
    else:
        seconds = SECONDS
        print(f"Splitting tracks into {seconds}-second segments.")

    # Segment length expressed in samples.
    output_length = int(sample_rate * seconds)

    for label in labels:
        label_dir = os.path.join(DIR, label)

        if os.path.exists(label_dir):
            for filename in os.listdir(label_dir):
                file_path = os.path.join(label_dir, filename)

                if os.path.isfile(file_path):
                    try:
                        audio_data, _ = librosa.load(file_path, sr=sample_rate)

                        track_length = len(audio_data)  # number of samples

                        # Tracks shorter than one segment cannot contribute.
                        if track_length < output_length:
                            print(f"Skipping {file_path}: Track zu kurz ({track_length / sample_rate:.2f} Sekunden).")
                            continue

                        # Only whole segments are kept; the remainder is dropped.
                        num_splits = track_length // output_length

                        for i in range(num_splits):
                            start = i * output_length
                            end = start + output_length

                            audio_files.append(audio_data[start:end])
                            audio_labels.append(label_to_index[label])

                    except Exception as e:
                        print(f"Fehler beim Laden der Datei {file_path}: {e}")
        else:
            print(f"Ordner {label_dir} existiert nicht oder keine Zugriffsrechte.")

    # Nothing usable: batching with a batch size of 0 would fail, so report it
    # the same way as the "no track" case above.
    num_samples = len(audio_files)
    if num_samples == 0:
        print("Kein Track im Verzeichnis gefunden.")
        return None, None

    # All segments have the same length, so they stack into one 2-D array;
    # the added last axis is the channel dimension.
    audio_tensor = tf.convert_to_tensor(np.stack(audio_files))
    audio_tensor = tf.expand_dims(audio_tensor, axis=-1)

    label_tensor = tf.convert_to_tensor(audio_labels, dtype=tf.int64)

    dataset = tf.data.Dataset.from_tensor_slices((audio_tensor, label_tensor))
    print(f"Anzahl samples fuer {DIR}: {num_samples}")
    batch_size = min(BATCH_SIZE, num_samples)
    print(f"Batch Size: {batch_size}")
    # A buffer as large as the dataset shuffles across all segments.
    dataset = dataset.shuffle(buffer_size=num_samples).batch(batch_size)

    # labels is already sorted, so position i is the name of class index i.
    label_names = np.array(labels)
    print(f"Manuell gesetzte label names fuer {DIR}: ", label_names)

    return dataset, label_names

In [None]:
def build():
    """Create the train/val/test datasets with Keras' audio_dataset_from_directory.

    Reads the module-level TRAIN_DIR, VAL_DIR, TEST_DIR, SECONDS and
    BATCH_SIZE. When SECONDS is set, every directory is first split into WAV
    segments of that length (written to a sibling ``<name>_splits``
    directory) and the three *_DIR globals are repointed at those split
    directories. The resulting datasets are stored in the globals train_ds,
    val_ds and test_ds; label_names is taken from the training set's
    class_names.

    Raises:
        ValueError: if SECONDS is set but rounds to less than one millisecond.
    """
    global train_ds
    global val_ds
    global test_ds
    global label_names
    global TRAIN_DIR, VAL_DIR, TEST_DIR

    def split_audio_dataset(input_dir):
        """Split every .wav file under input_dir into SECONDS-long segments.

        The sub-directory structure is mirrored below the output directory.
        The last segment of a file may be shorter than SECONDS. Returns the
        output directory as a Path.
        """
        input_dir = Path(input_dir)  # accept plain strings as well as Path objects

        # range() needs an integer step, so a float SECONDS (e.g. 0.5) is
        # converted to whole milliseconds.
        segment_length_ms = int(round(SECONDS * 1000))
        if segment_length_ms <= 0:
            raise ValueError(f"SECONDS must be at least 1 ms, got {SECONDS!r}")

        output_dir = input_dir.parent / f"{input_dir.name}_splits"

        # NOTE(review): an existing output directory is reused as-is, even if
        # it was written with a different SECONDS value or by an interrupted run.
        if output_dir.exists():
            print(f"Output directory already exists: {output_dir}. Skipping splitting.")
            return output_dir

        os.makedirs(output_dir, exist_ok=True)

        for subdir, _, files in os.walk(input_dir):
            relative_path = Path(subdir).relative_to(input_dir)  # mirror the sub-directory layout
            target_dir = output_dir / relative_path
            os.makedirs(target_dir, exist_ok=True)

            for file in files:
                if not file.endswith(".wav"):  # adjust formats as needed
                    continue

                input_path = Path(subdir) / file
                audio = AudioSegment.from_file(input_path)
                duration = len(audio)  # total length in milliseconds

                # Counted explicitly so the message below is correct even when
                # a file yields no segment at all (zero-length audio).
                num_segments = 0
                for start_time in range(0, duration, segment_length_ms):
                    end_time = min(start_time + segment_length_ms, duration)
                    segment = audio[start_time:end_time]
                    segment_filename = target_dir / f"{input_path.stem}_segment_{num_segments}.wav"
                    segment.export(segment_filename, format="wav")
                    num_segments += 1
                print(f"Processed {file} into {num_segments} segments in {target_dir}")

        print(f"All files processed. Split dataset saved in {output_dir}")
        return output_dir

    def load_split(directory):
        """Load one directory of labelled audio with the shared settings."""
        return tf.keras.utils.audio_dataset_from_directory(
            directory=directory,
            batch_size=BATCH_SIZE,
            seed=0,
            output_sequence_length=None,
        )

    if SECONDS is not None:
        print(f"Split Audio Data in {SECONDS}")
        TRAIN_DIR = split_audio_dataset(TRAIN_DIR)
        VAL_DIR = split_audio_dataset(VAL_DIR)
        TEST_DIR = split_audio_dataset(TEST_DIR)

    train_ds = load_split(TRAIN_DIR)
    val_ds = load_split(VAL_DIR)
    test_ds = load_split(TEST_DIR)

    label_names = np.array(train_ds.class_names)

In [None]:
def build_my():
    """Populate the dataset globals using the hand-rolled build_DS loader.

    train_ds and label_names come from TRAIN_DIR; val_ds and test_ds come
    from VAL_DIR and TEST_DIR, whose label names are discarded.
    """
    global train_ds, val_ds, test_ds, label_names

    train_ds, label_names = build_DS(TRAIN_DIR)
    val_ds, _val_names = build_DS(VAL_DIR)
    test_ds, _test_names = build_DS(TEST_DIR)

    # Bare attribute access kept from the notebook cell; its value is unused.
    train_ds.element_spec

The dataset now contains batches of audio clips and integer labels. The audio clips have a shape of `(batch, samples, channels)`.


This dataset only contains single channel audio, so use the `tf.squeeze` function to drop the extra axis:


In [5]:
def drop():
    """Strip the trailing channel axis from the audio of all three datasets.

    Replaces the globals train_ds, val_ds and test_ds with mapped datasets in
    which the last axis of every audio tensor has been squeezed away; the
    labels are passed through unchanged.
    """
    global train_ds, val_ds, test_ds

    @tf.autograph.experimental.do_not_convert
    def _without_channel_axis(waveform, label):
        return tf.squeeze(waveform, axis=-1), label

    parallelism = tf.data.AUTOTUNE
    train_ds = train_ds.map(_without_channel_axis, parallelism)
    val_ds = val_ds.map(_without_channel_axis, parallelism)
    test_ds = test_ds.map(_without_channel_axis, parallelism)

The `utils.audio_dataset_from_directory` function only returns up to two splits. It's a good idea to keep a test set separate from your validation set.
Ideally you'd keep it in a separate directory, but in this case you can use `Dataset.shard` to split the validation set into two halves. Note that iterating over **any** shard will load **all** the data, and only keep its fraction.


In [6]:
def split():
    """Halve the validation set: shard 0 becomes test_ds, shard 1 stays val_ds."""
    global test_ds, val_ds

    full_val = val_ds  # both shards are taken from the same source dataset
    test_ds = full_val.shard(num_shards=2, index=0)
    val_ds = full_val.shard(num_shards=2, index=1)

In [7]:
def extract():
    """Capture the first training batch and report its label distribution.

    Stores the first batch of train_ds in the globals example_audio and
    example_labels, prints the batch shapes and the per-label counts, and
    fills example_filenames with the base names of the first
    len(example_audio) entries of file_list.

    file_list is not assigned anywhere in the visible cells, so when it is
    None the filename lookup is skipped and example_filenames stays an empty
    list instead of raising AttributeError. An empty train_ds is reported and
    likewise leaves example_filenames empty.
    """
    global example_audio
    global example_labels
    global example_filenames

    example_filenames = []

    batch_labels = None  # stays None when train_ds yields no batch at all
    for example_audio, example_labels in train_ds.take(1):
        print("Audio Shape:", example_audio.shape)
        print("Labels Shape:", example_labels.shape)
        batch_labels = example_labels.numpy()
        print("Batch Labels:", batch_labels)

    if batch_labels is None:
        print("Kein Batch im Trainings-Dataset gefunden.")
        return

    label_counts = Counter(batch_labels)

    print("Anzahl der Labels im Batch:")
    for label, count in label_counts.items():
        print(f"Label {label}: {count}")

    total_labels = len(batch_labels)
    print(f"Gesamtanzahl der Labels im Batch: {total_labels}")

    if file_list is None:
        print("file_list ist nicht gesetzt; Dateinamen werden nicht ermittelt.")
        return

    # file_list entries are expected to expose .numpy() returning UTF-8 path bytes.
    for filepath in file_list.take(len(example_audio)):
        example_filenames.append(pathlib.Path(filepath.numpy().decode('utf-8')).name)

In [8]:
def countLabels():
    """Print how often each label occurs across the whole training dataset.

    train_ds is iterated element by element via unbatch(), so the complete
    dataset is read once.
    """
    every_label = np.array([lab.numpy() for _, lab in train_ds.unbatch()])
    histogram = Counter(every_label)

    print("Anzahl der Labels im gesamten Dataset:")
    for value, occurrences in histogram.items():
        print(f"Label {value}: {occurrences}")

    print(f"Gesamtanzahl der Labels im Dataset: {len(every_label)}")

In [9]:
def compare():
    """Print filename-derived labels next to the labels of the example batch.

    Walks TRAIN_DIR, takes the part of every .wav filename before its first
    underscore as the label, maps it through a fixed table (unknown prefixes
    become -1) and prints the mapped values index by index alongside
    example_labels.
    """
    # Known filename prefixes and the class id assigned to each.
    label_mapping = {
        'orig-16-44-mono': 1,  # example: positive
        'upscale-from-mp3-128': 0,  # example: negative
    }

    file_labels = [
        filename.split('_')[0]
        for _dirpath, _dirnames, filenames in os.walk(TRAIN_DIR)
        for filename in filenames
        if filename.endswith('.wav')
    ]

    mapped_labels = [label_mapping.get(prefix, -1) for prefix in file_labels]
    print("Gemappte Labels aus Dateinamen:", mapped_labels)

    # Only the overlapping prefix of both sequences is compared.
    batch_values = example_labels.numpy()
    for i in range(min(len(mapped_labels), len(batch_values))):
        print(f"Index: {i}, Batch Label: {batch_values[i]}, Gemapptes Label: {mapped_labels[i]}")

In [None]:
def run(_TRAIN_DIR, _TEST_DIR, _DATA_DIR, _VAL_DIR, _SECONDS, _BATCH_SIZE):
    """Configure the notebook globals, build the datasets and return them.

    The arguments are copied into the module-level TRAIN_DIR, TEST_DIR,
    DATA_DIR, VAL_DIR, SECONDS and BATCH_SIZE. build() then creates the
    datasets and drop() removes their channel axis. The alternative steps
    build_my(), split(), countLabels(), extract() and compare() are not
    part of this pipeline.

    Returns:
        (train_ds, val_ds, test_ds, label_names)
    """
    global TRAIN_DIR, TEST_DIR, DATA_DIR, VAL_DIR, SECONDS, BATCH_SIZE

    TRAIN_DIR, TEST_DIR, DATA_DIR, VAL_DIR = _TRAIN_DIR, _TEST_DIR, _DATA_DIR, _VAL_DIR
    SECONDS, BATCH_SIZE = _SECONDS, _BATCH_SIZE

    build()
    drop()

    return train_ds, val_ds, test_ds, label_names

In [11]:
import numpy as np
import tensorflow as tf
import pathlib
from pathlib import Path
import os
import librosa
def test ():
    global TRAIN_DIR, TEST_DIR, DATA_DIR, VAL_DIR
    TRAIN_DIR = pathlib.Path('data/small_train_ds')
    TEST_DIR = pathlib.Path('data/small_test_ds')
    DATA_DIR = pathlib.Path('data')
    VAL_DIR = pathlib.Path('data/small_val_ds')
    
    build()
    drop()
    split()
    extract()
    # compare()
    

# test()