# Preprocessing

In [1]:
import librosa
import numpy as np

def preprocess_audio(audio_path, sample_rate=16000):
    """
    Load an audio file, resample it and peak-normalize it.

    Args:
        audio_path (str): Path to the audio file.
        sample_rate (int): Target sampling rate handed to ``librosa.load``
            (default: 16000).

    Returns:
        np.ndarray: The mono signal, scaled so that its largest absolute
        sample equals 1.0. An all-zero signal and an empty signal are
        returned unscaled.
    """
    # Decoding and resampling happen in one step inside librosa.load.
    audio_signal, _ = librosa.load(audio_path, sr=sample_rate)

    # librosa.load already down-mixes by default (mono=True); the explicit
    # call is kept as a safeguard and leaves a mono signal untouched.
    audio_signal = librosa.to_mono(audio_signal)

    # np.max raises ValueError on a zero-size array, so an empty recording
    # is passed through instead of crashing the normalization step.
    if audio_signal.size == 0:
        return audio_signal

    # Peak normalization; skipped for pure silence to avoid dividing by zero.
    peak = np.max(np.abs(audio_signal))
    if peak > 0:
        audio_signal = audio_signal / peak

    return audio_signal

# Funktion Zero Crossing Rate

In [3]:
def calculate_zcr(audio_signal, frame_size=1024):
    """
    Compute the mean zero crossing rate (ZCR) of an audio signal.

    The signal is cut into consecutive, non-overlapping frames of
    ``frame_size`` samples; samples left over after the last full frame are
    ignored. A signal shorter than one frame is evaluated as a single frame
    covering the whole signal, and an empty signal yields 0.0.

    Source:
        Tom Bäckström et al., "Introduction to Speech Processing", 2nd Edition, 2022.
        DOI: 10.5281/zenodo.6821775
        URL: https://speechprocessingbook.aalto.fi/Representations/Zero-crossing_rate.html

    Args:
        audio_signal (np.array): The audio signal (1-D array).
        frame_size (int): Number of samples per frame.

    Returns:
        float: ZCR averaged over all frames.
    """
    signal = np.asarray(audio_signal)
    if signal.size == 0:
        # Nothing to measure; avoids the NaN that the mean of zero frames gives.
        return 0.0

    num_frames = len(signal) // frame_size
    if num_frames == 0:
        # Fewer samples than one frame: use the whole signal as one frame.
        frames = signal.reshape((1, -1))
    else:
        frames = signal[:num_frames * frame_size].reshape((num_frames, frame_size))

    # A jump between -1 and +1 in the sign sequence has magnitude 2, hence the
    # factor 2 in the denominator. The count is normalized by the frame length.
    sign_jumps = np.abs(np.diff(np.sign(frames), axis=1)).sum(axis=1, dtype=np.float64)
    zcr_per_frame = sign_jumps / (2 * frames.shape[1])

    return float(np.mean(zcr_per_frame))

# Test

In [5]:
import os

# Example: compute the ZCR for every WAV file in the audio directory.
for file_name in sorted(os.listdir("../audio_files")):  # alphabetical order
    # Skip anything that is not a WAV file.
    if not file_name.endswith(".wav"):
        continue

    file_path = os.path.join("../audio_files", file_name)

    # Preprocess the file, then measure its zero crossing rate.
    audio_signal = preprocess_audio(file_path)
    zcr_value = calculate_zcr(audio_signal)

    print(f"ZCR ({file_name}): {zcr_value:.4f}")

ZCR (_noise_pink.wav): 0.0658
ZCR (_noise_white.wav): 0.4907
ZCR (_signal_constant.wav): 0.0000
ZCR (_signal_silence.wav): 0.0000
ZCR (_signal_sine.wav): 0.0550
ZCR (_snr_03.wav): 0.0874
ZCR (_snr_10.wav): 0.0854
ZCR (_snr_20.wav): 0.0820
ZCR (example1.wav): 0.0646
ZCR (example2.wav): 0.0834
ZCR (example3.wav): 0.1078


In [2]:
import librosa
import numpy as np

def calculate_zcr(audio_signal, frame_length=1024, hop_length=512):
    """
    Compute the mean zero crossing rate (ZCR) of an audio signal with librosa.

    NOTE(review): this notebook also contains a NumPy-only function with the
    same name but a different keyword (``frame_size``). Whichever cell was
    executed last is the one that later calls use.

    Source:
        Librosa Documentation: https://librosa.org/doc/main/generated/librosa.feature.zero_crossing_rate.html

    Args:
        audio_signal (np.array): The normalized audio signal (1-D array).
        frame_length (int): Frame length in samples (default: 1024).
        hop_length (int): Step between successive frames in samples
            (default: 512).

    Returns:
        float: ZCR averaged over all frames.
    """
    zcr_matrix = librosa.feature.zero_crossing_rate(
        y=audio_signal,
        frame_length=frame_length,
        hop_length=hop_length,
    )

    # Only the first row of the result is averaged.
    per_frame_rates = zcr_matrix[0]
    return np.mean(per_frame_rates)

In [3]:
import os

# Compute the ZCR for every WAV file in the audio directory.
for file_name in sorted(os.listdir("../audio_files")):  # alphabetical order
    # Skip anything that is not a WAV file.
    if not file_name.endswith(".wav"):
        continue

    file_path = os.path.join("../audio_files", file_name)

    # Preprocess the file, then measure its zero crossing rate.
    audio_signal = preprocess_audio(file_path)
    zcr_value = calculate_zcr(audio_signal)

    print(f"Zero Crossing Rate ({file_name}): {zcr_value:.4f}")

Zero Crossing Rate (_noise_pink.wav): 0.0635
Zero Crossing Rate (_noise_white.wav): 0.4776
Zero Crossing Rate (_signal_constant.wav): 0.0000
Zero Crossing Rate (_signal_silence.wav): 0.0000
Zero Crossing Rate (_signal_sine.wav): 0.0534
Zero Crossing Rate (_snr_03.wav): 0.0867
Zero Crossing Rate (_snr_10.wav): 0.0847
Zero Crossing Rate (_snr_20.wav): 0.0814
Zero Crossing Rate (example1.wav): 0.0626
Zero Crossing Rate (example2.wav): 0.0837
Zero Crossing Rate (example3.wav): 0.1102
