# Preprocessing

In [2]:
import librosa
import numpy as np

def preprocess_audio(audio_path, sample_rate=16000):
    """
    Load an audio file, resample it, and peak-normalise it.

    Args:
        audio_path (str): Path to the audio file.
        sample_rate (int): Target sampling rate handed to ``librosa.load``
            (default: 16000).

    Returns:
        np.ndarray: The loaded signal, scaled so that its largest absolute
        sample value is 1. A signal that is empty or entirely zero is
        returned unscaled, since there is no peak to normalise against.
    """
    # Load the file; librosa resamples to `sample_rate` while loading.
    audio_signal, _ = librosa.load(audio_path, sr=sample_rate)

    # Convert to mono (in case the signal is multi-channel).
    audio_signal = librosa.to_mono(audio_signal)

    # A zero-length signal has no peak: np.max would raise ValueError on it,
    # so hand it back untouched instead of crashing.
    if audio_signal.size == 0:
        return audio_signal

    # Peak-normalise; skip the division for pure silence to avoid 0/0.
    peak = np.max(np.abs(audio_signal))
    if peak > 0:
        audio_signal = audio_signal / peak

    return audio_signal

# Funktion Log Energy

In [3]:
def calculate_log_energy(audio_signal):
    """
    Compute the natural-logarithm energy of an audio signal.

    The energy is the sum of the squared samples. Before taking the
    logarithm the energy is clamped from below to 2e-22, so the result is
    never smaller than ln(2e-22) (about -49.96), even for pure silence or an
    empty signal.

    Source:
        ETSI ES 201 108 V1.1.2 (2000-04), Speech Processing, Transmission and Quality aspects (STQ);
        Distributed speech recognition; Front-end feature extraction algorithm; Compression algorithms.

    Args:
        audio_signal (array-like): The audio samples (np.ndarray, list, ...).
            Integer and float inputs are both accepted; all elements
            contribute to the sum.

    Returns:
        float: The logarithmic energy.
    """
    # Lower limit for the argument of ln (floor value from the ETSI standard).
    energy_floor = 2e-22

    # Work in float64: squaring integer PCM samples (e.g. int16) in their own
    # dtype would silently overflow, and plain lists do not support `**`.
    samples = np.asarray(audio_signal, dtype=np.float64)

    # Square the amplitudes and sum them up.
    energy = np.sum(np.square(samples))

    # Apply the floor (a lower bound, not an additive offset) and take ln.
    return float(np.log(np.maximum(energy, energy_floor)))

# Test

In [4]:
import os

# Directory that holds the audio files
input_dir = "../audio_files"

# Test: compute and print the log energy of every .wav file,
# walking the directory listing in alphabetical order.
directory_listing = sorted(os.listdir(input_dir))
for file_name in directory_listing:
    # Anything that is not a .wav file is skipped.
    if not file_name.endswith(".wav"):
        continue

    file_path = os.path.join(input_dir, file_name)

    # Preprocess the file, then measure its log energy.
    audio_signal = preprocess_audio(file_path)
    log_energy = calculate_log_energy(audio_signal)

    print(f"Log Energy ({file_name}): {log_energy}")

Log Energy (_noise_pink.wav): 9.46723747253418
Log Energy (_noise_white.wav): 8.19981575012207
Log Energy (_signal_constant.wav): 9.680343627929688
Log Energy (_signal_silence.wav): -49.96372604370117
Log Energy (_signal_sine.wav): 8.987192153930664
Log Energy (example1.wav): 7.388055801391602
Log Energy (example2.wav): 7.668902397155762
Log Energy (example3.wav): 6.209742546081543
