# Generate Benchmark JSON


In [None]:
import json
import pandas as pd # type: ignore
from pathlib import Path
import numpy as np
import librosa
import scipy.signal
import scipy.io.wavfile as wav

def total_harmonic_distortion(file_path): #ratio
    """
    Estimate the total harmonic distortion (THD) of an audio file.

    The strongest spectral peak is taken as the fundamental. Every other peak
    above 10% of the maximum magnitude that lies within 1 Hz of an integer
    multiple (2x, 3x, ...) of the fundamental is counted as a harmonic.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        float: sqrt(sum of squared harmonic magnitudes) / fundamental magnitude.
            0.0 when none of the remaining peaks is a harmonic.

    Raises:
        ValueError: If fewer than two spectral peaks pass the height threshold.
    """
    # Load the audio signal at its native sample rate
    signal, sr = librosa.load(file_path, sr=None)

    # Magnitude spectrum, positive frequencies only
    half = len(signal) // 2
    fft_magnitude = np.abs(np.fft.fft(signal)[:half])
    freqs = np.fft.fftfreq(len(signal), d=1/sr)[:half]

    # Consider only peaks above 10% of the maximum magnitude
    peaks, properties = scipy.signal.find_peaks(fft_magnitude, height=np.max(fft_magnitude) * 0.1)

    if len(peaks) < 2:
        raise ValueError(f"Not enough peaks found to compute THD in file: {file_path}")

    # Sort peaks by amplitude (descending); the strongest is assumed to be the fundamental
    sorted_indices = np.argsort(properties["peak_heights"])[::-1]
    sorted_peaks = peaks[sorted_indices]

    # find_peaks never reports the first sample (the 0 Hz bin) as a peak,
    # so the fundamental frequency is strictly positive and safe to divide by
    fundamental_freq = freqs[sorted_peaks[0]]
    fundamental_amp = fft_magnitude[sorted_peaks[0]]

    # Match every remaining peak to its NEAREST integer multiple of the fundamental.
    # A plain `freq % fundamental` test misses harmonics that land slightly below
    # the multiple (the remainder is then close to the fundamental, not to 0) and
    # wrongly accepts leakage peaks right next to the fundamental itself.
    other_peaks = sorted_peaks[1:]
    other_freqs = freqs[other_peaks]
    harmonic_order = np.round(other_freqs / fundamental_freq)
    deviation_hz = np.abs(other_freqs - harmonic_order * fundamental_freq)
    is_harmonic = (harmonic_order >= 2) & (deviation_hz <= 1.0)

    # Sum the power of the harmonics
    harmonic_power = float(np.sum(fft_magnitude[other_peaks][is_harmonic] ** 2))

    if harmonic_power == 0:
        print(f"No harmonics found for THD calculation in file: {file_path}")

    thd = (np.sqrt(harmonic_power) / fundamental_amp)
    return thd

def noise_floor(file_path, segment_duration=0.5): #decibels
    """
    Estimate the noise floor of an audio file as the mean segment RMS, in dB.

    The signal is cut into consecutive segments of ``segment_duration`` seconds
    (the last one may be shorter); the RMS of each segment is averaged and the
    average is converted to decibels.

    Args:
        file_path (str): Path to the audio file.
        segment_duration (float): Segment length in seconds.

    Returns:
        float: 20 * log10(mean segment RMS). ``-inf`` for an empty or
            completely silent signal, matching what crest_factor and
            dynamic_range return for degenerate input.

    Raises:
        ValueError: If ``segment_duration`` is shorter than one sample.
    """
    # Load the audio signal at its native sample rate
    signal, sr = librosa.load(file_path, sr=None)

    # Nothing to measure: avoid averaging an empty list (which yields NaN)
    if signal.size == 0:
        return float('-inf')

    # Calculate segment length in samples
    segment_length = int(segment_duration * sr)
    if segment_length < 1:
        raise ValueError(
            f"segment_duration={segment_duration} is shorter than one sample at {sr} Hz"
        )

    # Split the signal into segments
    segments = [signal[i:i + segment_length] for i in range(0, len(signal), segment_length)]

    # Calculate RMS for each segment and average them
    rms_values = [np.sqrt(np.mean(segment**2)) for segment in segments]
    mean_rms = np.mean(rms_values)

    # Digital silence: log10(0) would only emit a RuntimeWarning on the way to -inf
    if mean_rms <= 0:
        return float('-inf')

    # Convert to decibels
    noise_floor_db = 20 * np.log10(mean_rms)

    return noise_floor_db


def dynamic_range(file_path):  # Decibels
    """
    Return the peak-to-RMS ratio of a WAV file in decibels.

    Multi-channel audio is averaged down to one channel and NaN samples are
    discarded before measuring. Returns ``-inf`` when no samples remain.
    """
    _, samples = wav.read(file_path)

    # Work in floating point so squaring integer PCM cannot overflow
    samples = samples.astype(np.float32)

    # Collapse channels into a single mono track
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Discard NaN samples
    nan_mask = np.isnan(samples)
    if nan_mask.any():
        samples = samples[~nan_mask]

    # Guard against an empty array before taking max / mean
    if samples.size == 0:
        return float('-inf')

    peak = np.abs(samples).max()

    # The small epsilon keeps the RMS strictly positive for silent input
    mean_square = np.mean(np.square(samples))
    rms = np.sqrt(mean_square + 1e-10)

    return 20 * np.log10(peak / rms)


def crest_factor(file_path, segment_duration=0.5):  # dB
    """
    Return the average per-segment crest factor (peak / RMS) in decibels.

    The signal is processed in consecutive windows of ``segment_duration``
    seconds. Windows whose RMS is not positive are skipped. Returns ``-inf``
    when no window contributes a value.
    """
    samples, rate = librosa.load(file_path, sr=None)

    # Window length in samples
    window = int(segment_duration * rate)

    per_window_db = []
    for start in range(0, len(samples), window):
        chunk = samples[start:start + window]
        chunk_rms = np.sqrt(np.mean(chunk**2))

        # Silent windows have no defined crest factor
        if not chunk_rms > 0:
            continue

        chunk_peak = np.max(np.abs(chunk))
        per_window_db.append(20 * np.log10(chunk_peak / chunk_rms))

    if per_window_db:
        return np.mean(per_window_db)
    return float('-inf')


def signal_noise_ratio(file_path, noise_duration=0.5, signal_duration=2.0): #decibels
    """
    Return the ratio of signal RMS to noise RMS in decibels.

    The first ``noise_duration`` seconds of the file are treated as noise and
    the following ``signal_duration`` seconds as signal. Returns ``inf`` when
    the noise RMS is not positive.
    """
    samples, rate = librosa.load(file_path, sr=None)

    # Boundaries of the two windows, in samples
    noise_end = int(noise_duration * rate)
    signal_end = noise_end + int(signal_duration * rate)

    def _rms(chunk):
        return np.sqrt(np.mean(chunk**2))

    noise_rms = _rms(samples[:noise_end])
    signal_rms = _rms(samples[noise_end:signal_end])

    # Avoid dividing by zero when the noise window is silent
    if noise_rms > 0:
        return 20 * np.log10(signal_rms / noise_rms)
    return float('inf')


def waveform_complexity_index(file_path): #relative value
    """
    Compute a relative "waveform complexity index" (WCI) for an audio file.

    WCI = (mean zero-crossing rate * mean spectral entropy) / (1 + std of frame RMS)

    Spectral entropy is averaged over STFT frames that contain energy; frames
    whose magnitude spectrum is all zero have no defined entropy and are left
    out. If every frame is silent the entropy term is 0, so the index is 0.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        float: The complexity index (unitless, only meaningful for comparison).
    """
    # The file only imports scipy.signal and scipy.io.wavfile; import entropy
    # explicitly so this does not rely on scipy.stats being loaded implicitly
    from scipy.stats import entropy

    # Load audio file
    signal, sr = librosa.load(file_path, sr=None, mono=True)

    # Compute Zero-Crossing Rate (ZCR) - Measures frequency of sign changes
    avg_zcr = np.mean(librosa.feature.zero_crossing_rate(y=signal))

    # Compute Spectral Entropy manually (Energy Distribution across Frequencies)
    spectrum = np.abs(librosa.stft(signal))  # Magnitude spectrum, one column per frame
    frame_energy = np.sum(spectrum, axis=0)

    # Normalizing a silent frame would be 0/0 and turn the whole index into NaN
    active = frame_energy > 0
    if np.any(active):
        distribution = spectrum[:, active] / frame_energy[active]
        avg_entropy = np.mean(entropy(distribution, axis=0))
    else:
        avg_entropy = 0.0

    # Compute RMS Energy Variation - Measures fluctuations in loudness
    rms_energy = librosa.feature.rms(y=signal)
    rms_variation = np.std(rms_energy)  # Standard deviation to measure fluctuation

    # Compute Waveform Complexity Index (WCI), damped by the loudness fluctuation
    wci = (avg_zcr * avg_entropy) / (1 + rms_variation)
    return wci

# Load tags from a spreadsheet
def load_tags_from_spreadsheet(spreadsheet_path):
    """Read the tag spreadsheet into a DataFrame, treating "N/A" cells as missing."""
    read_options = {"keep_default_na": True, "na_values": ["N/A"]}
    tag_table = pd.read_excel(spreadsheet_path, **read_options)
    return tag_table

def get_tags_from_spreadsheet(file_name, tag_data):
    """
    Extract tags for a specific file name from the spreadsheet data.

    Args:
        file_name (str): Name of the file to look up; a trailing ".wav" is ignored.
        tag_data (pd.DataFrame): DataFrame containing tag information, with the
            file identifier in its "ID" column.

    Returns:
        dict: Dictionary of tags for the first row whose "ID" matches, or an
            empty dict when no row matches. "Length" is an int, or None when
            the "Length (S)" cell is missing. "Voice Type" and "Voice ID" are
            always converted with str().
    """
    # Strip only a trailing ".wav"; str.replace would also remove it mid-name
    suffix = ".wav"
    file_name_no_ext = file_name[:-len(suffix)] if file_name.endswith(suffix) else file_name

    matches = tag_data[tag_data['ID'] == file_name_no_ext]
    if matches.empty:
        return {}

    row = matches.iloc[0]
    print("Processed " + file_name_no_ext)

    # A missing cell comes through as NaN and an absent column as None;
    # int() raises on both, so report the length as None instead
    length = row.get("Length (S)", None)
    length = None if pd.isna(length) else int(length)

    return {
        "ID": row.get("ID", None),
        "Length": length,
        "Location": row.get("Location", None),
        "Indoors": row.get("Indoors", None),
        "Crowded": row.get("Crowded", None),
        "Speaking": row.get("Speaking", None),
        "Walking": row.get("Walking", None),
        "Environment Type": row.get("Environment Type", None),
        "Voice Type": str(row.get("Voice Type", None)),
        "Voice ID": str(row.get("Voice ID", None))
    }

def process_audio(file_path):
    """
    Run every audio metric on one file.

    Args:
        file_path (str): Path to the .wav file.

    Returns:
        dict: Metric name mapped to its value as a plain float.
    """
    # Evaluated in this order; the order also fixes the key order of the result
    metric_functions = (
        ("Total_Harmonic_Distortion", total_harmonic_distortion),
        ("Signal_Noise_Ratio", signal_noise_ratio),
        ("Noise_Floor", noise_floor),
        ("Dynamic_Range", dynamic_range),
        ("Crest_Factor", crest_factor),
        ("Waveform_Complexity_Index", waveform_complexity_index),
    )
    return {label: float(compute(file_path)) for label, compute in metric_functions}

def generate_json_data(folder_path, output_file, spreadsheet_path, model_name, runtime):
    """
    Generate JSON data for audio files in a folder.

    Every .wav file found recursively under ``folder_path`` gets one entry
    holding its ID (the file stem), its tags from the spreadsheet and its
    computed metrics. Nothing is written when no .wav files are found.

    Args:
        folder_path (str): Path to the folder containing .wav files.
        output_file (str): Path to the output JSON file.
        spreadsheet_path (str): Path to the spreadsheet containing tag data.
        model_name (str): Name of the audio model, stored under
            "top_level_metrics" -> "audio_model".
        runtime (float): Runtime in seconds, stored under
            "top_level_metrics" -> "runtime".

    Note:
        Metrics that evaluate to inf/-inf/NaN are written by json.dump as
        Infinity/-Infinity/NaN, which strict JSON parsers reject.
    """
    # Sort so the order of entries in the output is reproducible across runs
    audio_files = sorted(Path(folder_path).rglob("*.wav"))

    # Bail out before reading the spreadsheet when there is nothing to process
    if not audio_files:
        print("No .wav files found in the specified folder.")
        return

    tag_data = load_tags_from_spreadsheet(spreadsheet_path)

    data_store = []

    for file_path in audio_files:
        file_name = file_path.stem
        metrics = process_audio(file_path)
        file_tags = get_tags_from_spreadsheet(file_name, tag_data)

        # Later sources win: spreadsheet tags may override "ID", metrics come last
        entry = {"ID": file_name}
        entry.update(file_tags)
        entry.update(metrics)

        data_store.append(entry)
        print(f"Successfully Added {file_path} to store")

    # Combine the run-level information with the per-file entries
    output_data = {
        "top_level_metrics": {
            "audio_model": model_name,
            "runtime": runtime,  # runtime in seconds
        },
        "files": data_store,
    }

    # Save the data to a JSON file
    with open(output_file, "w", encoding="utf-8") as json_file:
        json.dump(output_data, json_file, indent=4)

    print(f"JSON data saved to {output_file}")

# Example usage
if __name__ == "__main__":
    # Adjust these four values for the model being benchmarked
    model_name = "spectral_subtraction"  # model title, also used in the output file name
    folder_path = "./Spectral_Subtraction"  # folder holding the model's .wav output
    spreadsheet_path = "./table.xlsx"  # spreadsheet with the per-file tags
    output_file = f"./{model_name}_benchmark_data.json"  # where the JSON is written

    generate_json_data(
        folder_path=folder_path,
        output_file=output_file,
        spreadsheet_path=spreadsheet_path,
        model_name=model_name,
        runtime=0,
    )


FileNotFoundError: [Errno 2] No such file or directory: './table.xlsx'



# Compare Benchmark JSONs

# **ALL MODELS**

In [None]:
import json
import pandas as pd # type: ignore
from pathlib import Path
import numpy as np
import librosa
import scipy.signal
import scipy.io.wavfile as wav
def total_harmonic_distortion(file_path): #ratio
    """
    Estimate the total harmonic distortion (THD) of an audio file.

    The strongest spectral peak is taken as the fundamental. Every other peak
    above 10% of the maximum magnitude that lies within 1 Hz of an integer
    multiple (2x, 3x, ...) of the fundamental is counted as a harmonic.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        float: sqrt(sum of squared harmonic magnitudes) / fundamental magnitude.
            0.0 when none of the remaining peaks is a harmonic.

    Raises:
        ValueError: If fewer than two spectral peaks pass the height threshold.
    """
    # Load the audio signal at its native sample rate
    signal, sr = librosa.load(file_path, sr=None)

    # Magnitude spectrum, positive frequencies only
    half = len(signal) // 2
    fft_magnitude = np.abs(np.fft.fft(signal)[:half])
    freqs = np.fft.fftfreq(len(signal), d=1/sr)[:half]

    # Consider only peaks above 10% of the maximum magnitude
    peaks, properties = scipy.signal.find_peaks(fft_magnitude, height=np.max(fft_magnitude) * 0.1)

    if len(peaks) < 2:
        raise ValueError(f"Not enough peaks found to compute THD in file: {file_path}")

    # Sort peaks by amplitude (descending); the strongest is assumed to be the fundamental
    sorted_indices = np.argsort(properties["peak_heights"])[::-1]
    sorted_peaks = peaks[sorted_indices]

    # find_peaks never reports the first sample (the 0 Hz bin) as a peak,
    # so the fundamental frequency is strictly positive and safe to divide by
    fundamental_freq = freqs[sorted_peaks[0]]
    fundamental_amp = fft_magnitude[sorted_peaks[0]]

    # Match every remaining peak to its NEAREST integer multiple of the fundamental.
    # A plain `freq % fundamental` test misses harmonics that land slightly below
    # the multiple (the remainder is then close to the fundamental, not to 0) and
    # wrongly accepts leakage peaks right next to the fundamental itself.
    other_peaks = sorted_peaks[1:]
    other_freqs = freqs[other_peaks]
    harmonic_order = np.round(other_freqs / fundamental_freq)
    deviation_hz = np.abs(other_freqs - harmonic_order * fundamental_freq)
    is_harmonic = (harmonic_order >= 2) & (deviation_hz <= 1.0)

    # Sum the power of the harmonics
    harmonic_power = float(np.sum(fft_magnitude[other_peaks][is_harmonic] ** 2))

    if harmonic_power == 0:
        print(f"No harmonics found for THD calculation in file: {file_path}")

    thd = (np.sqrt(harmonic_power) / fundamental_amp)
    return thd

def noise_floor(file_path, segment_duration=0.5): #decibels
    """
    Estimate the noise floor of an audio file as the mean segment RMS, in dB.

    The signal is cut into consecutive segments of ``segment_duration`` seconds
    (the last one may be shorter); the RMS of each segment is averaged and the
    average is converted to decibels.

    Args:
        file_path (str): Path to the audio file.
        segment_duration (float): Segment length in seconds.

    Returns:
        float: 20 * log10(mean segment RMS). ``-inf`` for an empty or
            completely silent signal, matching what crest_factor and
            dynamic_range return for degenerate input.

    Raises:
        ValueError: If ``segment_duration`` is shorter than one sample.
    """
    # Load the audio signal at its native sample rate
    signal, sr = librosa.load(file_path, sr=None)

    # Nothing to measure: avoid averaging an empty list (which yields NaN)
    if signal.size == 0:
        return float('-inf')

    # Calculate segment length in samples
    segment_length = int(segment_duration * sr)
    if segment_length < 1:
        raise ValueError(
            f"segment_duration={segment_duration} is shorter than one sample at {sr} Hz"
        )

    # Split the signal into segments
    segments = [signal[i:i + segment_length] for i in range(0, len(signal), segment_length)]

    # Calculate RMS for each segment and average them
    rms_values = [np.sqrt(np.mean(segment**2)) for segment in segments]
    mean_rms = np.mean(rms_values)

    # Digital silence: log10(0) would only emit a RuntimeWarning on the way to -inf
    if mean_rms <= 0:
        return float('-inf')

    # Convert to decibels
    noise_floor_db = 20 * np.log10(mean_rms)

    return noise_floor_db


def dynamic_range(file_path):  # Decibels
    """
    Return the peak-to-RMS ratio of a WAV file in decibels.

    Multi-channel audio is averaged down to one channel and NaN samples are
    discarded before measuring. Returns ``-inf`` when no samples remain.
    """
    _, samples = wav.read(file_path)

    # Work in floating point so squaring integer PCM cannot overflow
    samples = samples.astype(np.float32)

    # Collapse channels into a single mono track
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Discard NaN samples
    nan_mask = np.isnan(samples)
    if nan_mask.any():
        samples = samples[~nan_mask]

    # Guard against an empty array before taking max / mean
    if samples.size == 0:
        return float('-inf')

    peak = np.abs(samples).max()

    # The small epsilon keeps the RMS strictly positive for silent input
    mean_square = np.mean(np.square(samples))
    rms = np.sqrt(mean_square + 1e-10)

    return 20 * np.log10(peak / rms)


def crest_factor(file_path, segment_duration=0.5):  # dB
    """
    Return the average per-segment crest factor (peak / RMS) in decibels.

    The signal is processed in consecutive windows of ``segment_duration``
    seconds. Windows whose RMS is not positive are skipped. Returns ``-inf``
    when no window contributes a value.
    """
    samples, rate = librosa.load(file_path, sr=None)

    # Window length in samples
    window = int(segment_duration * rate)

    per_window_db = []
    for start in range(0, len(samples), window):
        chunk = samples[start:start + window]
        chunk_rms = np.sqrt(np.mean(chunk**2))

        # Silent windows have no defined crest factor
        if not chunk_rms > 0:
            continue

        chunk_peak = np.max(np.abs(chunk))
        per_window_db.append(20 * np.log10(chunk_peak / chunk_rms))

    if per_window_db:
        return np.mean(per_window_db)
    return float('-inf')


def signal_noise_ratio(file_path, noise_duration=0.5, signal_duration=2.0): #decibels
    """
    Return the ratio of signal RMS to noise RMS in decibels.

    The first ``noise_duration`` seconds of the file are treated as noise and
    the following ``signal_duration`` seconds as signal. Returns ``inf`` when
    the noise RMS is not positive.
    """
    samples, rate = librosa.load(file_path, sr=None)

    # Boundaries of the two windows, in samples
    noise_end = int(noise_duration * rate)
    signal_end = noise_end + int(signal_duration * rate)

    def _rms(chunk):
        return np.sqrt(np.mean(chunk**2))

    noise_rms = _rms(samples[:noise_end])
    signal_rms = _rms(samples[noise_end:signal_end])

    # Avoid dividing by zero when the noise window is silent
    if noise_rms > 0:
        return 20 * np.log10(signal_rms / noise_rms)
    return float('inf')


def waveform_complexity_index(file_path): #relative value
    """
    Compute a relative "waveform complexity index" (WCI) for an audio file.

    WCI = (mean zero-crossing rate * mean spectral entropy) / (1 + std of frame RMS)

    Spectral entropy is averaged over STFT frames that contain energy; frames
    whose magnitude spectrum is all zero have no defined entropy and are left
    out. If every frame is silent the entropy term is 0, so the index is 0.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        float: The complexity index (unitless, only meaningful for comparison).
    """
    # The file only imports scipy.signal and scipy.io.wavfile; import entropy
    # explicitly so this does not rely on scipy.stats being loaded implicitly
    from scipy.stats import entropy

    # Load audio file
    signal, sr = librosa.load(file_path, sr=None, mono=True)

    # Compute Zero-Crossing Rate (ZCR) - Measures frequency of sign changes
    avg_zcr = np.mean(librosa.feature.zero_crossing_rate(y=signal))

    # Compute Spectral Entropy manually (Energy Distribution across Frequencies)
    spectrum = np.abs(librosa.stft(signal))  # Magnitude spectrum, one column per frame
    frame_energy = np.sum(spectrum, axis=0)

    # Normalizing a silent frame would be 0/0 and turn the whole index into NaN
    active = frame_energy > 0
    if np.any(active):
        distribution = spectrum[:, active] / frame_energy[active]
        avg_entropy = np.mean(entropy(distribution, axis=0))
    else:
        avg_entropy = 0.0

    # Compute RMS Energy Variation - Measures fluctuations in loudness
    rms_energy = librosa.feature.rms(y=signal)
    rms_variation = np.std(rms_energy)  # Standard deviation to measure fluctuation

    # Compute Waveform Complexity Index (WCI), damped by the loudness fluctuation
    wci = (avg_zcr * avg_entropy) / (1 + rms_variation)
    return wci
