In [13]:
import os
import numpy as np
import librosa

In [21]:
# Load an audio file
def load_wav(wavfile, sr=16000):
    """
    Read ``wavfile`` through librosa and return only the waveform.

    The file is loaded with ``sr=sr`` and ``mono=True``; the sample rate that
    ``librosa.load`` returns alongside the samples is discarded.
    """
    samples = librosa.load(wavfile, sr=sr, mono=True)[0]
    return samples

In [11]:
# Compute the SNR
def calculate_snr(reference_signal, generated_signal):
    """
    Compute the signal-to-noise ratio (SNR), in dB, between two waveforms.

    The "noise" is the sample-by-sample difference
    ``generated_signal - reference_signal``, so the two inputs are compared
    index by index. If their lengths differ, both are truncated to the
    shorter one first.

    Parameters
    ----------
    reference_signal : array-like
        Source (reference) speech samples.
    generated_signal : array-like
        Generated speech samples.

    Returns
    -------
    float
        ``10 * log10(mean(reference**2) / mean(noise**2))``, with these
        special cases:

        * ``inf``  - the noise power is zero (signals are identical);
        * ``-inf`` - the reference is all zeros but the noise is not;
        * ``nan``  - there are no samples to compare.
    """
    # Work in float64: this accepts plain sequences and, more importantly,
    # keeps integer PCM data (e.g. int16) from overflowing when squared.
    reference = np.asarray(reference_signal, dtype=np.float64)
    generated = np.asarray(generated_signal, dtype=np.float64)

    # Make both signals the same length.
    min_length = min(len(reference), len(generated))
    if min_length == 0:
        # No samples: the ratio is undefined. Return NaN explicitly instead
        # of letting np.mean warn about an empty slice.
        return float('nan')
    reference = reference[:min_length]
    generated = generated[:min_length]

    # Noise signal (generated speech - source speech).
    noise = generated - reference

    # Mean power of the reference and of the noise.
    signal_power = np.mean(reference ** 2)
    noise_power = np.mean(noise ** 2)

    # No noise at all: the SNR is infinite.
    if noise_power == 0:
        return float('inf')

    # Silent reference with non-zero noise: avoid calling log10(0).
    if signal_power == 0:
        return float('-inf')

    return float(10 * np.log10(signal_power / noise_power))

In [14]:
# Main pipeline
def process_snr(mapping_file_path, original_base_path, generated_base_path):
    """
    Compute the SNR for every audio pair listed in a mapping file.

    Each non-blank line of the mapping file must contain exactly three
    ``|``-separated fields: the generated wav name, the source (content) wav
    name, and a third field that is not used here. Blank lines are skipped
    silently; any other line without exactly three fields is reported and
    skipped.

    Parameters
    ----------
    mapping_file_path : str
        Path of the mapping file (read as UTF-8).
    original_base_path : str
        Directory that the source wav names are joined onto.
    generated_base_path : str
        Directory that the generated wav names are joined onto.

    Returns
    -------
    float
        Arithmetic mean of the per-file SNR values, or ``nan`` when no valid
        line was processed. The per-file values and the average are also
        printed.
    """
    total_snr = 0
    count = 0

    # Read the mapping file with an explicit encoding so the result does not
    # depend on the platform's locale.
    with open(mapping_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            entry = line.strip()

            # A blank line (e.g. a trailing newline) is not a malformed entry.
            if not entry:
                continue

            parts = entry.split('|')

            # Require exactly three fields.
            if len(parts) != 3:
                # Print the stripped entry so the message has no stray newline.
                print(f"Invalid line format: {entry}")
                continue

            generated_wav, content_wav, _ = parts

            # Build the audio file paths.
            original_wav_path = os.path.join(original_base_path, content_wav)
            generated_wav_path = os.path.join(generated_base_path, generated_wav)

            # Load the source and generated speech.
            reference_signal = load_wav(original_wav_path)
            generated_signal = load_wav(generated_wav_path)

            # SNR of the current pair.
            snr = calculate_snr(reference_signal, generated_signal)
            print(f"SNR for {generated_wav}: {snr} dB")

            total_snr += snr
            count += 1

    # Average SNR. With nothing processed the mean is undefined, so report
    # NaN rather than an infinite (i.e. "perfect") score.
    avg_snr = total_snr / count if count > 0 else float('nan')
    print(f"\nAverage SNR: {avg_snr} dB")
    return avg_snr

In [19]:
# Example usage: inputs passed to process_snr
generated_base_path = '/home/sun/FreeVC/outputs/starganvc_test'  # directory of generated audio
original_base_path = '/home/sun/FreeVC/outputs/wav_o'  # directory of original audio
mapping_file_path = '/home/sun/FreeVC/convert_S.txt'  # updated conversion (mapping) file path


In [23]:
# Run the SNR evaluation over every pair listed in the mapping file.
process_snr(
    mapping_file_path=mapping_file_path,
    original_base_path=original_base_path,
    generated_base_path=generated_base_path,
)

SNR for WTM1_S.wav: -2.859790325164795 dB
SNR for WTM2_S.wav: -1.3938893377780914 dB
SNR for WTM3_S.wav: -7.286527752876282 dB
SNR for WTM4_S.wav: -1.1426932364702225 dB
SNR for WTM5_S.wav: -1.2161889672279358 dB
SNR for WTM6_S.wav: -6.1657702922821045 dB
SNR for WTM7_S.wav: -0.24048374965786934 dB
SNR for WTM8_S.wav: -1.4026466012001038 dB
SNR for WTM9_S.wav: -9.14302110671997 dB
SNR for WTM10_S.wav: -0.00085613333794754 dB
SNR for WTM11_S.wav: -3.6637479066848755 dB
SNR for WTM12_S.wav: -13.682726621627808 dB
SNR for MTW1_S.wav: -0.0018957843712996691 dB
SNR for MTW2_S.wav: -3.4106507897377014 dB
SNR for MTW3_S.wav: -8.935068249702454 dB
SNR for MTW4_S.wav: -3.9233511686325073 dB
SNR for MTW5_S.wav: 0.0005849835361004807 dB
SNR for MTW6_S.wav: -1.0006626695394516 dB
SNR for MTW7_S.wav: -11.554882526397705 dB
SNR for MTW8_S.wav: -7.237020134925842 dB
SNR for MTW9_S.wav: -0.0023567801690660417 dB
SNR for MTW10_S.wav: -1.3523423671722412 dB
SNR for MTW11_S.wav: -4.316782653331757 dB
SNR