In [2]:
import librosa
import soundfile as sf
import os
import glob
from pathlib import Path

def resample_wav_librosa(input_path, output_path, target_sr=16000):
    """
    Resample a single audio file to ``target_sr`` using librosa and save it.

    Progress information (file name, original sample rate, duration, output
    path) is printed to stdout.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the (possibly resampled) audio is written to.
        target_sr: Target sample rate in Hz. Defaults to 16000.

    Returns:
        True if the file was loaded and written successfully, False if any
        step raised. Errors are printed rather than re-raised so that a batch
        run can continue with the next file.
    """
    try:
        # sr=None asks librosa to keep the file's native sample rate so the
        # original rate can be reported and compared against the target.
        y, orig_sr = librosa.load(input_path, sr=None)

        print(f"处理: {os.path.basename(input_path)}")
        print(f"  原始采样率: {orig_sr}Hz")
        print(f"  音频时长: {len(y)/orig_sr:.2f}秒")

        if orig_sr != target_sr:
            y_resampled = librosa.resample(y, orig_sr=orig_sr, target_sr=target_sr)
            print(f"  重采样到: {target_sr}Hz")
        else:
            # Already at the requested rate: write the samples through as-is.
            # Report the actual target rate instead of a hard-coded 16000.
            y_resampled = y
            print(f"  采样率已经是{target_sr}Hz，无需重采样")

        # Save the audio at the target sample rate.
        sf.write(output_path, y_resampled, target_sr)
        print(f"  文件已保存: {output_path}")
        print("  " + "-" * 50)

        return True

    except Exception as e:
        # Deliberately broad: one unreadable or unwritable file must not
        # abort a whole batch; the caller counts the False result as a failure.
        print(f"错误处理文件 {input_path}: {e}")
        return False

def batch_resample_directory(input_dir, output_dir, target_sr=16000, extensions=None):
    """
    Resample every matching audio file in ``input_dir`` into ``output_dir``.

    Each file is written under its original file name. A summary of how many
    files succeeded and failed is printed at the end.

    Args:
        input_dir: Directory that is searched (non-recursively) for audio files.
        output_dir: Directory the resampled files are written to; created if
            it does not exist.
        target_sr: Target sample rate in Hz. Defaults to 16000.
        extensions: List of glob patterns for the files to process. Defaults
            to ``['*.wav', '*.mp3', '*.flac', '*.ogg', '*.m4a']``. Each
            pattern is also tried in upper case.
    """
    if extensions is None:
        extensions = ['*.wav', '*.mp3', '*.flac', '*.ogg', '*.m4a']

    # Create the output directory (no error if it already exists).
    os.makedirs(output_dir, exist_ok=True)

    # Collect matching files. Every pattern is tried as given and upper-cased;
    # the two globs can return the same path (e.g. the pattern has no
    # lowercase letters such as '*.WAV', or glob matching is case-insensitive
    # as on Windows). Dict keys drop those repeats while keeping the order in
    # which files were first found, so no file is processed or counted twice.
    found = {}
    for ext in extensions:
        for pattern in (ext, ext.upper()):
            for path in glob.glob(os.path.join(input_dir, pattern)):
                found.setdefault(path, None)
    audio_files = list(found)

    print(f"在目录 '{input_dir}' 中找到 {len(audio_files)} 个音频文件")
    print("=" * 60)

    success_count = 0
    fail_count = 0

    for input_path in audio_files:
        # Build the output path, keeping the original file name.
        filename = os.path.basename(input_path)
        output_path = os.path.join(output_dir, filename)

        # resample_wav_librosa reports success as a bool instead of raising.
        if resample_wav_librosa(input_path, output_path, target_sr):
            success_count += 1
        else:
            fail_count += 1

    print("=" * 60)
    print(f"处理完成！成功: {success_count}, 失败: {fail_count}")

# Usage
# NOTE: every batch_resample_directory call below is commented out, so running
# this block as-is only assigns target_sample_rate. Uncomment one
# input/output/call group to resample that directory.
if __name__ == "__main__":
    # [Configurable] target sample rate in Hz, passed to each batch call below
    target_sample_rate = 16000
    # Configure the input and output directories
    # input_directory = './old_data/compare/data_deal_816_output/'
    # output_directory = './old_data/resample/data_deal_816_output/'
    # batch_resample_directory(input_directory, output_directory, target_sample_rate)

    # input_directory = './new_data/compare/1_output'
    # output_directory = './new_data/resample/1_output/'
    # batch_resample_directory(input_directory, output_directory, target_sample_rate)

    # input_directory = './new_data/compare/2_output'
    # output_directory = './new_data/resample/2_output/'
    # batch_resample_directory(input_directory, output_directory, target_sample_rate)

    # input_directory = './new_data/compare/3_output'
    # output_directory = './new_data/resample/3_output/'
    # batch_resample_directory(input_directory, output_directory, target_sample_rate)

    # input_directory = './new_data/compare/4_output'
    # output_directory = './new_data/resample/4_output/'
    # batch_resample_directory(input_directory, output_directory, target_sample_rate)

    # input_directory = './new_data/compare/5_output'
    # output_directory = './new_data/resample/5_output/'
    # batch_resample_directory(input_directory, output_directory, target_sample_rate)

    # input_directory = './new_data/compare/6_output'
    # output_directory = './new_data/resample/6_output/'
    # batch_resample_directory(input_directory, output_directory, target_sample_rate)

    # print("批量重采样完成！")

在目录 './old_data/compare/1_output' 中找到 200 个音频文件
处理: synthesized_speech_1.wav
  原始采样率: 24000Hz
  音频时长: 0.60秒
  重采样到: 16000Hz
  文件已保存: ./old_data/resample/1_output/synthesized_speech_1.wav
  --------------------------------------------------
处理: synthesized_speech_2.wav
  原始采样率: 24000Hz
  音频时长: 7.72秒
  重采样到: 16000Hz
  文件已保存: ./old_data/resample/1_output/synthesized_speech_2.wav
  --------------------------------------------------
处理: synthesized_speech_3.wav
  原始采样率: 24000Hz
  音频时长: 8.62秒
  重采样到: 16000Hz
  文件已保存: ./old_data/resample/1_output/synthesized_speech_3.wav
  --------------------------------------------------
处理: synthesized_speech_4.wav
  原始采样率: 24000Hz
  音频时长: 3.54秒
  重采样到: 16000Hz
  文件已保存: ./old_data/resample/1_output/synthesized_speech_4.wav
  --------------------------------------------------
处理: synthesized_speech_5.wav
  原始采样率: 24000Hz
  音频时长: 6.70秒
  重采样到: 16000Hz
  文件已保存: ./old_data/resample/1_output/synthesized_speech_5.wav
  -------------------------------------------