In [None]:
# Intended to make sure the local checkout is used instead of the installed package.
# NOTE(review): the recorded output of this cell is an ImportError for
# `volume_aug_linmax` from `audio_aug`, and nothing below actually adjusts
# sys.path (sys/os are imported but unused). Presumably the helpers should be
# imported from the neverlib package, or the local source directory must be
# added to sys.path first -- confirm and fix the import path.
import sys
import os

from audio_aug import volume_aug_linmax, volume_aug_dbrms, volume_aug_lufs, measure_loudness
import soundfile as sf
import numpy as np
import matplotlib.pyplot as plt

# Visualisation helper
from neverlib.data_analyze.visualization import AudioVisualizer

wav_path = "/data03/never/Dataset/kws_data/Command_Word/group_a_class/zh/pos_example/上一首/037/soft_normal_male_25_151_vadstart5920_vadend33600_snr14.2.wav"
# always_2d=True makes the result (frames, channels) even for a mono file.
wav, sr = sf.read(wav_path, always_2d=True, dtype="float32")
# Use the second channel when the file has one; otherwise fall back to the
# only channel. A hard-coded index 1 raises IndexError on mono recordings.
channel = min(1, wav.shape[1] - 1)
wav = wav[:, channel]

# Shared plotting helper used by every later cell
visualizer = AudioVisualizer(sr=sr)

# Loudness statistics of the untouched recording; later cells compare against these.
original_loudness = measure_loudness(wav, sr)
print("原始音频响度信息:")
print(f"峰值: {original_loudness['peak_dbfs']:.2f} dBFS")
print(f"RMS: {original_loudness['rms_dbfs']:.2f} dBFS")
print(f"LUFS: {original_loudness['lufs']:.2f} LUFS")
print(f"峰均比: {original_loudness['crest_factor_db']:.2f} dB")

ImportError: cannot import name 'volume_aug_linmax' from 'audio_aug' (unknown location)

: 

## Linear 音量增强

In [None]:
# Target value handed to volume_aug_linmax (this notebook's summary describes
# it as the desired maximum peak, linear scale). Kept in one variable so the
# plot titles below can never disagree with the value actually applied.
target_amp = 0.5
wav_linear = volume_aug_linmax(wav, target_amp)

# Loudness statistics after processing
linear_loudness = measure_loudness(wav_linear, sr)

# Waveforms: original on top, processed below
plt.figure(figsize=(16, 8))
waveform_panels = [
    (wav, "原始波形"),
    (wav_linear, f"Linear增强波形 (目标幅度: {target_amp})"),
]
for row, (signal, title) in enumerate(waveform_panels, start=1):
    plt.subplot(2, 1, row)
    visualizer.plot_waveform(signal, title, ax=plt.gca())
plt.tight_layout()
plt.show()

# Spectrograms: original on top, processed below
plt.figure(figsize=(16, 8))
spectrogram_panels = [
    (wav, "原始频谱图"),
    (wav_linear, f"Linear增强频谱图 (目标幅度: {target_amp})"),
]
for row, (signal, title) in enumerate(spectrogram_panels, start=1):
    plt.subplot(2, 1, row)
    visualizer.plot_spectrogram(signal, title, ax=plt.gca())
plt.tight_layout()
plt.show()

# Grouped bar chart: peak / RMS / LUFS before and after
plt.figure(figsize=(10, 6))
labels = ['原始音频', 'Linear增强']
peak_values = [original_loudness['peak_dbfs'], linear_loudness['peak_dbfs']]
rms_values = [original_loudness['rms_dbfs'], linear_loudness['rms_dbfs']]
lufs_values = [original_loudness['lufs'], linear_loudness['lufs']]

x = np.arange(len(labels))
width = 0.25

# Three bars per group, centred on each tick
plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')
plt.bar(x, rms_values, width, label='RMS (dBFS)')
plt.bar(x + width, lufs_values, width, label='LUFS')

plt.ylabel('分贝')
plt.title('Linear音量增强前后响度对比')
plt.xticks(x, labels)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Text report: absolute values plus the change relative to the original
print("Linear增强后响度信息:")
print(f"峰值: {linear_loudness['peak_dbfs']:.2f} dBFS (变化: {linear_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:.2f} dB)")
print(f"RMS: {linear_loudness['rms_dbfs']:.2f} dBFS (变化: {linear_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:.2f} dB)")
print(f"LUFS: {linear_loudness['lufs']:.2f} LUFS (变化: {linear_loudness['lufs'] - original_loudness['lufs']:.2f} dB)")
print(f"峰均比: {linear_loudness['crest_factor_db']:.2f} dB")

## dBrms 音量增强

In [None]:
# Target level handed to volume_aug_dbrms (this notebook's summary describes
# it as the desired RMS level in dB). Kept in one variable so the plot titles
# below can never disagree with the value actually applied.
target_db = -6
wav_dbrms = volume_aug_dbrms(wav, target_db)

# Loudness statistics after processing
dbrms_loudness = measure_loudness(wav_dbrms, sr)

# Waveforms: original on top, processed below
plt.figure(figsize=(16, 8))
waveform_panels = [
    (wav, "原始波形"),
    (wav_dbrms, f"dBrms增强波形 (目标电平: {target_db} dB)"),
]
for row, (signal, title) in enumerate(waveform_panels, start=1):
    plt.subplot(2, 1, row)
    visualizer.plot_waveform(signal, title, ax=plt.gca())
plt.tight_layout()
plt.show()

# Spectrograms: original on top, processed below
plt.figure(figsize=(16, 8))
spectrogram_panels = [
    (wav, "原始频谱图"),
    (wav_dbrms, f"dBrms增强频谱图 (目标电平: {target_db} dB)"),
]
for row, (signal, title) in enumerate(spectrogram_panels, start=1):
    plt.subplot(2, 1, row)
    visualizer.plot_spectrogram(signal, title, ax=plt.gca())
plt.tight_layout()
plt.show()

# Grouped bar chart: peak / RMS / LUFS before and after
plt.figure(figsize=(10, 6))
labels = ['原始音频', 'dBrms增强']
peak_values = [original_loudness['peak_dbfs'], dbrms_loudness['peak_dbfs']]
rms_values = [original_loudness['rms_dbfs'], dbrms_loudness['rms_dbfs']]
lufs_values = [original_loudness['lufs'], dbrms_loudness['lufs']]

x = np.arange(len(labels))
width = 0.25

# Three bars per group, centred on each tick
plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')
plt.bar(x, rms_values, width, label='RMS (dBFS)')
plt.bar(x + width, lufs_values, width, label='LUFS')

plt.ylabel('分贝')
plt.title('dBrms音量增强前后响度对比')
plt.xticks(x, labels)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Text report: absolute values plus the change relative to the original
print("dBrms增强后响度信息:")
print(f"峰值: {dbrms_loudness['peak_dbfs']:.2f} dBFS (变化: {dbrms_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:.2f} dB)")
print(f"RMS: {dbrms_loudness['rms_dbfs']:.2f} dBFS (变化: {dbrms_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:.2f} dB)")
print(f"LUFS: {dbrms_loudness['lufs']:.2f} LUFS (变化: {dbrms_loudness['lufs'] - original_loudness['lufs']:.2f} dB)")
print(f"峰均比: {dbrms_loudness['crest_factor_db']:.2f} dB")

## LUFS 音量增强

In [None]:
# Target loudness handed to volume_aug_lufs. Kept in one variable so the plot
# titles below can never disagree with the value actually applied.
# NOTE(review): measure_loudness is given sr, but volume_aug_lufs is not;
# confirm the function does not need the sample rate to compute LUFS.
target_lufs = -16
wav_lufs = volume_aug_lufs(wav, target_lufs)

# Loudness statistics after processing
lufs_loudness = measure_loudness(wav_lufs, sr)

# Waveforms: original on top, processed below
plt.figure(figsize=(16, 8))
waveform_panels = [
    (wav, "原始波形"),
    (wav_lufs, f"LUFS增强波形 (目标响度: {target_lufs} LUFS)"),
]
for row, (signal, title) in enumerate(waveform_panels, start=1):
    plt.subplot(2, 1, row)
    visualizer.plot_waveform(signal, title, ax=plt.gca())
plt.tight_layout()
plt.show()

# Spectrograms: original on top, processed below
plt.figure(figsize=(16, 8))
spectrogram_panels = [
    (wav, "原始频谱图"),
    (wav_lufs, f"LUFS增强频谱图 (目标响度: {target_lufs} LUFS)"),
]
for row, (signal, title) in enumerate(spectrogram_panels, start=1):
    plt.subplot(2, 1, row)
    visualizer.plot_spectrogram(signal, title, ax=plt.gca())
plt.tight_layout()
plt.show()

# Grouped bar chart: peak / RMS / LUFS before and after
plt.figure(figsize=(10, 6))
labels = ['原始音频', 'LUFS增强']
peak_values = [original_loudness['peak_dbfs'], lufs_loudness['peak_dbfs']]
rms_values = [original_loudness['rms_dbfs'], lufs_loudness['rms_dbfs']]
lufs_values = [original_loudness['lufs'], lufs_loudness['lufs']]

x = np.arange(len(labels))
width = 0.25

# Three bars per group, centred on each tick
plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')
plt.bar(x, rms_values, width, label='RMS (dBFS)')
plt.bar(x + width, lufs_values, width, label='LUFS')

plt.ylabel('分贝')
plt.title('LUFS音量增强前后响度对比')
plt.xticks(x, labels)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Text report: absolute values plus the change relative to the original
print("LUFS增强后响度信息:")
print(f"峰值: {lufs_loudness['peak_dbfs']:.2f} dBFS (变化: {lufs_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:.2f} dB)")
print(f"RMS: {lufs_loudness['rms_dbfs']:.2f} dBFS (变化: {lufs_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:.2f} dB)")
print(f"LUFS: {lufs_loudness['lufs']:.2f} LUFS (变化: {lufs_loudness['lufs'] - original_loudness['lufs']:.2f} dB)")
print(f"峰均比: {lufs_loudness['crest_factor_db']:.2f} dB")

In [None]:
# Combined comparison of the three augmentation methods: overlaid waveforms,
# grouped loudness bars, crest-factor bars, then two text tables and a summary.
plt.figure(figsize=(16, 12))

# --- Panel 1: overlaid waveforms ---
plt.subplot(3, 1, 1)
# Sample k sits at k / sr seconds. (linspace(0, N/sr, N) would stretch the
# axis so the last sample lands at N/sr instead of (N-1)/sr.)
time_axis = np.arange(len(wav)) / sr
waveform_traces = [
    (wav, '原始波形'),
    (wav_linear, 'Linear增强'),
    (wav_dbrms, 'dBrms增强'),
    (wav_lufs, 'LUFS增强'),
]
for signal, legend in waveform_traces:
    plt.plot(time_axis, signal, label=legend, alpha=0.7)
plt.title('三种音量增强方法波形对比')
plt.xlabel('时间 (s)')
plt.ylabel('幅度')
plt.grid(True, alpha=0.3)
plt.legend()

# --- Panel 2: peak / RMS / LUFS grouped bars ---
plt.subplot(3, 1, 2)
labels = ['原始音频', 'Linear增强', 'dBrms增强', 'LUFS增强']
# Same order as `labels`
loudness_results = [original_loudness, linear_loudness, dbrms_loudness, lufs_loudness]
peak_values = [info['peak_dbfs'] for info in loudness_results]
rms_values = [info['rms_dbfs'] for info in loudness_results]
lufs_values = [info['lufs'] for info in loudness_results]

x = np.arange(len(labels))
width = 0.25

plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')
plt.bar(x, rms_values, width, label='RMS (dBFS)')
plt.bar(x + width, lufs_values, width, label='LUFS')

plt.ylabel('分贝')
plt.title('三种音量增强方法响度对比')
plt.xticks(x, labels)
plt.legend()
plt.grid(True, alpha=0.3)

# --- Panel 3: crest factor ---
plt.subplot(3, 1, 3)
crest_values = [info['crest_factor_db'] for info in loudness_results]

plt.bar(x, crest_values, width=0.5, color='purple', alpha=0.7)
plt.ylabel('分贝')
plt.title('三种音量增强方法峰均比对比')
plt.xticks(x, labels)
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# --- Table 1: absolute loudness figures per method ---
print("\n三种音量增强方法综合对比:")
print("=" * 80)
print(f"{'方法':<12}{'峰值 (dBFS)':<20}{'RMS (dBFS)':<20}{'LUFS':<20}{'峰均比 (dB)':<15}")
print("-" * 80)
for method, info in zip(labels, loudness_results):
    print(f"{method:<12}{info['peak_dbfs']:<20.2f}{info['rms_dbfs']:<20.2f}{info['lufs']:<20.2f}{info['crest_factor_db']:<15.2f}")
print("=" * 80)

# --- Table 2: change relative to the original recording ---
print("\n响度变化量 (相对于原始音频):")
print("=" * 80)
print(f"{'方法':<12}{'峰值变化 (dB)':<20}{'RMS变化 (dB)':<20}{'LUFS变化 (dB)':<20}")
print("-" * 80)
# Skip the first entry: the original has no delta against itself.
for method, info in zip(labels[1:], loudness_results[1:]):
    print(f"{method:<12}{info['peak_dbfs'] - original_loudness['peak_dbfs']:<20.2f}{info['rms_dbfs'] - original_loudness['rms_dbfs']:<20.2f}{info['lufs'] - original_loudness['lufs']:<20.2f}")
print("=" * 80)

# --- Written summary of the three methods ---
summary_lines = (
    "\n音量增强方法分析总结:",
    "=" * 80,
    "1. Linear增强 (volume_aug_linmax):",
    "   - 基于峰值的线性增益，目标为最大峰值为指定的线性值",
    "   - 特点：保持动态范围不变，整体增强或减弱",
    "   - 适用场景：需要精确控制峰值而不改变音频动态特性的场合",
    "\n2. dBrms增强 (volume_aug_dbrms):",
    "   - 基于RMS电平的增益，目标为指定的dB电平",
    "   - 特点：以能量均值为基准，更接近人耳感知",
    "   - 适用场景：需要统一音频能量电平的场合",
    "\n3. LUFS增强 (volume_aug_lufs):",
    "   - 基于国际响度标准的增益，符合广播标准",
    "   - 特点：考虑人耳频率加权，最接近人耳响度感知",
    "   - 适用场景：广播、流媒体、专业音频处理等需要符合响度标准的场合",
    "=" * 80,
)
for line in summary_lines:
    print(line)

# 音量增强方法综合分析

本笔记本演示并分析了neverlib库中三种音量增强方法的特性和效果:

1. **Linear增强 (volume_aug_linmax)**
   - 基于峰值的线性增益
   - 将音频最大峰值调整为目标线性值(0-1范围)
   - 保持动态范围不变

2. **dBrms增强 (volume_aug_dbrms)**
   - 基于均方根(RMS)功率的增益
   - 将音频RMS电平调整为目标分贝值
   - 以信号平均能量为基准，比峰值更能反映整体音量

3. **LUFS增强 (volume_aug_lufs)**
   - 基于感知响度单位(LUFS)的增益
   - 符合广播标准的响度归一化
   - 考虑人耳感知加权，最接近人耳响度感知

各方法适用于不同场景，可根据需求选择合适的音量增强方式。