In [2]:
import numpy as np
import pyroomacoustics as pra
from IPython.display import Audio
from src.file_io import load_signal_from_wav

# Sampling rate in Hz; reused for the room simulations in the cells below.
fs = 16000

# Load both utterances and cast them to float so that squaring the samples
# later on cannot overflow an integer dtype.
# NOTE(review): the second argument of load_signal_from_wav is presumably the
# expected sampling rate -- confirm against src.file_io.
source, noise = (
    load_signal_from_wav(wav_path, fs).astype(float)
    for wav_path in ("data/arctic_a0001.wav", "data/arctic_a0002.wav")
)

SNR修正プログラム

In [3]:
def calculate_power(signal):
    """Return the mean power of ``signal`` (sum of squares / number of samples).

    The input is converted to a float64 array before squaring. This replaces
    the former ``assert signal.dtype == float`` guard: integer PCM data such
    as int16 can no longer overflow when squared, float32 input is accepted,
    and the protection does not disappear when Python runs with ``-O``.
    Arrays that are already float64 are used without copying.

    Args:
        signal: 1-D array-like of samples.

    Returns:
        The sum of the squared samples divided by ``len(signal)``.
    """
    samples = np.asarray(signal, dtype=float)
    return np.sum(samples ** 2) / len(samples)

def calculate_snr(signal, noise):
    """Return the signal-to-noise ratio of ``signal`` over ``noise`` in dB.

    Both powers come from ``calculate_power``; the result is
    ``10 * log10(P_signal / P_noise)``.
    """
    power_ratio = calculate_power(signal) / calculate_power(noise)
    return 10 * np.log10(power_ratio)

def calculate_coef(source, noise, snr_target):
    """Return the amplitude gain that moves the SNR to ``snr_target`` dB.

    Multiplying ``source`` by the returned value, or dividing ``noise`` by
    it, yields an SNR of ``snr_target`` dB as measured by ``calculate_snr``.

    Args:
        source: Samples of the wanted signal.
        noise: Samples of the interfering signal.
        snr_target: Desired SNR in dB.
    """
    current_ratio = calculate_power(source) / calculate_power(noise)
    # Linear power ratio that corresponds to the target SNR
    # (i.e. the argument of the logarithm in the SNR formula).
    target_ratio = 10 ** (snr_target / 10)
    # Power grows with the square of amplitude, hence the square root.
    return np.sqrt(target_ratio / current_ratio)

# Power of each dry signal and the SNR before any scaling.
print(f"source: {calculate_power(source):.2f}, noise: {calculate_power(noise):.2f}, SNR: {calculate_snr(source, noise):.2f} dB")

# Gain needed to bring the SNR to -10 dB.
coefficient = calculate_coef(source, noise, -10)
print(f"coefficient: {coefficient:.2f}")

# Check 1: apply the gain to the source and leave the noise untouched.
adjusted_source = coefficient * source
adjusted_snr = calculate_snr(adjusted_source, noise)
print(f"adjusted SNR: {adjusted_snr:.2f} dB")

# Check 2: leave the source untouched and divide the noise by the same gain.
# Note that adjusted_snr is overwritten with the result of this second check.
adjusted_noise = noise / coefficient
adjusted_snr = calculate_snr(source, adjusted_noise)
print(f"adjusted SNR: {adjusted_snr:.2f} dB")

source: 8397178.89, noise: 7399857.06, SNR: 0.55 dB
coefficient: 0.30
adjusted SNR: -10.00 dB
adjusted SNR: -10.00 dB


PRA環境でのテスト (単一音源)

In [4]:
room_dim = [10, 10]
mic_loc = np.array([[5, 5]]).T
# NOTE(review): signal_length is not referenced by any of the visible cells.
signal_length = 16000


def _simulate_single_source(position, signal):
    """Simulate one source in a fresh ShoeBox room.

    Uses the notebook-level ``room_dim``, ``mic_loc`` and ``fs``.

    Args:
        position: Source coordinates inside the room.
        signal: Dry signal emitted by the source.

    Returns:
        Tuple ``(room, recorded)`` where ``recorded`` is
        ``room.mic_array.signals`` after the simulation.
    """
    room = pra.ShoeBox(room_dim, fs=fs, max_order=1)
    room.add_source(np.array(position), signal=signal)
    room.add_microphone_array(pra.MicrophoneArray(mic_loc, room.fs))
    room.simulate()
    return room, room.mic_array.signals


# source
room_source, rec_source = _simulate_single_source([7, 5], source)

# noise
room_noise, rec_noise = _simulate_single_source([1, 5], noise)

# SNR measured on the recordings of the first (only) microphone.
snr = calculate_snr(rec_source[0], rec_noise[0])
print(f"SNR: {snr:.2f} dB")

n_sources:  5
image_source_model:  0.00098419189453125
RIR computation done in 0.01 seconds.
n_sources:  5
image_source_model:  0.0
RIR computation done in 0.00 seconds.
SNR: 3.76 dB


In [5]:
# Gain that brings the SNR at the microphone to 10 dB. It is derived from the
# recorded signals but applied to the dry source; the SNR printed at the end
# of this cell checks that the scaling carries through the simulation.
coef = calculate_coef(rec_source[0], rec_noise[0], 10)
print(f"coef: {coef:.2f}")
source_adjusted = source * coef

# source
# Re-simulate with the scaled source. Uses the shared sampling rate `fs`
# instead of a second hard-coded 16000 so this cell cannot drift from the
# other simulations. Note: this rebinds room_source from the previous cell.
room_source = pra.ShoeBox(room_dim, fs=fs, max_order=1)

room_source.add_source(np.array([7, 5]), signal=source_adjusted)
room_source.add_microphone_array(pra.MicrophoneArray(mic_loc, room_source.fs))

room_source.simulate()
rec_source2 = room_source.mic_array.signals

# Compare against the noise recording obtained in the previous cell.
snr = calculate_snr(rec_source2[0], rec_noise[0])
print(f"SNR: {snr:.2f} dB")

coef: 2.05
n_sources:  5
image_source_model:  0.0
RIR computation done in 0.00 seconds.
SNR: 10.00 dB


音源を複数置いたときのパワー

In [6]:
room_dim = [10, 10]
mic_loc = np.array([[5, 5]]).T
signal_length = 16000

# Baseline: a single source.
room1 = pra.ShoeBox(room_dim, fs=fs, max_order=1)
room1.add_source(np.array([7, 5]), signal=source)
mic_array1 = pra.MicrophoneArray(mic_loc, room1.fs)
room1.add_microphone_array(mic_array1)
room1.simulate()
rec1 = room1.mic_array.signals
power1 = calculate_power(rec1[0])
print(power1)

# Same signal emitted from two positions at the same distance from the mic.
room2 = pra.ShoeBox(room_dim, fs=fs, max_order=1)
for position in ([7, 5], [5, 7]):
    room2.add_source(np.array(position), signal=source)
mic_array2 = pra.MicrophoneArray(mic_loc, room2.fs)
room2.add_microphone_array(mic_array2)
room2.simulate()
rec2 = room2.mic_array.signals
power2 = calculate_power(rec2[0])
print(power2)

n_sources:  5
image_source_model:  0.0
RIR computation done in 0.00 seconds.
2459620.1474097497
n_sources:  5
n_sources:  5
image_source_model:  0.0
RIR computation done in 0.00 seconds.
9838480.589638999


In [7]:
# Two identical signals arriving from the same distance double the amplitude,
# so the power should be four times the single-source power; this value is
# expected to match the two-source power printed by the previous cell.
4 * calculate_power(rec1[0])

9838480.589638999

In [4]:
# Play the recorded source inside the notebook. The playback rate is taken
# from the shared `fs` so it always matches the rate used for the simulation.
Audio(rec_source[0], rate=fs)

In [5]:
# Play the recorded noise; the rate comes from the shared `fs` rather than a
# hard-coded 16000.
Audio(rec_noise[0], rate=fs)