In [1]:
import sys
sys.path.append('..')  # Replace with the actual path to your blindsnr directory
import blindsnr
import pandas as pd
import librosa
import numpy as np
import soundfile as sf
import IPython
import pandas as pd
import numpy as np
from scipy.io import wavfile

In [2]:
from datasets import load_dataset
# Open the "train" split of the "pt_br" configuration of google/fleurs with streaming enabled.
ds = load_dataset("google/fleurs", "pt_br", split="train", streaming=True)

# NoiseX-92 noise dataset path.
# I do not own the noise dataset;
# it must be cloned from the GitHub repository: https://github.com/speechdnn/Noises
# NOTE(review): this is an absolute, machine-specific path; point it at your local clone.
noise_path = "/media/rodrigo/Novo volume/projects/git/Noises/NoiseX-92"

  from .autonotebook import tqdm as notebook_tqdm
INFO:datasets:PyTorch version 2.6.0+cu126 available.


In [3]:
# Read the four noise recordings from `noise_path`.
# Only element 0 of each sf.read(...) result (the sample data) is kept.
# NOTE(review): the rest of the sf.read result (presumably the sample rate) is
# discarded; confirm the noise files match the 16000 Hz rate used later on.
white_noise, babble_noise, car_noise, factory_noise = (
    sf.read(f"{noise_path}/{stem}.wav")[0]
    for stem in ("white", "babble", "volvo", "factory1")
)

In [4]:
# Collect metadata and the waveform of the first `n_samples` utterances from the
# streamed dataset `ds` into a single DataFrame, one row per utterance.
keys_to_extract = ["transcription", "raw_transcription", "gender", "language"]
n_samples = 10

# zip() against range(n_samples) stops after n_samples items on its own, so no
# explicit break is needed.
voice_records = []
for sample_id, example in zip(range(n_samples), ds):
    record = {key: example[key] for key in keys_to_extract}
    record["sample_id"] = sample_id
    # The waveform is stored whole in one (object-dtype) cell.
    record["array"] = example["audio"]["array"]
    voice_records.append(record)

# Build the frame once from the records (instead of concatenating one-row frames,
# which left every row with index 0). Explicit columns keep the schema stable
# even if the stream yields nothing.
voice_meta_df = pd.DataFrame(
    voice_records,
    columns=keys_to_extract + ["sample_id", "array"],
)

In [5]:
# Audio player for the first utterance's waveform.
# NOTE(review): rate=16000 is hard-coded; confirm it matches the dataset's sampling rate.
IPython.display.Audio(data=voice_meta_df["array"].iloc[0], rate=16000)

In [6]:
# Waveform of the first collected utterance; used as the clean reference signal.
clean_voice_sample = voice_meta_df["array"].iloc[0]

In [7]:
# Generate one noisy version of `clean_voice_sample` for every
# (target SNR, noise type) pair and gather them in `snr_df`.

# Target SNRs in dB: -10, -5, 0, 5, 10 (np.arange excludes the stop value 15).
snr_range = np.arange(-10, 15, 5)

noise_list = [
    white_noise,
    babble_noise,
    car_noise,
    factory_noise
]

# Labels for the entries of noise_list, in the same order.
noise_names = [
    "white",
    "babble",
    "car",
    "factory"
]

# Build one plain record per mixture and create the DataFrame once at the end,
# rather than creating a one-row frame with a string placeholder and patching
# the array into it.
snr_records = []
for snr in snr_range:
    for noise, noise_name in zip(noise_list, noise_names):
        # verbose=True makes generate_noisy_signal print the desired and the
        # achieved SNR for each mixture.
        noisy_signal = blindsnr.generate_noisy_signal(snr, clean_voice_sample, noise, verbose=True)
        snr_records.append(
            {"noise_type": noise_name, "noisy_array": noisy_signal, "true_snr": snr}
        )

# Rows are ordered by SNR first, then by noise type; the index is 0..N-1.
snr_df = pd.DataFrame(snr_records, columns=["noise_type", "noisy_array", "true_snr"])
    

Desired SNR: -10 dB
True SNR: -10.00 dB
Desired SNR: -10 dB
True SNR: -10.00 dB
Desired SNR: -10 dB
True SNR: -10.00 dB
Desired SNR: -10 dB
True SNR: -10.00 dB
Desired SNR: -5 dB
True SNR: -5.00 dB
Desired SNR: -5 dB
True SNR: -5.00 dB
Desired SNR: -5 dB
True SNR: -5.00 dB
Desired SNR: -5 dB
True SNR: -5.00 dB
Desired SNR: 0 dB
True SNR: -0.00 dB
Desired SNR: 0 dB
True SNR: 0.00 dB
Desired SNR: 0 dB
True SNR: -0.00 dB
Desired SNR: 0 dB
True SNR: 0.00 dB
Desired SNR: 5 dB
True SNR: 5.00 dB
Desired SNR: 5 dB
True SNR: 5.00 dB
Desired SNR: 5 dB
True SNR: 5.00 dB
Desired SNR: 5 dB
True SNR: 5.00 dB
Desired SNR: 10 dB
True SNR: 10.00 dB
Desired SNR: 10 dB
True SNR: 10.00 dB
Desired SNR: 10 dB
True SNR: 10.00 dB
Desired SNR: 10 dB
True SNR: 10.00 dB


In [8]:
# Write every signal in snr_df["noisy_array"] to a 16-bit PCM WAV file and record
# the path of each file in a new "filename" column.
filenames = []
for array, true_snr, noise_type in zip(
    snr_df["noisy_array"], snr_df["true_snr"], snr_df["noise_type"]
):
    filename = f"../audio/snr_{true_snr}_{noise_type}.wav"
    print(f"Saving file to {filename}...")

    # After scaling by 32767 only samples within [-1, 1] fit in int16; anything
    # larger would silently wrap around. Rescale the whole signal by its peak
    # when needed (speech and noise are scaled together, so the SNR is unchanged).
    peak = np.max(np.abs(array)) if np.size(array) else 0.0
    if peak > 1.0:
        array = array / peak

    # Map 1 -> +32767 and -1 -> -32767.
    array = (array * 32767).astype(np.int16)

    wavfile.write(filename, 16000, array)
    filenames.append(filename)

# Assign the column once instead of mutating the frame cell by cell while iterating it.
snr_df["filename"] = filenames

Saving file to ../audio/snr_-10_white.wav...
Saving file to ../audio/snr_-10_babble.wav...
Saving file to ../audio/snr_-10_car.wav...
Saving file to ../audio/snr_-10_factory.wav...
Saving file to ../audio/snr_-5_white.wav...
Saving file to ../audio/snr_-5_babble.wav...
Saving file to ../audio/snr_-5_car.wav...
Saving file to ../audio/snr_-5_factory.wav...
Saving file to ../audio/snr_0_white.wav...
Saving file to ../audio/snr_0_babble.wav...
Saving file to ../audio/snr_0_car.wav...
Saving file to ../audio/snr_0_factory.wav...
Saving file to ../audio/snr_5_white.wav...
Saving file to ../audio/snr_5_babble.wav...
Saving file to ../audio/snr_5_car.wav...
Saving file to ../audio/snr_5_factory.wav...
Saving file to ../audio/snr_10_white.wav...
Saving file to ../audio/snr_10_babble.wav...
Saving file to ../audio/snr_10_car.wav...
Saving file to ../audio/snr_10_factory.wav...


In [9]:
# Keep only the metadata columns (the in-memory waveforms are no longer needed).
# .copy() makes the result an independent frame, so later column assignments on
# snr_df are unambiguous and do not raise pandas' SettingWithCopyWarning.
snr_df = snr_df[["filename", "true_snr", "noise_type"]].copy()

In [10]:
# Run every blind SNR estimator on each saved WAV file and store the result in
# its own column. Each estimator only needs the file path, so map over the
# "filename" column instead of building a full row Series per file.
wav_paths = snr_df["filename"]

# (column name, callable taking a WAV path) in the order the columns are added.
snr_estimators = [
    (
        "wada_original",
        # wada_original additionally needs the executable and lookup-table paths.
        lambda path: blindsnr.wada_original(
            path,
            wada_snr_exe_path="../blindsnr/WadaSNR/Exe/WADASNR",
            table_file="../blindsnr/WadaSNR/Table/Alpha0.400000.txt",
        ),
    ),
    ("wada_simplified", blindsnr.wada_simplified),
    ("nist_stnr_m", blindsnr.nist_stnr_m),
    ("gaussian_mixture_snr", blindsnr.gaussian_mixture_snr),
    ("simple_ibm_snr_estimator", blindsnr.simple_ibm_snr_estimator),
    ("simple_vad_estimate", blindsnr.simple_vad_estimate_snr),
]

for column, estimator in snr_estimators:
    snr_df[column] = wav_paths.map(estimator)

  snr_db = 10 * np.log10(snr_linear + 1e-10)  # Add small constant to avoid log(0)


In [11]:
# Display the results table: one row per generated file, with the true SNR,
# the noise type, and one column per estimator.
snr_df

Unnamed: 0,filename,true_snr,noise_type,wada_original,wada_simplified,nist_stnr_m,gaussian_mixture_snr,simple_ibm_snr_estimator,simple_vad_estimate
0,../audio/snr_-10_white.wav,-10,white,,-10.17446,1.5,39.927817,-4.252275,0.836618
1,../audio/snr_-10_babble.wav,-10,babble,,-3.967313,4.0,-6.834246,14.297779,3.128258
2,../audio/snr_-10_car.wav,-10,car,,-8.005024,5.75,-1.642455,14.361909,0.828513
3,../audio/snr_-10_factory.wav,-10,factory,,-4.62113,3.25,-20.254835,10.395328,1.681701
4,../audio/snr_-5_white.wav,-5,white,,-4.235576,4.0,-2.550952,-1.810377,1.77497
5,../audio/snr_-5_babble.wav,-5,babble,,-1.980556,4.75,-7.731593,14.814125,3.014983
6,../audio/snr_-5_car.wav,-5,car,,-4.840013,6.0,-0.59723,17.344326,1.320891
7,../audio/snr_-5_factory.wav,-5,factory,,-2.307119,4.25,-19.12092,11.36734,2.04465
8,../audio/snr_0_white.wav,0,white,0.725335,0.71819,7.5,1.086737,1.924112,3.230864
9,../audio/snr_0_babble.wav,0,babble,2.008437,1.996463,7.25,-1.030307,16.601282,3.648393
