This notebook covers all audio mixing steps: overlaying background noise on recordings and applying mixup to augment the sound data.

In [1]:
import os
import glob
import soundfile as sf
import pandas as pd

In [2]:
# Load the selection-table spreadsheet; its SNR column is the reference for the SNR value range
snr_metadata_path = 'F:/MSc Ecology & Data Science Research/Metadata/public_domainSNR.xlsx'
seltab_list = pd.read_excel(snr_metadata_path)
# Display summary statistics of the table's columns as the cell output
seltab_list.describe()

Unnamed: 0,selec,channel,start,end,bottom.freq,top.freq,orig_start,orig_end,group,Comments,time_gap,group_id,SNR
count,4736.0,4736.0,4736.0,4736.0,4736.0,4736.0,2368.0,2368.0,3570.0,0.0,4736.0,4736.0,4736.0
mean,36.970439,1.080236,65.164154,66.951427,0.575318,5.221522,65.721761,66.298033,21.880112,,2.465092,34.044341,-0.046808
std,38.124854,0.271688,65.783907,65.804193,0.319506,3.03125,65.774928,65.812038,22.289548,,5.149162,35.514694,17.250671
min,1.0,1.0,0.0,0.316433,0.0,0.6742,0.0,0.316433,0.0,,-0.566045,1.0,-70.566867
25%,9.0,1.0,15.123606,16.879506,0.3939,2.7833,15.656208,16.16844,5.0,,0.150323,8.0,-8.902975
50%,23.5,1.0,44.956894,46.810996,0.5307,5.0154,45.426174,46.045896,14.0,,0.640805,21.0,2.439086
75%,54.0,1.0,93.205402,94.643943,0.6743,6.512,93.730766,94.267953,32.0,,2.357563,50.0,12.255604
max,189.0,2.0,334.84128,337.565847,3.2327,22.05,334.84128,336.633308,111.0,,74.532614,177.0,39.055463


In [None]:
# SNR reference values taken from the selection table loaded above:
#   minSNR / maxSNR are the extremes of the SNR column,
#   cutoff is the mean of the SNR column.
snr_values = seltab_list['SNR']
minSNR, cutoff, maxSNR = snr_values.min(), snr_values.mean(), snr_values.max()

In [4]:
%%capture 
from audiomentations import AddBackgroundNoise, PolarityInversion

# the code is for dartmoor weak labelled data augmentation
# set up folders
input_for_high  = "F:/MSc Ecology & Data Science Research/3. augment data_train/Wilcoxon/Dartmoor_high snr_weak_call"
input_for_low   = "F:/MSc Ecology & Data Science Research/3. augment data_train/Wilcoxon/Dartmoor_low snr_weak_call"
noise_for_high  = "F:/MSc Ecology & Data Science Research/3. augment data_train/Wilcoxon/Dartmoor_high snr_weak_noise"
noise_for_low   = "F:/MSc Ecology & Data Science Research/3. augment data_train/Wilcoxon/Dartmoor_low snr_weak_noise"

output_folder_high = "F:/MSc Ecology & Data Science Research/3. train_data/1. Wilcoxon/dr_highsnr_weak/Red Fox"
output_folder_low  = "F:/MSc Ecology & Data Science Research/3. train_data/1. Wilcoxon/dr_lowsnr_weak/Red Fox"

# create the augmentation transform
# specify the range of SNR, the rest are defaults
# this function will randomly pick SNR within the specified range
transform_low = AddBackgroundNoise(
    sounds_path=noise_for_low,
    min_snr_db=minSNR,
    max_snr_db=cutoff,
    noise_transform=PolarityInversion(),
    p=1.0
)

transform_high = AddBackgroundNoise(
    sounds_path=noise_for_high,
    min_snr_db=cutoff,
    max_snr_db=maxSNR,
    noise_transform=PolarityInversion(),
    p=1.0
)



# create a list of wav files in input folder
sound_paths_low = glob.glob(os.path.join(input_for_low, "*.wav"))
sound_paths_high = glob.glob(os.path.join(input_for_high, "*.wav"))


# loop, augment, and save
for sound_path in sound_paths_low:
    # read sound files
    samples, sample_rate = sf.read(sound_path)
    # apply augmentation
    augmented_samples = transform_low(samples, sample_rate)
    # save back out (same filename into output_folder)
    fname = os.path.basename(sound_path)
    out_path = os.path.join(output_folder_low, fname)
    sf.write(out_path, augmented_samples, sample_rate)

# loop, augment, and save
for sound_path in sound_paths_high:
    # read sound files
    samples, sample_rate = sf.read(sound_path)
    # apply augmentation
    augmented_samples = transform_high(samples, sample_rate)
    # save back out (same filename into output_folder)
    fname = os.path.basename(sound_path)
    out_path = os.path.join(output_folder_high, fname)
    sf.write(out_path, augmented_samples, sample_rate)

In [5]:
%%capture 
from audiomentations import AddBackgroundNoise, PolarityInversion

# the code is for dartmoor strong labelled data augmentation
# set up folders
input_for_high  = "F:/MSc Ecology & Data Science Research/3. augment data_train/Wilcoxon/Dartmoor_high snr_strong_call"
input_for_low   = "F:/MSc Ecology & Data Science Research/3. augment data_train/Wilcoxon/Dartmoor_low snr_strong_call"
noise_for_high  = "F:/MSc Ecology & Data Science Research/3. augment data_train/Wilcoxon/Dartmoor_high snr_strong_noise"
noise_for_low   = "F:/MSc Ecology & Data Science Research/3. augment data_train/Wilcoxon/Dartmoor_low snr_strong_noise"

output_folder_high = "F:/MSc Ecology & Data Science Research/3. train_data/1. Wilcoxon/dr_highsnr_strong/Red Fox"
output_folder_low  = "F:/MSc Ecology & Data Science Research/3. train_data/1. Wilcoxon/dr_lowsnr_strong/Red Fox"

# create the augmentation transform
# specify the range of SNR, the rest are defaults
# this function will randomly pick SNR within the specified range
transform_low = AddBackgroundNoise(
    sounds_path=noise_for_low,
    min_snr_db=minSNR,
    max_snr_db=cutoff,
    noise_transform=PolarityInversion(),
    p=1.0
)

transform_high = AddBackgroundNoise(
    sounds_path=noise_for_high,
    min_snr_db=cutoff,
    max_snr_db=maxSNR,
    noise_transform=PolarityInversion(),
    p=1.0
)



# create a list of wav files in input folder
sound_paths_low = glob.glob(os.path.join(input_for_low, "*.wav"))
sound_paths_high = glob.glob(os.path.join(input_for_high, "*.wav"))


# loop, augment, and save
for sound_path in sound_paths_low:
    # read sound files
    samples, sample_rate = sf.read(sound_path)
    # apply augmentation
    augmented_samples = transform_low(samples, sample_rate)
    # save back out (same filename into output_folder)
    fname = os.path.basename(sound_path)
    out_path = os.path.join(output_folder_low, fname)
    sf.write(out_path, augmented_samples, sample_rate)

# loop, augment, and save
for sound_path in sound_paths_high:
    # read sound files
    samples, sample_rate = sf.read(sound_path)
    # apply augmentation
    augmented_samples = transform_high(samples, sample_rate)
    # save back out (same filename into output_folder)
    fname = os.path.basename(sound_path)
    out_path = os.path.join(output_folder_high, fname)
    sf.write(out_path, augmented_samples, sample_rate)

Note from the audiomentations `AddBackgroundNoise` documentation: "A path to a file/folder with sound(s), or a list of file/folder paths, must be specified. These sounds should ideally be at least as long as the input sounds to be transformed. Otherwise, the background sound will be repeated, which may sound unnatural."
What to do before mixing:
- import the Xeno-canto metadata
- import the Dartmoor recording metadata
- get the length of each Xeno-canto file
- crop the Dartmoor background noise so that its length matches the Xeno-canto file
- select Dartmoor background noise at random
- pair the background noise with the Xeno-canto files at random

It works, but with some caveats:
- to obtain the SNR you want, you must overlay the noise on the cleanest available signal
- if the sound already contains noise, the SNR does not combine linearly
- the new noise is added on top of the existing noise, so the resulting SNR can only be estimated
- so for this case, use only the cleanest sounds, overlay them with the noise, and control the SNR through the module