1. get clean song segments (~50)
2. get seagull noise (~10)
3. get noisy segments (~20)
4. find event noises (~10)

mix and match:
generate audio clips that mix all of the sources above (with varying amplitudes and lengths)

train VAE >> get clean result

In [1]:
import os
import numpy as np
from scipy.io import wavfile
import matplotlib.pyplot as plt

from IPython.display import Audio

from helpers import *

In [10]:
def get_noise(audio, sr=44100, freq_cutoff=1000):
    """Return the low-pass filtered version of ``audio``.

    The work is delegated to ``filter`` with ``t="lowpass"``; that name is
    presumably the helper pulled in by ``from helpers import *`` rather than
    the builtin -- TODO confirm.

    Args:
        audio: Samples to filter, forwarded unchanged.
        sr: Sample rate of ``audio`` (callers pass the rate returned by
            ``wavfile.read``).
        freq_cutoff: Cutoff forwarded to the filter (presumably in Hz).

    Returns:
        Whatever the low-pass ``filter`` call returns.
    """
    low_band = filter(audio, sr, freq_cutoff, t="lowpass")
    return low_band

def pad_zero(signal, length, str_idx):
    """Embed ``signal`` into an all-zero buffer of ``length`` samples.

    Args:
        signal: Array-like of samples, either 1-D ``(n,)`` or multi-channel
            ``(n, channels)``.
        length: Number of samples (first-axis size) of the returned buffer.
        str_idx: Index in the buffer at which the first sample of ``signal``
            is placed.

    Returns:
        A float64 array with first-axis size ``length`` and the same trailing
        shape as ``signal``; it is zero everywhere except
        ``[str_idx, str_idx + len(signal))``, which holds ``signal``.

    Raises:
        ValueError: If ``signal`` does not fit entirely inside the buffer at
            ``str_idx`` (including a negative ``str_idx``).
    """
    signal = np.asarray(signal)
    n = len(signal)
    # Check the bounds explicitly: a negative start would otherwise be read
    # as "count from the end" by the slice below and silently misplace the
    # signal, and an overflow would surface as an opaque broadcast error.
    if str_idx < 0 or str_idx + n > length:
        raise ValueError(
            f"signal of {n} samples does not fit at index {str_idx} "
            f"in a buffer of {length} samples"
        )
    # Keep any trailing (channel) axes so stereo audio can be padded too.
    new_sig = np.zeros((length,) + signal.shape[1:])
    new_sig[str_idx:str_idx + n] = signal
    return new_sig

In [2]:
# Shared seed handed to np.random.default_rng in the sampling cells below so
# that the random file selection and mixing can be repeated.
random_seed = 0

In [3]:
# Input folder for the filtering step (raw bird-call recordings, going by its
# name). Every directory entry is listed here; the loop that consumes
# `filenames` keeps only the names ending in ".wav".
data_dir = "clear_bird_calls_raw"
filenames = os.listdir(data_dir)

In [4]:
# For each .wav song in the input folder, apply a high-pass filter and save
# the result as <save_dir>/<original name>_<freq_thresh>.wav.
freq_thresh = 2500  # cutoff handed to the high-pass filter (presumably Hz)
save_dir = "filtered_bird_calls"

# wavfile.write does not create missing folders, so make sure it exists.
os.makedirs(save_dir, exist_ok=True)

for fn in filenames:
    if not fn.endswith(".wav"):
        continue
    fs, song = wavfile.read(os.path.join(data_dir, fn))
    # `filter` and `scale_song` are not defined in this file; presumably they
    # come from `from helpers import *`.
    high_song = filter(song, fs, freq_thresh, t="highpass")
    high_song = scale_song(high_song)  # scale to int for saving
    # Strip only the trailing extension. Splitting on ".wav" would cut the
    # name at the first ".wav" found anywhere in it, so differently named
    # files could end up writing to the same output path.
    prefix = os.path.splitext(fn)[0]
    save_path = os.path.join(save_dir, f"{prefix}_{freq_thresh}.wav")
    wavfile.write(save_path, rate=fs, data=high_song)



In [6]:
# Pick a repeatable random subset of recordings to extract background
# noise from.
data_dir = "segments/2015"
n_noise_sample = 100

wavs = list_wav(data_dir)
rng = np.random.default_rng(seed=random_seed)
# Draw without replacement so that no recording is selected twice.
filenames = rng.choice(wavs, size=n_noise_sample, replace=False)


In [11]:
# Low-pass every selected recording (cutoff 2000) to obtain its background
# noise and save it as noise/bg_2000/bg_<i>.wav.
# wavfile.write does not create missing folders, so make sure it exists.
os.makedirs("noise/bg_2000", exist_ok=True)

for i, filename in enumerate(filenames):
    sr, audio = wavfile.read(os.path.join(data_dir, filename))
    noise = get_noise(audio, sr=sr, freq_cutoff=2000)
    # `scale_song` is presumably the helper that converts to an integer
    # range suitable for saving -- TODO confirm against helpers.
    wavfile.write(f"noise/bg_2000/bg_{i}.wav", rate=sr, data=scale_song(noise))

In [12]:
# Get seagull noise.
# `times` lists [start, end] pairs that the export cell multiplies by the
# sample rate to slice `gull`, so they are offsets in seconds.
# Alternative, finer-grained segmentation kept for reference:
# times = [[0.353, 0.573], [0.597, 0.762], [2.180, 2.506], [2.671, 3.012],
#          [3.089, 3.652]]
times = [[0, 2.604]]
# Note the upper-case ".WAV" extension of this source file.
sr, gull = wavfile.read("noise/seagull.WAV")

  sr, gull = wavfile.read("noise/seagull.WAV")


In [13]:
# Cut each [start, end] interval (seconds) out of the seagull recording and
# save it as noise/seagull/seagull_<i>.wav.
# wavfile.write does not create missing folders, so make sure it exists.
os.makedirs("noise/seagull", exist_ok=True)

for i, (start, end) in enumerate(times):
    # Convert seconds to sample indices before slicing.
    gull_i = gull[int(sr * start):int(sr * end)]
    wavfile.write(f"noise/seagull/seagull_{i}.wav", rate=sr, data=gull_i)

In [30]:
# Number of synthetic mixtures to generate.
n_samples = 500
# NOTE(review): `seed` is assigned here but the generator below is built from
# `random_seed`, so this value has no effect in the visible code -- confirm
# which seed is intended.
seed = 42

# Input folders: background noise, seagull noise and filtered bird calls.
bg_dir = "noise/bg_2000"
noise_dir = "noise/seagull"
signal_dir = "filtered_bird_calls"

# `list_wav` is not defined in this file (presumably from `helpers`); its
# results are later passed to rng.choice and joined onto the folder paths,
# so it appears to return bare file names -- TODO confirm.
bg_list = list_wav(bg_dir)
noise_list = list_wav(noise_dir)
signal_list = list_wav(signal_dir)
# Fresh generator for the synthesis loop.
rng = np.random.default_rng(seed=random_seed)


In [31]:
def synth(signal, bg, ratio=0.1):
    """Blend ``signal`` and ``bg`` as a weighted sum.

    Args:
        signal: Foreground term, weighted by ``ratio``.
        bg: Background term, weighted by ``1 - ratio``.
        ratio: Weight given to ``signal``.

    Returns:
        ``ratio * signal + (1 - ratio) * bg``.
    """
    bg_weight = 1 - ratio
    weighted_signal = ratio * signal
    weighted_bg = bg_weight * bg
    return weighted_signal + weighted_bg

In [40]:
# Build the synthetic dataset. For every sample, pick a random background and
# a random bird call, place the call at a random offset inside a zero buffer
# as long as the background, and mix the two with a random ratio. The mixture
# goes to synth/composite_<i>.wav and the zero-padded call alone goes to
# clean/clean_<i>.wav.

# wavfile.write does not create missing folders, so make sure they exist.
os.makedirs("synth", exist_ok=True)
os.makedirs("clean", exist_ok=True)

metadata = []
for i in range(n_samples):
    bg_name = rng.choice(bg_list)
    sr, bg = wavfile.read(os.path.join(bg_dir, bg_name))
    sig_name = rng.choice(signal_list)
    # NOTE(review): the call's own sample rate is discarded, so it is assumed
    # to match the background's -- confirm for the input data.
    _, signal = wavfile.read(os.path.join(signal_dir, sig_name))

    # Disabled: optional embedding of seagull noise into the background.
    # add_noise = rng.integers(0, 2, size=1)[0]
    # if (add_noise):
    #     _, noise = wavfile.read(noise_dir+"/"+rng.choice(noise_list))
    #     # embed noise
    #     noise_start = rng.integers(0, len(bg)-len(noise)-1, size=1)[0]
    #     noise = pad_zero(noise, len(bg), noise_start)
    #     # noise = scale_song(pad_zero(noise, len(bg), noise_start))
    #     print(bg.dtype)
    #     bg = bg + noise

    # Embed the signal at a random offset where it still fits entirely.
    max_start = len(bg) - len(signal)
    if max_start < 0:
        raise ValueError(
            f"signal {sig_name} ({len(signal)} samples) is longer than "
            f"background {bg_name} ({len(bg)} samples)"
        )
    # `high` is exclusive in Generator.integers, so max_start + 1 makes every
    # fitting offset reachable, including the case where the call is exactly
    # as long as the background.
    sig_start = int(rng.integers(0, max_start + 1))
    signal = pad_zero(signal, len(bg), sig_start)

    # Weight of the call in the mixture; the background gets 1 - r.
    r = rng.uniform(0.05, 0.25)

    new_data = synth(signal, bg, ratio=r)
    wavfile.write(f"synth/composite_{i}.wav", rate=sr,
                  data=scale_song(new_data))
    wavfile.write(f"clean/clean_{i}.wav", rate=sr, data=scale_song(signal))

    # Record which sources and ratio produced this sample.
    metadata.append([f"composite_{i}.wav", bg_name, sig_name, r])

In [38]:
import pandas as pd

# One row per generated mixture: output file name, background file, bird-call
# file and the mixing ratio used.
_metadata_columns = ["filename", "bg", "signal", "r"]
df = pd.DataFrame(data=metadata, columns=_metadata_columns)
df.to_csv("synth_metadata.csv")