In [5]:
import pandas as pd
import random
import scaper

from pathlib import Path
import numpy as np

from scipy import signal
from librosa import load

from sklearn.preprocessing import MinMaxScaler
from PIL import Image

from multiprocessing import cpu_count
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import as_completed


In [6]:
def gen_scapes_labels(sounds_toUse, source_dir, scape_count, base_name, background_label, scape_dur, foreground_dir, max_freq):
    for i in range(len(sounds_toUse)):
        # read csv of each sound_toUse:
        sounds_list[i] = pd.read_csv(f"{source_dir}/audio/foreground_csvs/{sounds_toUse[i][0]}.csv", header=None)
        sounds_list[i].columns = ["Src_file", "Duration", "Low_freq", "High_freq"]
        # print(sounds_list[i], "\n")

    # print(f"sounds_list[0] = {sounds_list[0]}\n")

    for scape in range(scape_count):

        # set up scape info
        scape_name = f"{base_name}_scape{scape}"
        print(f"scape{scape}:")
        label = ""

        # set up scaper
        sc = scaper.Scaper(scape_dur, f"{source_dir}/audio/foreground", f"{source_dir}/audio/background")
        sc.ref_db = -52 #TODO

        audiofile = f"{out_dir}/scapes/{scape_name}.wav"
        jamsfile = f"{out_dir}/jams/{scape_name}.jams"

        sc.add_background(label = ("const", background_label),
                        source_file = ("choose", []),
                        source_time = ("uniform", 0, 60-scape_dur)) # background source files are 60 seconds long

        for i in range(len(sounds_toUse)): # for each type of call
            df = sounds_list[i]
            j = random.randint(sounds_toUse[i][1], sounds_toUse[i][2]) # choose how many calls of this type in the file

            for k in range(j):
                foreground_label = sounds_toUse[i][0]

                choice = df.sample() # choose a random sound from the list of this call type
                src = choice["Src_file"].iloc[0]

                t = round(random.uniform(0,scape_dur-.25), scape_dur) #start time in file
                dur = round(choice["Duration"].iloc[0], 3)
                end = t + dur
                if(end > scape_dur):
                    end = scape_dur
                    dur = end - t
                lo_freq = choice["Low_freq"].iloc[0]
                hi_freq = choice["High_freq"].iloc[0]

                if (hi_freq > max_freq): # just in case
                    hi_freq = max_freq

                YOLO_class = i
                xCenter_percent = round((end + t)/(2*scape_dur),6)
                yCenter_percent = round((hi_freq + lo_freq)/(2*max_freq),6)

                width_percent = round((end-t)/scape_dur, 6)
                height_percent = round((hi_freq - lo_freq)/max_freq, 6)

                if (label == ""):
                    label = f"{YOLO_class} {xCenter_percent} {yCenter_percent} {width_percent} {height_percent}"    

                else:
                    label = f"{label}\n{YOLO_class} {xCenter_percent} {yCenter_percent} {width_percent} {height_percent}"

                sc.add_event(label=("const", sounds_toUse[i][0]),
                        source_file = ("const", f"{foreground_dir}/{foreground_label}/{src}"),
                        source_time = ("const", 0),
                        event_time = ("const",t),
                        event_duration = ("const", dur), # might get warnings
                        snr = ("uniform", 5, 10), #-10, 6), #TODO: this always needs tested in case something is different about the foreground files
                        pitch_shift = None,
                        time_stretch = None )

        # save labels to .txt file
        with open(f"{out_dir}/labels/{scape_name}.txt", "w") as text_file:
            text_file.write(label)
#         print(label)

        # add junk sounds, if any
        if (junk != None):
            junk_count = random.randint(junk[1], junk[2])    

            for j in range(junk_count):
                sc.add_event(label=("const", junk[0]),
                        source_file = ("choose", []),
                        source_time = ("const", 0),
                        event_time = ("const",t),
                        event_duration = ("const", dur), # might get warnings
                        snr = ("uniform", 10, 20), #-10, 6), #TODO: this always needs tested in case something is different about the foreground files
                        pitch_shift = None,
                        time_stretch = None )

        sc.generate(audiofile,jamsfile,
                       allow_repeated_label=True,
                       allow_repeated_source=True,
                       reverb=0,
                       disable_sox_warnings=True,
                       no_audio=False)
            


In [7]:
def decibel_filter(spectrogram, db_cutoff=-100.0):
    remove_zeros = np.copy(spectrogram)
    remove_zeros[remove_zeros == 0.0] = np.nan
    inDb = 10.0 * np.log10(remove_zeros)
    inDb[inDb <= db_cutoff] = db_cutoff
    return np.nan_to_num(10.0 ** (inDb / 10.0))


def chunk_preprocess(chunk):
    results = [None] * chunk.shape[0]
    for idx, (_, row) in enumerate(chunk.iterrows()):
        results[idx] = (row["Index"], preprocess(row["Filename"],width_px, height_px))
    return results


def preprocess(filename, width_px, height_px):

    print(f'\nfilename = {filename}\n')

    # The path for p.stem
    p = Path(filename) 

    # Generate frequencies and times
    samples, sample_rate = load(
        f"{out_dir}/{p.parent}/{p.stem}.wav", mono=False, sr=44100, res_type="kaiser_fast" #22050
    )
    freq, time, spec = signal.spectrogram(
        samples,
        sample_rate,
        window="hann",
        nperseg=512,
        noverlap=384,
        nfft=512,
        scaling="spectrum",
    )

    # Filters
    spec = decibel_filter(spec)
    spec = np.log10(spec)
    spec_mean = np.mean(spec)
    spec_std = np.std(spec)
    spec = (spec - spec_mean) / spec_std

    # Lowpass filter
    lowpass = max_freq #hz
    highest_index = np.abs(freq - lowpass).argmin()
    spec = spec[0:highest_index, :]
    freq = freq[0:highest_index]

    # Save spectrogram of the wav file
    scaler = MinMaxScaler(feature_range=(0, 255))
    spec = scaler.fit_transform(spec)
    image = Image.fromarray(np.flip(spec, axis=0))
    image = image.convert("RGB")
    image = image.resize((width_px, height_px)) #10304, 256  notched specto = 5164 x 101, squished to be divisible by 32
    image.save(f"{out_dir}/JPEGImages/{p.stem}.jpg")   

    return f"{out_dir}/JPEGImages/{p.stem}.jpg"

In [8]:
base_name = "rats_EATO_WOTH" # the name prefixing every scape. Should match output folder name - UPDATE 
scape_count = 5 # UPDATE
scape_dur = 5 # UPDATE

# UPDATE: update source_dir, confirm directory structure inside source_dir matches expected (audio/foreground, audio/background, audio/foreground_csvs)
#       note: there should be a matching .csv file in audio/foreground_csvs for each foreground folder, containing clip length & freq information to build boxes from
#       TODO: store/read background file length information instead of assuming 1-minute files

source_dir = "/Users/kitzeslab/Desktop/yolo-scripts/scape-gen"
out_dir = f"{source_dir}/{base_name}"
background_dir = f"{source_dir}/audio/background"
background_label = "norats-nofarinosas"
foreground_dir = f"{source_dir}/audio/foreground"
junk = None #("junk-easy", 0, 2) # junk = None if none

max_freq = 8000 # for lowpass filter in image gen - UPDATE
width_px = 5152 # define image resize - UPDATE 
height_px = 96

# prep empty variables to fill
labels = [None]*scape_count
count = 0 # current label number, as reference index for place in labels

# UPDATE: fill sounds_toUse with desired scape properties
# sounds_toUse = list of tuples containing which sound categories to include in this dataset, and how many to include - specified as min, max per scape
#    TODO: add probability option?
sounds_toUse = [("rats-fewer-singlebarks", 1, 2), ("EATO", 0, 1), ("WOTH", 0, 2)]
sounds_list = [None]*len(sounds_toUse)

In [9]:
gen_scapes_labels(sounds_toUse, source_dir, scape_count, base_name, background_label, scape_dur, foreground_dir, max_freq)

scape0:




scape1:




scape2:




scape3:




scape4:




In [10]:
# Fill DF "df" with all scape names and indices
df = pd.read_csv(f"{out_dir}/wavfiles.txt", header=None)
df.columns= ["Filename"]
df["Index"] = df.index.values

# Create empty DF "results" with indices matching "df"
results = pd.DataFrame(index=df.index.values, columns=["X", "y"], dtype=str)

# Parallelizee - send one filename at a time (chunk) to preproceser, which sends it to be processed)
nprocs = cpu_count()
chunks = np.array_split(df[["Filename", "Index"]], nprocs)
executor = ProcessPoolExecutor(nprocs)
futs = [executor.submit(chunk_preprocess, chunk) for chunk in chunks]

for fut in as_completed(futs):
    res = fut.result()
    for idx, (X, y) in res:
        results.loc[idx, "X"] = X
        results.loc[idx, "y"] = y

# results.to_csv(f"{out_dir}/all_files.csv", index=None) # TODO NEXT: output all_files.csv as before to make gen_trainTest easy. Example below.
#X,y
# /path/to/{base}/JPEGImages/{base}_scape0.jpg,"['{class1}', '{class3}', '{class4}', ..]"
# /path/to/{base}/JPEGImages/{base}_scape1.jpg,"['{class0}', ..]"


filename = scapes/rats_EATO_WOTH_scape2.wav

filename = scapes/rats_EATO_WOTH_scape3.wav

filename = scapes/rats_EATO_WOTH_scape0.wav


filename = scapes/rats_EATO_WOTH_scape4.wav




filename = scapes/rats_EATO_WOTH_scape1.wav



ValueError: too many values to unpack (expected 2)