In [25]:
import audio as aud
import parsing
import librosa as lr
from IPython.display import display, Audio
import os
import numpy as np
from pathlib import Path
from importlib import reload
reload(aud)


MIX_LENGTH = 10
SAMPLE_RATE = 16000


audio_dir = "/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00001/stems"
audios = [aud.load_audio_segment(os.path.join(audio_dir, f)) for f in os.listdir(audio_dir) if f.endswith(".wav")]
res = aud.scatter_audio_segments(audios, mix_duration_s=30, mix_division=5, instance_probability=.5, max_seg_duration_s=5.0)

display(Audio(res, rate=16000))

In [26]:
res = parsing.track_metadata(Path("/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00002"))
for s, v in res['stems'].items():
    print(s, v['midi_program_name'])

res = parsing.track_stems_and_instr(res)
print(res)
filt = ["string", "violin", "piano", "choir"]
reject = ["electric"]
print(parsing.filter_stems_by_keywords(res, filt, reject))

S00 Synth Strings 1
S01 Electric Bass (pick)
S02 Lead 2 (sawtooth)
S03 Pad 1 (new age)
S04 Choir Aahs
S05 Music Box
S06 Clavinet
S07 Brass Section
S08 Electric Guitar (muted)
S09 Drums
S10 Woodblock
S11 Orchestra Hit
{'S00': 'Synth Strings 1', 'S01': 'Electric Bass (pick)', 'S02': 'Lead 2 (sawtooth)', 'S03': 'Pad 1 (new age)', 'S04': 'Choir Aahs', 'S05': 'Music Box', 'S06': 'Clavinet', 'S07': 'Brass Section', 'S08': 'Electric Guitar (muted)', 'S09': 'Drums', 'S10': 'Woodblock', 'S11': 'Orchestra Hit'}
{'string': {'S00': 'Synth Strings 1'}, 'violin': {}, 'piano': {}, 'choir': {'S04': 'Choir Aahs'}}


In [27]:
filt = ["string", "violin", "piano", "choir"]
rej = ["electric"]


instr_sets = dict()

dataset_dir = "/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/"

def instrument_sets(dataset_dir : Path) -> dict[str, list[Path]]:
    instr_sets = dict()
    for f in os.listdir(dataset_dir):
        if not os.path.isdir(os.path.join(dataset_dir, f)):
            print(f"{f} is not a directory, skipping")
            continue
        meta = parsing.track_metadata(os.path.join(dataset_dir,f))
        instrs = parsing.track_stems_and_instr(meta)
        for s, i in instrs.items():
            if i not in instr_sets :
                instr_sets[i] = list()
            instr_sets[i].append(Path(os.path.join(dataset_dir, f + '/stems/' + s + '.wav')))
    return instr_sets

def filter_dataset_by_keywords(dataset_dir, filt, rej):
    for f in os.listdir(dataset_dir):
        if not os.path.isdir(os.path.join(dataset_dir, f)):
            print(f"{f} is not a directory, skipping")
            continue
        meta = parsing.track_metadata(Path(dataset_dir + f))
        for inst, stem_midi in parsing.filter_stems_by_keywords(parsing.track_stems_and_instr(meta), filt, rej).items():
            if inst not in instr_sets:
                instr_sets[inst] = dict()
            for s, i in stem_midi.items():
                instr_sets[inst][f + '/' + 'stems' + '/' + s + '.wav'] = i
    return instr_sets

def filter_instr_by_keywords(instr_sets:dict[str, list[Path]], filt, rej) -> dict[str, list[Path]]:
    res = dict()
    for f in filt:
        if f not in res:
            res[f] = list()
        print(instr_sets)
        for inst, stems in instr_sets.items():
            if f.upper() in inst.upper() and not any(r.upper() in inst.upper() for r in rej):
                res[f] += stems
    return res


print(filter_instr_by_keywords(instrument_sets(dataset_dir), filt, rej))


{'Synth Strings 1': [PosixPath('/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00019/stems/S00.wav'), PosixPath('/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00017/stems/S02.wav'), PosixPath('/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00017/stems/S05.wav'), PosixPath('/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00018/stems/S03.wav'), PosixPath('/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00002/stems/S00.wav'), PosixPath('/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00020/stems/S09.wav'), PosixPath('/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00014/stems/S03.wav'), PosixPath('/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00005/stems/S11.wav'), PosixPath('/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00004/stems/S05.wav')], 'Electric Piano 1': [PosixPath('/home/eilen/Code/Python/LASS_PAM/data/raw/babyslakh_16k/Track00019/stems/S01.wav'), PosixPa

In [28]:
import random
import json


def chose_rand_stems(instrs_and_stems : dict[str, list[Path]], instruments : list[str], n=1) -> list[tuple[str, str, Path]]:
    """ returns a list of n tuples of the form (instrument type (keyword), instrument, stem_path)
        for each keyword in `instruments`,
        where instrument is a random instrument matching the keyword
        stem_path is a random stem path for that instrument. """
    matches = {i: [k for k in instrs_and_stems if i.upper() in k.upper()] for i in instruments}
    return [(i, k:=random.choice(matches[i]), random.choice(instrs_and_stems[k])) for i in instruments for _ in range(n)]



def build_datapoint(instrs_and_stems : dict[str, list[Path]], filtered_instr_and_stems : dict[str, list[Path]], prompt_target_recording:tuple[str, str, Path], mix_duration_s=30, mix_division=5, instance_probability=.5, max_seg_duration_s=5.0):
    res = dict()
    metadata = dict()
    background_instruments = set()
    while len(background_instruments) < 4:
        attempt = None
        while attempt is None:
            b = random.choice(list(instrs_and_stems.keys()))
            if prompt_target_recording[0].upper() not in b.upper():
                attempt = aud.load_audio_segment(random.choice(instrs_and_stems[b]))
                if attempt is None:
                    print(f"Failed to load background instrument {b}, retrying...")
        background_instruments.add(b)
    background_keyword_instrument_stems = chose_rand_stems(instrs_and_stems, list(background_instruments), n=1)

    _prompt_instr = [i for i in filtered_instr_and_stems.keys() if prompt_target_recording[0].upper() in i.upper()]
    reference = None
    for i in range(10):
        reference = aud.normalize_energy(aud.get_active_audio(aud.load_audio_segment(x := random.choice(filtered_instr_and_stems[random.choice(_prompt_instr)]))))
        print(x)
        if reference is not None:
            break
    if reference is None:
        raise Exception(f"Could not load reference audio for prompt {prompt_target_recording[0]} after 10 attempts")
    reference = reference[:min(MIX_LENGTH * SAMPLE_RATE, len(reference))]
    
    target_raw = None
    for i in range(10):
        target_raw = aud.normalize_energy(aud.load_audio_segment(prompt_target_recording[2]))
        if target_raw is not None:
            break
    if target_raw is None:
        raise Exception(f"Could not load target audio for prompt {prompt_target_recording[0]} after 10 attempts")

    mixture = aud.scatter_audio_segments([aud.load_audio_segment(b[2]) for b in background_keyword_instrument_stems], mix_duration_s=mix_duration_s, mix_division=mix_division, instance_probability=instance_probability, max_seg_duration_s=max_seg_duration_s)

    res['reference'] = reference
    res['target'] = aud.scatter_audio_segments([target_raw], mix_duration_s=mix_duration_s, mix_division=mix_division, instance_probability=instance_probability, max_seg_duration_s=max_seg_duration_s)
    mixture += res['target']
    mixture = aud.normalize_energy(mixture, alpha=.9)
    res['mixture'] = mixture

    metadata['prompt'] = prompt_target_recording[0]
    metadata['target_instrument'] = prompt_target_recording[1]
    metadata['background_instruments'] = [b[1] for b in background_keyword_instrument_stems]

    meta_json = json.dumps(metadata)
    return res, meta_json, background_instruments

def save_datapoint(datapoint : dict[str, np.ndarray], metadata_json : str, save_dir : str):
    os.makedirs(save_dir, exist_ok=True)
    aud.save_audio(os.path.join(save_dir, 'reference.wav'), datapoint['reference'])
    aud.save_audio(os.path.join(save_dir, 'target.wav'), datapoint['target'])
    aud.save_audio(os.path.join(save_dir, 'mixture.wav'), datapoint['mixture'])
    with open(os.path.join(save_dir, 'metadata.json'), 'w') as f:
        f.write(metadata_json)
    

In [29]:
dataset_path = Path("/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test")
savepath = Path("/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test_new")
print("loading...")
instrs_and_stems = instrument_sets(dataset_path)
print("instrs_and_stems", instrs_and_stems)
filt = ["violin", "piano", "sax"]
rej = ["electric", "synth"]

filtered = filter_instr_by_keywords(instrs_and_stems, filt, rej)
print("filtered", filtered)

for i in range(100):
    attempt = None
    while attempt is None:
         prompt_target = random.choice(chose_rand_stems(filtered, filt, n=1))
         attempt = aud.load_audio_segment(prompt_target[2])
         if attempt is None:
             print(f"Failed to load prompt target {prompt_target[2]}, retrying...") 
    #background_instruments = set()
    #while len(background_instruments) < 4:
    #    b = random.choice(list(instrs_and_stems.keys()))
    #    if prompt_target[0].upper() not in b.upper():
    #        background_instruments.add(b)
    #background = chose_rand_stems(instrs_and_stems, list(background_instruments), n=1)
    datapoint, metadata_json, background = build_datapoint(instrs_and_stems, filtered, prompt_target, mix_duration_s=10, mix_division=5, instance_probability=.8, max_seg_duration_s=3.0)
    print(i, prompt_target, background, metadata_json)
    save_datapoint(datapoint, metadata_json, os.path.join(savepath, f"example_{i}"))


loading...
.DS_Store is not a directory, skipping
._.DS_Store is not a directory, skipping
instrs_and_stems {'Electric Bass (finger)': [PosixPath('/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01876/stems/S00.wav'), PosixPath('/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01877/stems/S01.wav'), PosixPath('/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01881/stems/S03.wav'), PosixPath('/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01887/stems/S04.wav'), PosixPath('/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01888/stems/S01.wav'), PosixPath('/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01893/stems/S01.wav'), PosixPath('/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01895/stems/S01.wav'), PosixPath('/media/eilen/EXTERNAL_USB/Research/Data/Datasets

  audio, _ = librosa.load(path, sr=sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


audio.load_audio_segment : error loading /media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01965/stems/S10.wav: [Errno 2] No such file or directory: '/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01965/stems/S10.wav'
Failed to load background instrument English Horn, retrying...
/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01903/stems/S05.wav
1 ('violin', 'violin', PosixPath('/media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01998/stems/S05.wav')) {'Lead 6 (voice)', 'Electric Bass (pick)', 'Drums', 'Clavinet'} {"prompt": "violin", "target_instrument": "violin", "background_instruments": ["Lead 6 (voice)", "Electric Bass (pick)", "Drums", "Clavinet"]}
audio.load_audio_segment : error loading /media/eilen/EXTERNAL_USB/Research/Data/Datasets/slakh2100_yourmt3_16k/test/Track01965/stems/S10.wav: [Errno 2] No such file or directory: '/media/eilen/EXTERNAL_USB/R