In [12]:
import numpy as np
import tensorflow as tf
import librosa
import os
import soundfile as sf
import numpy as np

In [57]:

from data import SR

# Root directory of the processed dataset. Every entry directly under it is
# treated as a folder to process. The trailing '/' is required because the
# cells below build paths by plain string concatenation (PATH + folder).
PATH = 'data/medley_processed/V2/'

# Split into fixed-length chunks (10 s by default); pass `overlap` to get more examples

def split_audios(stem, duration=10, overlap=0):
    """Load an audio file and cut it into equally sized chunks.

    The file is loaded with ``librosa.load(stem, mono=False, sr=SR)`` and
    transposed so that slicing along the first axis selects time.

    Args:
        stem: Path of the audio file to load.
        duration: Chunk length in seconds. May be fractional; it is converted
            to a whole number of samples.
        overlap: Seconds shared by two consecutive chunks. ``0`` (default)
            yields back-to-back chunks, exactly like the previous behaviour.

    Returns:
        A list of arrays, each ``duration`` seconds long. Trailing audio that
        does not fill a complete chunk is dropped. The list is empty when the
        file is shorter than one chunk.

    Raises:
        ValueError: If ``duration`` is not positive, or ``overlap`` is
            negative or not smaller than ``duration``.
    """
    # Load the stem at the project sample rate, keeping all channels
    stem_audio, sr = librosa.load(stem, mono=False, sr=SR)

    # Transpose to have time as first dimension
    stem_audio = stem_audio.T

    # Work in integer sample counts so that fractional durations cannot turn
    # slice bounds or the chunk count into floats.
    chunk_len = int(round(sr * duration))
    overlap_len = int(round(sr * overlap))
    if chunk_len <= 0:
        raise ValueError(f"duration must be positive, got {duration!r}")
    if not 0 <= overlap_len < chunk_len:
        raise ValueError(
            f"overlap must be non-negative and smaller than duration, got {overlap!r}"
        )
    hop_len = chunk_len - overlap_len

    # get total number of samples and number of chunks
    n_samples = len(stem_audio)
    print(n_samples)
    if n_samples < chunk_len:
        n_chunks = 0
    else:
        # Number of full windows of chunk_len that fit when stepping by hop_len;
        # equals n_samples // chunk_len when there is no overlap.
        n_chunks = (n_samples - chunk_len) // hop_len + 1
    print(n_chunks)

    # Split into chunks
    return [
        stem_audio[i * hop_len:i * hop_len + chunk_len]
        for i in range(n_chunks)
    ]


def write_audio(chunks, stemname, destination=PATH):
    """Write each chunk to ``<destination>/<stemname>_<index>.wav``.

    Args:
        chunks: Iterable of audio arrays, written in order.
        stemname: Prefix of the output file names (any value usable in an
            f-string, e.g. a string or an integer index).
        destination: Directory that receives the files. It is created when
            missing and may be given with or without a trailing separator.

    The files are written with ``soundfile.write`` at sample rate ``SR``.
    """
    # Create the target directory up front so the first write cannot fail on a
    # missing folder. An empty destination means "current directory".
    if destination:
        os.makedirs(destination, exist_ok=True)

    for i, chunk in enumerate(chunks):
        # os.path.join inserts the separator when `destination` lacks one.
        # Plain concatenation used to turn ".../vox_true" + "vocals_0.wav" into
        # a file called "vox_truevocals_0.wav" in the parent directory.
        sf.write(os.path.join(destination, f'{stemname}_{i}.wav'), chunk, SR)

In [60]:
def split_per_folder(folder):
    """Chunk the vocals stem and every vocals-containing mix of one folder.

    Expected layout under ``PATH + folder``:

    * ``vocals_INSTR/vocals_INSTR_SUM.wav`` - the vocals stem. Its chunks are
      written to ``vox_true/vocals_<chunk>.wav``.
    * sub-directories whose name contains both ``"SUM"`` and ``"vocals"`` -
      the first ``.wav`` file of each is chunked and written to
      ``vox_training/<stem index>_<chunk>.wav``.

    Folders that do not exist or lack the vocals stem are reported and skipped.

    Args:
        folder: Name of the folder, relative to ``PATH``.

    Returns:
        None.
    """
    folder_path = os.path.join(PATH, folder)
    if not os.path.isdir(folder_path):
        print(f"Folder {folder} does not exist. Skipping.")
        return None

    entries = os.listdir(folder_path)
    if "vocals_INSTR" not in entries:
        print("No vocals found for this folder. Skipping.", folder)
        return None

    vocals_file = os.path.join(folder_path, "vocals_INSTR", "vocals_INSTR_SUM.wav")
    if not os.path.isfile(vocals_file):
        # Report and skip instead of letting the loader abort the whole batch.
        print("No vocals found for this folder. Skipping.", folder)
        return None

    print(f"Processing {folder}")
    print(f"Vocals found: {folder + '/' + 'vocals_INSTR'}")

    # Create the output directories only once we know the folder is processed,
    # so skipped folders are not littered with empty directories. The trailing
    # separator (from joining with "") keeps the destination valid even for a
    # writer that builds file names by string concatenation.
    true_dir = os.path.join(folder_path, "vox_true", "")
    training_dir = os.path.join(folder_path, "vox_training", "")
    os.makedirs(true_dir, exist_ok=True)
    os.makedirs(training_dir, exist_ok=True)

    vox_split = split_audios(vocals_file)
    print(len(vox_split))

    ## write vox stem
    write_audio(vox_split, "vocals", true_dir)

    # Sort so that the index used in the output file names is reproducible
    # across runs (os.listdir order is arbitrary). Only directories qualify.
    stem_names = sorted(
        name for name in entries
        if "SUM" in name and "vocals" in name
        and os.path.isdir(os.path.join(folder_path, name))
    )

    for i, stem_name in enumerate(stem_names):
        stem = folder + '/' + stem_name
        print(f"Processing stem: {stem}")
        stem_dir = os.path.join(folder_path, stem_name)

        # Pick the first audio file deterministically and ignore stray
        # non-audio entries; an empty directory is skipped, not an IndexError.
        wav_files = sorted(
            name for name in os.listdir(stem_dir) if name.lower().endswith(".wav")
        )
        if not wav_files:
            print(f"No wav file found in {stem}. Skipping.")
            continue
        file = wav_files[0]
        print(file)

        chunks = split_audios(os.path.join(stem_dir, file))
        write_audio(chunks, i, training_dir)

    return None

def main():
    """Run split_per_folder on every entry found directly under PATH."""
    for folder in os.listdir(PATH):
        split_per_folder(folder)
    


In [61]:
# Process every folder under PATH; progress is reported through print output.
main()

No vocals found for this folder. Skipping. RodrigoBonelli_BalladForLaura
Processing MidnightBlue_StarsAreScreaming
Vocals found: MidnightBlue_StarsAreScreaming/vocals_INSTR
12030951
27
27
Processing stem: MidnightBlue_StarsAreScreaming/MidnightBlue_StarsAreScreaming_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM_drumset_INSTR_SUM
MidnightBlue_StarsAreScreaming_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM_drumset_INSTR_SUM.wav
12030951
27
Processing stem: MidnightBlue_StarsAreScreaming/MidnightBlue_StarsAreScreaming_electricbass_INSTR_SUM_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM
MidnightBlue_StarsAreScreaming_electricbass_INSTR_SUM_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM.wav
12030951
27
Processing stem: MidnightBlue_StarsAreScreaming/MidnightBlue_StarsAreScreaming_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM
MidnightBlue_StarsAreScreaming_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM.wav
12030951
27
Processing stem: MidnightBlue_StarsAreScreamin

In [67]:
# One-off cleanup of chunk files sitting directly inside a track folder.
# WARNING: destructive - files whose name contains "vox_true" and "wav" are
# deleted, not moved.
for folder in os.listdir(PATH):
    folder_path = PATH + folder
    if os.path.isdir(folder_path):
        # Move every "vox_training...wav" file into the vox_training sub-directory
        files = []
        for file in os.listdir(folder_path):
            if "vox_training" in file and "wav" in file:
                files.append(file)
        for file in files:
            os.rename(folder_path + "/" + file, folder_path + "/vox_training/" + file)

        # Re-list the folder, then delete every "vox_true...wav" file
        files = []
        for file in os.listdir(folder_path):
            if "vox_true" in file and "wav" in file:
                files.append(file)
        for file in files:
            os.remove(folder_path + "/" + file)

In [70]:
# Map every track folder with a non-empty vox_training sub-directory to the
# .wav chunk files stored in that sub-directory.
files_dict = {}

for folder in os.listdir(PATH):
    training_dir = os.path.join(PATH, folder, "vox_training")
    if not os.path.isdir(training_dir):
        continue
    training_files = os.listdir(training_dir)
    if len(training_files) > 0:
        # List the chunks from vox_training itself. Filtering the parent
        # folder's listing yields an empty list once the chunks live in the
        # sub-directory. Sorted for a reproducible order.
        files_dict[folder] = sorted(
            name for name in training_files if name.endswith(".wav")
        )

In [71]:
# Show which folders ended up in files_dict (i.e. have a non-empty vox_training directory).
print(files_dict.keys())

dict_keys(['MidnightBlue_StarsAreScreaming', 'CassandraJenkins_PerfectDay', 'DahkaBand_SoldierMan', 'TheTonTons_Lush', 'BarefootSisters_RedJetta', 'TheKitchenettes_Alive', 'TleilaxEnsemble_Late', 'QuantumChromos_Circuits', 'MutualBenefit_NotForNothing', 'PeterMatthewBauer_YouAlwaysLookForSomeoneLost', 'LewisAndClarke_TheSilverSea', 'Torres_NewSkin', 'MidnightBlue_HuntingSeason', 'TrevorAndTheSoundwaves_AloneAndSad', 'FilthyBird_IdLikeToKnow', 'LittleTybee_TheAlchemist', 'PatternIsMovement_SaveMe', 'CatMartino_IPromise', 'TleilaxEnsemble_MelancholyFlowers', 'Cayetana_MissThing', 'DeadMilkmen_PrisonersCinema', 'FruitCathedral_KeepMeRunnin'])
