## Parsing MedleyDB

In [2]:
import librosa
import soundfile as sf
import os
import glob
import shutil
import icecream as ic
import numpy as np
import itertools
import zipfile
import yaml

### Copy yaml metadata from metadata folder into songs folder

In [88]:
def parse_file(metadata_path, songs_path):
    songs_folder = os.listdir(songs_path)

    # filters out non yaml files
    metadata = glob.glob('*.yaml', root_dir=metadata_path)

    for file in metadata:

        yaml = file.split('.')[0]

        if yaml.endswith("_METADATA") == False:
            continue

        song_name = yaml[:-9]
        # ic.ic(song_name)

        song_folder = songs_path + '/' + song_name
        assert os.path.isdir(song_folder), "Song folder does not exist"

        if song_name in songs_folder:
            shutil.copy(metadata_path + '/' + file, song_folder + '/' + file)
            # ic.ic(os.listdir(songs_path + '/' + song_name))



### Run
First argument: path to the metadata folder, relative to the directory that contains this notebook (no leading `./`).

Second argument: path to the songs folder, given the same way.

In [89]:
# Copy every *_METADATA.yaml from 'metadata' into its song folder under 'songs'.
parse_file(metadata_path='metadata', songs_path='songs')

### Sort stems into instrument folders

In [90]:
def sort_stems(songs_folder):
    """Rename every stem after its instrument and copy it into an instrument folder.

    For each song directory inside ``songs_folder`` the function reads
    ``<song>/<song>_METADATA.yaml`` and, for every entry under ``stems``:

    * renames ``<song>/<song>_STEMS/<filename>`` to
      ``<song>/<song>_STEMS/<instrument>_<k>.wav`` (in place), where ``k``
      counts the stems of that instrument in metadata order, starting at 0;
    * copies the renamed file to ``<song>/<instrument>/<instrument>_<k>.wav``.

    The function can be run again on the same folder: a stem that was already
    renamed by an earlier run is detected and only re-copied.

    Args:
        songs_folder: Folder that holds one sub-folder per song.

    Raises:
        FileNotFoundError: If a song has no metadata file, or a stem listed in
            the metadata is present under neither its original nor its
            renamed file name.
    """
    songs = [song for song in os.listdir(songs_folder)
             if os.path.isdir(os.path.join(songs_folder, song))]
    ic.ic(songs)

    for song in songs:
        song_dir = os.path.join(songs_folder, song)
        stems_dir = os.path.join(song_dir, f"{song}_STEMS")

        # Read the metadata and release the file handle before moving audio.
        # NOTE(review): FullLoader can build more than plain data; if metadata
        # ever comes from an untrusted source, consider yaml.safe_load.
        with open(os.path.join(song_dir, f"{song}_METADATA.yaml")) as metadata_file:
            metadata = yaml.load(metadata_file, Loader=yaml.FullLoader)

        # Per-instrument running index. On a clean first run this yields the
        # same numbers as counting the files already in the instrument folder,
        # but it is not thrown off by stray files or by an earlier run.
        stems_per_instrument = {}

        for stem in metadata['stems'].values():
            # A path separator inside the label would be read as a nested
            # directory and make the mkdir/rename fail, so flatten it.
            instrument = str(stem['instrument']).replace('/', '-').replace(os.sep, '-')

            index = stems_per_instrument.get(instrument, 0)
            stems_per_instrument[instrument] = index + 1

            new_name = f"{instrument}_{index}.wav"
            original_path = os.path.join(stems_dir, stem['filename'])
            renamed_path = os.path.join(stems_dir, new_name)
            instrument_dir = os.path.join(song_dir, instrument)

            os.makedirs(instrument_dir, exist_ok=True)

            if os.path.exists(original_path):
                os.rename(original_path, renamed_path)
            elif not os.path.exists(renamed_path):
                raise FileNotFoundError(
                    f"Stem not found as {original_path} or {renamed_path}")
            # else: already renamed by a previous run; just refresh the copy.

            shutil.copy(renamed_path, os.path.join(instrument_dir, new_name))
                

In [91]:
# Rename the stems of every song under 'songs' and copy them into instrument folders.
sort_stems(songs_folder='songs')

ic| songs: ['Allegria_MendelssohnMovement1']


### generate_perms
Zips the entire song folder at the end with `shutil.make_archive`.

In [4]:
SAMPLE_RATE = 44100

def generate_perms(songs_folder):
    songs = os.listdir(songs_folder)

    for song in songs:
        song_folder = songs_folder + '/' + song
        
        if os.path.isdir(songs_folder + '/' + song) == False:
            continue

        stems_folder = song_folder + '/' + song + '_STEMS'

        # get all the stems
        stems = glob.glob('*.wav', root_dir=stems_folder)
        num_stems = len(stems)

        # generate all possible permutations of the stems of lengths 2 to num_stems-1
        perms = []
        for i in range(2, num_stems):
            perms += itertools.combinations(stems, i)
            
        # ic.ic(perms)

        # for each permutation, create a new folder and copy the stems into it
        for i, perm in enumerate(perms):
            # perm_folder = song_folder + '/' + song + '_PERM' + str(i)
            # perm_instruments = [stem.split('_')[0] for stem in perm]
            perm_instruments = "_".join([p.split('.')[0] for p in perm])
            perm_folder = song_folder + '/' + song + '_' + perm_instruments
            if(os.path.isdir(perm_folder) == False):
                os.mkdir(perm_folder)

            stems_arr = []

            for stem in perm:
                shutil.copy(stems_folder + '/' + stem, perm_folder + '/' + stem)

                stem_audio, sr = librosa.load(perm_folder + '/' + stem, mono=False, sr=SAMPLE_RATE)
                stems_arr.append(stem_audio)
                # ic.ic(stem_audio)

            perm_sum = np.sum(stems_arr, axis=0)
            # sf.write(perm_folder + '/' + song + '_PERM' + str(i) + '.wav', perm_sum.T, SAMPLE_RATE)
            # sf.write(perm_folder + '/' + song + '_PERM' + str(i) + '.wav', perm_sum.T, SAMPLE_RATE)
            sf.write(perm_folder + '/' + song + '_' + perm_instruments + '.wav', perm_sum.T, SAMPLE_RATE)
            
        shutil.make_archive(song_folder, format='zip', root_dir=song_folder)

### Run
Argument: the songs folder, already containing the stems and YAML metadata (path relative to the directory that contains this notebook; no leading `./`).

In [6]:
# Build the stem-combination mixes for every song under 'songs' and zip each song folder.
generate_perms(songs_folder='songs')

KeyboardInterrupt: 