## Parsing MedleyDB

In [177]:
import librosa
import soundfile as sf
import os
import glob
import shutil
import icecream as ic
import numpy as np
import itertools

In [178]:
def parse_file(metadata_path, songs_path):
    """Copy each song's metadata YAML into that song's folder.

    A metadata file is any non-hidden entry of ``metadata_path`` named
    ``<song_name>_METADATA.yaml``. It is copied, under the same file name,
    into ``<songs_path>/<song_name>``. Other entries are ignored.

    The song name is everything before the ``_METADATA.yaml`` ending, so
    song names that themselves contain dots (``Mr.Big_Song``) are handled.

    Args:
        metadata_path: Directory holding the ``*_METADATA.yaml`` files.
        songs_path: Directory holding one sub-folder per song.

    Raises:
        AssertionError: If a metadata file has no matching song folder.
        OSError: If either directory cannot be listed.
    """
    suffix = '_METADATA'

    # os.listdir (unlike glob) raises when the directory is missing, so a
    # mistyped path fails loudly instead of silently doing nothing.
    metadata_names = os.listdir(metadata_path)
    song_dirs = set(os.listdir(songs_path))

    for file_name in metadata_names:
        stem, ext = os.path.splitext(file_name)

        # Same selection the '*.yaml' glob pattern made: hidden files are
        # skipped and the extension is compared the way the OS compares names.
        if file_name.startswith('.') or os.path.normcase(ext) != '.yaml':
            continue

        if not stem.endswith(suffix):
            continue

        song_name = stem[:-len(suffix)]
        song_folder = os.path.join(songs_path, song_name)

        # Raised explicitly (rather than with `assert`) so the check survives
        # `python -O`; the exception type and message are unchanged.
        if not os.path.isdir(song_folder):
            raise AssertionError("Song folder does not exist")

        # Only copy into folders that appear in the listing of songs_path.
        if song_name in song_dirs:
            shutil.copy(
                os.path.join(metadata_path, file_name),
                os.path.join(song_folder, file_name),
            )



In [179]:
# Copy every `<song>_METADATA.yaml` from ./metadata into the matching
# song folder under ./songs (paths are relative to the working directory).
parse_file('metadata', 'songs')

In [180]:
SAMPLE_RATE = 44100

def generate_perms(songs_folder):
    songs = os.listdir(songs_folder)

    for song in songs:
        song_folder = songs_folder + '/' + song
        
        if os.path.isdir(songs_folder + '/' + song) == False:
            continue

        stems_folder = song_folder + '/' + song + '_STEMS'

        # get all the stems
        stems = glob.glob('*.wav', root_dir=stems_folder)
        num_stems = len(stems)

        # generate all possible permutations of the stems of lengths 2 to num_stems-1
        perms = []
        for i in range(2, num_stems):
            perms += itertools.combinations(stems, i)
            
        # ic.ic(perms)

        # for each permutation, create a new folder and copy the stems into it
        for i, perm in enumerate(perms):
            perm_folder = song_folder + '/' + song + '_PERM' + str(i)
            if(os.path.isdir(perm_folder) == False):
                os.mkdir(perm_folder)

            stems_arr = []

            for stem in perm:
                shutil.copy(stems_folder + '/' + stem, perm_folder + '/' + stem)

                stem_audio, sr = librosa.load(perm_folder + '/' + stem, mono=False, sr=SAMPLE_RATE)
                stems_arr.append(stem_audio)
                # ic.ic(stem_audio)

            perm_sum = np.sum(stems_arr, axis=0)
            # Audio(perm_sum, rate=SAMPLE_RATE)
            sf.write(perm_folder + '/' + song + '_PERM' + str(i) + '.wav', perm_sum.T, SAMPLE_RATE)
        # ic.ic(os.listdir(song_folder))
        # ic.ic(stems_arr)

In [182]:
# Build the per-combination stem folders and summed mixes for every song
# folder under ./songs (path is relative to the working directory).
generate_perms('songs')