In [28]:
import os
import shutil
import itertools
import numpy as np
import librosa
import soundfile as sf
import json

In [110]:
# Mount Google Drive so the dataset folders under /content/drive are reachable.
# NOTE(review): force_remount=True presumably re-mounts even when Drive is
# already mounted, which makes re-running this cell safe — confirm in the Colab docs.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [52]:
# Destination root: parse_file creates one folder per song underneath it.
songs_path = "/content/drive/MyDrive/lanternfish/moisesDB_raw"
# Source root: one sub-folder per song, each holding a data.json and its stem folders.
metadata_path = "/content/drive/MyDrive/Preprocess"

In [111]:
# Sampling rate (Hz) used both when decoding stems and when writing the mixes.
SAMPLE_RATE = 44_100

def parse_file(metadata_path, songs_path):
    """Copy every song found under ``metadata_path`` into ``songs_path`` and build its mixes.

    Each sub-directory of ``metadata_path`` is treated as one song. Its
    ``data.json`` must provide a ``song`` name and a ``stems`` list; every stem
    has a ``stemName`` and a list of ``tracks`` whose ``id`` names the file
    ``<song folder>/<stemName>/<id>.wav``.

    For each song the function creates ``<songs_path>/<song>/`` holding a copy
    of ``data.json`` plus one folder per stem, in which the tracks are renamed
    to ``<stemName><n>.wav`` (``n`` is the 1-based position of the track in the
    metadata). It then calls ``generate_perms`` on the new song folder.

    Entries that are not directories, and directories without a ``data.json``
    (for example ``.ipynb_checkpoints``), are skipped. Track files listed in
    the metadata but missing on disk are skipped silently.

    Args:
        metadata_path: Folder containing one sub-folder per song.
        songs_path: Folder under which the per-song output folders are created.
    """
    # Sorted so that repeated runs process the songs in the same order.
    for song_folder_name in sorted(os.listdir(metadata_path)):
        song_folder = os.path.join(metadata_path, song_folder_name)
        if not os.path.isdir(song_folder):
            continue

        # A folder without metadata is not a song; skip it instead of crashing
        # the whole run with FileNotFoundError.
        metadata_file = os.path.join(song_folder, 'data.json')
        if not os.path.isfile(metadata_file):
            print(f"Skipping {song_folder}: no data.json found")
            continue

        # Read data.json file to get information about the stems
        with open(metadata_file, encoding='utf-8') as f:
            data = json.load(f)
        song_name = data['song']
        stems_info = data['stems']

        # Create song folder in the destination path
        dest_song_folder = os.path.join(songs_path, song_name)
        os.makedirs(dest_song_folder, exist_ok=True)

        # Copy metadata file into the song folder
        shutil.copy(metadata_file, os.path.join(dest_song_folder, 'data.json'))

        # Copy stems into respective stem folders
        for stem_info in stems_info:
            stem_name = stem_info['stemName']
            stem_folder = os.path.join(song_folder, stem_name)

            # The destination stem folder is created even when no track ends
            # up being copied, because generate_perms lists it unconditionally.
            dest_stem_folder = os.path.join(dest_song_folder, stem_name)
            os.makedirs(dest_stem_folder, exist_ok=True)

            # Copy stem files and rename them; the number comes from the track's
            # position in the metadata, so a missing file leaves a gap.
            for j, track in enumerate(stem_info['tracks'], start=1):
                stem_file = os.path.join(stem_folder, track['id'] + '.wav')
                if os.path.exists(stem_file):
                    new_stem_file = os.path.join(dest_stem_folder, f"{stem_name}{j}.wav")
                    shutil.copy(stem_file, new_stem_file)

        # Generate the mixes for every combination of the copied stems
        generate_perms(dest_song_folder, stems_info)


In [112]:
def _mix_stems(stem_arrays):
    """Sum 2-D ``(channels, samples)`` arrays, zero-padding short ones and spreading mono over all channels."""
    n_channels = max(arr.shape[0] for arr in stem_arrays)
    n_samples = max(arr.shape[1] for arr in stem_arrays)
    mix = np.zeros((n_channels, n_samples), dtype=np.result_type(*stem_arrays))
    for arr in stem_arrays:
        # A single-channel stem is broadcast onto every output channel; a
        # multi-channel stem is added to its own leading channels.
        rows = slice(None) if arr.shape[0] == 1 else slice(0, arr.shape[0])
        mix[rows, :arr.shape[1]] += arr
    return mix


def generate_perms(dest_song_folder, stems_info):
    """Write one mixed track for every combination of two or more stem files.

    Despite the name, this enumerates *combinations* (order does not matter),
    via ``itertools.combinations``. The stem files are the ``.wav`` files found
    in ``<dest_song_folder>/<stemName>/`` for each entry of ``stems_info``.

    For each combination the folder
    ``<dest_song_folder>/songs/song_<stem>_<stem>.../`` is created with:

    * ``Train/song_<...>.wav`` - the sample-wise sum of the selected stems;
    * ``Test/<stem>.wav`` - unmodified copies of the selected stems.

    Stems are decoded once and reused for every combination. Stems that differ
    in length are zero-padded at the end, and mono stems are mixed into every
    channel, so stems of different shapes can be summed.

    The number of combinations grows exponentially with the number of stem
    files (``2**n - n - 1``), so run time and disk usage grow accordingly.

    NOTE(review): the sum is not normalised, so loud mixes may exceed the
    [-1, 1] range when written - confirm whether clipping is acceptable.

    Args:
        dest_song_folder: Song folder that contains one sub-folder per stem.
        stems_info: Iterable of dicts, each providing a ``stemName`` key.
    """
    all_stem_files = []
    for stem_info in stems_info:
        stem_folder = os.path.join(dest_song_folder, stem_info['stemName'])
        # Only audio files are stems; sorting makes the combination order (and
        # therefore the generated folder names) reproducible across runs.
        all_stem_files.extend(
            os.path.join(stem_folder, file_name)
            for file_name in sorted(os.listdir(stem_folder))
            if file_name.lower().endswith('.wav')
        )

    # Create "songs" folder
    songs_folder = os.path.join(dest_song_folder, 'songs')
    os.makedirs(songs_folder, exist_ok=True)

    # Fewer than two stems means there is nothing to mix.
    if len(all_stem_files) < 2:
        return

    # Decode every stem exactly once. librosa returns (samples,) for mono and
    # (channels, samples) otherwise; atleast_2d gives both the same rank.
    stem_audio = {}
    for stem_path in all_stem_files:
        audio, _ = librosa.load(stem_path, mono=False, sr=SAMPLE_RATE)
        stem_audio[stem_path] = np.atleast_2d(audio)

    # Generate every combination of 2..n stem files
    for r in range(2, len(all_stem_files) + 1):
        for perm in itertools.combinations(all_stem_files, r):
            perm_name = "song_" + "_".join(
                os.path.basename(stem_path).split('.')[0] for stem_path in perm
            )
            perm_folder = os.path.join(songs_folder, perm_name)

            # Train holds the mix, Test holds the individual source stems.
            train_folder = os.path.join(perm_folder, 'Train')
            test_folder = os.path.join(perm_folder, 'Test')
            os.makedirs(train_folder, exist_ok=True)
            os.makedirs(test_folder, exist_ok=True)

            # Copy the selected stems to the Test folder under their own names
            for stem_path in perm:
                shutil.copy(stem_path, os.path.join(test_folder, os.path.basename(stem_path)))

            perm_sum = _mix_stems([stem_audio[stem_path] for stem_path in perm])
            # soundfile expects (samples, channels), hence the transpose.
            perm_file = os.path.join(train_folder, f"{perm_name}.wav")
            sf.write(perm_file, perm_sum.T, SAMPLE_RATE)


In [113]:
# Run the full pipeline on the folders configured near the top of the notebook.
# NOTE: generate_perms writes one mix per combination of stems, so the run time
# grows exponentially with the number of stem files per song.
parse_file(metadata_path, songs_path)

KeyboardInterrupt: 