In [None]:
import os
import shutil
import itertools
import numpy as np
import librosa
import soundfile as sf
import json

In [None]:
# Colab-only step: mount Google Drive so that the dataset paths under
# /content/drive used by the rest of this notebook are reachable.
from google.colab import drive
# force_remount=True asks Colab to mount again even if a mount is already present.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Source tree read by parse_file: one sub-folder per song, each holding a
# data.json file and the stem folders that file describes.
metadata_path = "/content/drive/MyDrive/Preprocess"
# Destination tree written by parse_file and then processed by generate_perms.
# These are the same two locations handed to those functions in the final cell.
songs_path = "/content/drive/MyDrive/lanternfish/moisesDB_raw"

In [None]:
# Root of the raw MoisesDB copy on the mounted Drive.
moisesDB_raw_path = "/content/drive/MyDrive/lanternfish/moisesDB_raw"

# The scratch folder is literally named "TEST 1" and sits directly under the root.
test_folder_path = os.path.join(moisesDB_raw_path, "TEST 1")

# exist_ok=True keeps this cell re-runnable when the folder is already there.
os.makedirs(test_folder_path, exist_ok=True)

# Report whether the folder is present on disk after the call above.
print(
    "TEST folder created successfully."
    if os.path.exists(test_folder_path)
    else "Failed to create TEST folder."
)

TEST folder created successfully.


In [None]:
# Sample rate in Hz: generate_perms passes it to librosa.load as the load rate
# for every stem and to soundfile.write as the rate of every mix it saves.
SAMPLE_RATE = 44100

def parse_file(metadata_path, songs_path):
    """Copy every song found under ``metadata_path`` into ``songs_path``.

    Each sub-folder of ``metadata_path`` is treated as one song. Its
    ``data.json`` is read for the song name (key ``'song'``) and the list of
    stems (key ``'stems'``; every entry has a ``'stemName'`` and a ``'tracks'``
    list whose items carry an ``'id'``). For every song the function creates
    ``songs_path/<song>/``, copies ``data.json`` into it, and copies each
    ``<stemName>/<id>.wav`` that exists into ``songs_path/<song>/<stemName>/``.

    Args:
        metadata_path: Folder containing one sub-folder per song.
        songs_path: Destination root; created on demand.

    Returns:
        None. The result is the folder tree written below ``songs_path``.

    Raises:
        KeyError: If a ``data.json`` lacks one of the keys listed above.
        json.JSONDecodeError: If a ``data.json`` is not valid JSON.
    """
    # Sorted so that progress messages and copy order are reproducible.
    for song_folder_name in sorted(os.listdir(metadata_path)):
        song_folder = os.path.join(metadata_path, song_folder_name)

        if not os.path.isdir(song_folder):
            continue

        # A stray folder without metadata is reported and skipped instead of
        # aborting the whole import with FileNotFoundError.
        metadata_file = os.path.join(song_folder, 'data.json')
        if not os.path.isfile(metadata_file):
            print("Skipping {}: no data.json found.".format(song_folder))
            continue

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(metadata_file, encoding='utf-8') as f:
            data = json.load(f)

        song_name = data['song']
        stems_info = data['stems']

        # Create the song folder in the destination and keep its metadata with it.
        dest_song_folder = os.path.join(songs_path, song_name)
        os.makedirs(dest_song_folder, exist_ok=True)
        shutil.copy(metadata_file, os.path.join(dest_song_folder, 'data.json'))

        for stem_info in stems_info:
            stem_name = stem_info['stemName']
            stem_folder = os.path.join(song_folder, stem_name)
            dest_stem_folder = os.path.join(dest_song_folder, stem_name)

            for track in stem_info['tracks']:
                stem_file = os.path.join(stem_folder, track['id'] + '.wav')
                if not os.path.exists(stem_file):
                    # Still best-effort, but no longer silent.
                    print("Missing track file, skipped: {}".format(stem_file))
                    continue

                # The stem folder is created only once there is audio to put
                # in it, so no empty stem folders are left in the destination
                # for generate_perms to pick up as silent stems.
                os.makedirs(dest_stem_folder, exist_ok=True)
                shutil.copy(stem_file, dest_stem_folder)

In [None]:
def _find_stem_files(song_folder, audio_extensions):
    """Map each stem folder name to the sorted audio file paths directly inside it."""
    if isinstance(audio_extensions, str):
        audio_extensions = (audio_extensions,)
    extensions = tuple(ext.lower() for ext in audio_extensions)

    stems = {}
    for entry in sorted(os.listdir(song_folder)):
        entry_path = os.path.join(song_folder, entry)
        if not os.path.isdir(entry_path):
            continue

        audio_paths = []
        for file_name in sorted(os.listdir(entry_path)):
            file_path = os.path.join(entry_path, file_name)
            if file_name.lower().endswith(extensions) and os.path.isfile(file_path):
                audio_paths.append(file_path)

        # A folder with no audio directly inside it is not a stem. This drops
        # empty stem folders and the PERM* folders written by earlier runs
        # (those only contain the Train/ and Test/ sub-folders).
        if audio_paths:
            stems[entry] = audio_paths
    return stems


def _mix_signals(signals):
    """Sum signals of differing length/channel count into one (channels, samples) array."""
    # librosa.load(mono=False) yields a 1-D array for mono files; lift to 2-D.
    signals = [np.atleast_2d(signal) for signal in signals]
    n_channels = max(signal.shape[0] for signal in signals)
    n_samples = max(signal.shape[1] for signal in signals)

    mix = np.zeros((n_channels, n_samples), dtype=np.float32)
    for signal in signals:
        if signal.shape[0] not in (1, n_channels):
            raise ValueError(
                "Cannot mix a {}-channel signal into a {}-channel mix".format(
                    signal.shape[0], n_channels
                )
            )
        # Shorter signals are implicitly zero-padded at the end; a mono signal
        # is broadcast onto every channel of the mix.
        mix[:, :signal.shape[1]] += signal
    return mix


def generate_perms(songs_folder, audio_extensions=('.wav',)):
    """Write a mix for every combination of two or more stems of every song.

    Despite the name, ``itertools.combinations`` is used, so stem order does
    not matter. For every song folder under ``songs_folder`` and every
    combination of its stem folders the function creates
    ``<song>/PERM<k>/Train/PERM<k>.wav`` (the summed audio) and copies the
    contributing stem files, flat, into ``<song>/PERM<k>/Test/``.

    ``k`` counts all combinations of a song in one sequence: first all pairs,
    then all triples, and so on, with stems in alphabetical order. Restarting
    the index for each combination size would make different combinations
    share a folder.

    Args:
        songs_folder: Root folder containing one sub-folder per song.
        audio_extensions: File extensions (case-insensitive) that count as
            stem audio. Defaults to ``.wav``, the only type parse_file copies.

    Returns:
        None. The result is the PERM* folder tree written inside each song.

    Raises:
        ValueError: If stems of one song have channel counts that cannot be
            reconciled (anything other than mono mixed into N channels).
    """
    for song_folder_name in sorted(os.listdir(songs_folder)):
        song_folder = os.path.join(songs_folder, song_folder_name)

        if not os.path.isdir(song_folder):
            continue

        stem_files = _find_stem_files(song_folder, audio_extensions)
        if len(stem_files) < 2:
            # Nothing to combine.
            continue

        # Decode and pre-mix every stem once per song. Re-loading the files
        # for each combination would decode every stem 2**(n-1) - 1 times.
        stem_mixes = {}
        for stem_name, stem_paths in stem_files.items():
            stem_mixes[stem_name] = _mix_signals(
                [librosa.load(path, mono=False, sr=SAMPLE_RATE)[0] for path in stem_paths]
            )

        perm_index = 0
        for r in range(2, len(stem_files) + 1):
            for perm in itertools.combinations(stem_files, r):
                perm_name = 'PERM' + str(perm_index)
                perm_index += 1

                perm_folder = os.path.join(song_folder, perm_name)
                train_folder = os.path.join(perm_folder, 'Train')
                test_folder = os.path.join(perm_folder, 'Test')
                os.makedirs(train_folder, exist_ok=True)
                os.makedirs(test_folder, exist_ok=True)

                # Test/ receives the individual stem files of this combination.
                for stem_name in perm:
                    for stem_path in stem_files[stem_name]:
                        shutil.copy(
                            stem_path,
                            os.path.join(test_folder, os.path.basename(stem_path)),
                        )

                # Train/ receives the summed mix; soundfile expects
                # (frames, channels), hence the transpose.
                perm_sum = _mix_signals([stem_mixes[stem_name] for stem_name in perm])
                sf.write(os.path.join(train_folder, perm_name + '.wav'), perm_sum.T, SAMPLE_RATE)


In [None]:
# Run the pipeline on the locations configured in the path cell near the top
# of the notebook (metadata_path / songs_path) rather than repeating the
# literal paths here, so there is one place to change them.
# Step 1: copy metadata and stem audio into the per-song layout.
parse_file(metadata_path, songs_path)
# Step 2: build the PERM*/Train and PERM*/Test folders inside every song.
generate_perms(songs_path)

KeyboardInterrupt: 