In [1]:
import numpy as np
import tensorflow as tf
import librosa
import os
import soundfile as sf
import numpy as np
import data
import pywt
import cv2
from data import WaveData

In [2]:

# SR is the sample rate passed to librosa.load and soundfile.write in this notebook.
from data import SR

# Root directory that is scanned for per-song folders. Note the trailing slash:
# several paths in this notebook are built from it by plain string concatenation.
PATH = 'data/medley_processed/V2/'

# Split each stem into 10 s chunks. TODO: add overlap between chunks if more training examples are needed.

def split_audios(stem, duration=10):
    """Load an audio file and cut it into consecutive, equally sized chunks.

    Args:
        stem: Path of the audio file to load.
        duration: Chunk length in seconds. Fractional values are accepted; the
            chunk length is truncated to a whole number of samples.

    Returns:
        A list of arrays with time as the first axis, each ``int(sr * duration)``
        samples long. Trailing samples that do not fill a whole chunk are dropped,
        so a file shorter than one chunk yields an empty list.

    Raises:
        ValueError: If ``duration`` amounts to less than one sample.
    """
    # Load the stem at the shared sample rate, keeping all channels
    stem_audio, sr = librosa.load(stem, mono=False, sr=SR)

    # Transpose to have time as first dimension
    stem_audio = stem_audio.T

    # Use an integer chunk length so a float duration still gives valid slice bounds
    chunk_len = int(sr * duration)
    if chunk_len <= 0:
        raise ValueError(f"duration must cover at least one sample, got {duration!r}")

    # Total number of samples and number of whole chunks (printed for progress tracking)
    n_samples = len(stem_audio)
    print(n_samples)
    n_chunks = n_samples // chunk_len
    print(n_chunks)

    # Slice out the consecutive, non-overlapping chunks
    return [stem_audio[i * chunk_len:(i + 1) * chunk_len] for i in range(n_chunks)]


def write_audio(chunks, stemname, destination=PATH):
    """Write every chunk to ``<destination>/<stemname>_<i>.wav`` at sample rate SR.

    Args:
        chunks: Iterable of audio arrays accepted by ``soundfile.write``.
        stemname: Prefix of the output file names (any value usable in an f-string).
        destination: Directory to write into, with or without a trailing separator.
            It is created if it does not exist yet.
    """
    # Create the target directory if needed (an empty string means the current directory)
    if destination:
        os.makedirs(destination, exist_ok=True)

    for i, chunk in enumerate(chunks):
        # os.path.join adds the separator when `destination` has no trailing slash,
        # so the file always lands inside the directory rather than next to it.
        sf.write(os.path.join(destination, f'{stemname}_{i}.wav'), chunk, SR)

In [3]:
def split_per_folder(folder):
    """Chunk the vocal stem and every vocal-containing mix of one song folder.

    The vocal stem ``<folder>/vocals_INSTR/vocals_INSTR_SUM.wav`` is split and written
    to ``<folder>/vox_true/``. Every sub-directory whose name contains both "SUM" and
    "vocals" is treated as a mix: its first .wav file (alphabetically) is split and
    written to ``<folder>/vox_training/`` using the mix's position in the sorted
    listing as file-name prefix.

    Args:
        folder: Name of the song folder, relative to PATH.

    Returns:
        None. Folders that do not exist or have no ``vocals_INSTR`` entry are skipped
        with a message, and no output directories are created for them.
    """
    folder_path = PATH + folder
    if not os.path.isdir(folder_path):
        print(f"Folder {folder} does not exist. Skipping.")
        return None

    # Sorted so that the numeric prefix given to each mix is reproducible across runs
    entries = sorted(os.listdir(folder_path))
    if "vocals_INSTR" not in entries:
        print("No vocals found for this folder. Skipping.", folder)
        return None

    vox_stem = folder + '/' + "vocals_INSTR"
    print(f"Processing {folder}")
    print(f"Vocals found: {vox_stem}")

    # Keep the trailing separator so chunk files land inside these directories even
    # when the writer builds file names by plain string concatenation.
    training_dir = folder_path + "/vox_training/"
    true_dir = folder_path + "/vox_true/"
    os.makedirs(training_dir, exist_ok=True)
    os.makedirs(true_dir, exist_ok=True)

    vox_split = split_audios(PATH + vox_stem + "/vocals_INSTR_SUM.wav")
    print(len(vox_split))

    ## write vox stem
    write_audio(vox_split, "vocals", true_dir)

    # Mixes that include the vocals; plain files with a matching name are ignored
    stem_folders = [
        folder + '/' + entry
        for entry in entries
        if "SUM" in entry and "vocals" in entry and os.path.isdir(folder_path + '/' + entry)
    ]

    for i, stem in enumerate(stem_folders):
        print(f"Processing stem: {stem}")
        # Only consider audio files; hidden or auxiliary files must not be picked up
        wav_files = sorted(name for name in os.listdir(PATH + stem) if name.lower().endswith(".wav"))
        if not wav_files:
            print(f"No .wav file found in {stem}. Skipping.")
            continue
        file = wav_files[0]
        print(file)
        chunks = split_audios(PATH + stem + "/" + file)
        write_audio(chunks, i, training_dir)

def main():
    """Run split_per_folder once for every entry listed directly under PATH."""
    for entry in os.listdir(PATH):
        split_per_folder(entry)
    


In [61]:
# Run the chunking over every entry in PATH; this writes .wav chunk files to disk.
main()

No vocals found for this folder. Skipping. RodrigoBonelli_BalladForLaura
Processing MidnightBlue_StarsAreScreaming
Vocals found: MidnightBlue_StarsAreScreaming/vocals_INSTR
12030951
27
27
Processing stem: MidnightBlue_StarsAreScreaming/MidnightBlue_StarsAreScreaming_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM_drumset_INSTR_SUM
MidnightBlue_StarsAreScreaming_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM_drumset_INSTR_SUM.wav
12030951
27
Processing stem: MidnightBlue_StarsAreScreaming/MidnightBlue_StarsAreScreaming_electricbass_INSTR_SUM_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM
MidnightBlue_StarsAreScreaming_electricbass_INSTR_SUM_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM.wav
12030951
27
Processing stem: MidnightBlue_StarsAreScreaming/MidnightBlue_StarsAreScreaming_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM
MidnightBlue_StarsAreScreaming_distortedelectricguitar_INSTR_SUM_vocals_INSTR_SUM.wav
12030951
27
Processing stem: MidnightBlue_StarsAreScreamin

In [67]:
# Cleanup pass over every song directory under PATH:
#   * .wav files whose name contains "vox_training" are moved into <song>/vox_training/
#   * .wav files whose name contains "vox_true" are deleted
for song in os.listdir(PATH):
    song_dir = PATH + song
    if os.path.isdir(song_dir):
        # Move matching training chunks into the vox_training sub-directory
        to_move = [name for name in os.listdir(song_dir) if "vox_training" in name and "wav" in name]
        for name in to_move:
            os.rename(song_dir + "/" + name, song_dir + "/vox_training/" + name)

        # Re-read the directory after the moves, then delete matching vox_true files
        to_delete = [name for name in os.listdir(song_dir) if "vox_true" in name and "wav" in name]
        for name in to_delete:
            os.remove(song_dir + "/" + name)

In [4]:
# make dict for training of all files that have vox_training
files_dict = {}

for folder in os.listdir(PATH):
    if not os.path.isdir(PATH + folder):
        continue
    files = os.listdir(PATH + folder )
    if "vox_training" in files and len(os.listdir(PATH + folder + "/vox_training")) > 0:
        files = [file for file in files if "vox_training" in file and "wav" in file]
        files_dict[folder] = files

In [5]:
# Show the song folders that were collected in files_dict.
print(files_dict.keys())

dict_keys(['MidnightBlue_StarsAreScreaming', 'CassandraJenkins_PerfectDay', 'DahkaBand_SoldierMan', 'TheTonTons_Lush', 'BarefootSisters_RedJetta', 'TheKitchenettes_Alive', 'TleilaxEnsemble_Late', 'QuantumChromos_Circuits', 'MutualBenefit_NotForNothing', 'PeterMatthewBauer_YouAlwaysLookForSomeoneLost', 'LewisAndClarke_TheSilverSea', 'Torres_NewSkin', 'MidnightBlue_HuntingSeason', 'TrevorAndTheSoundwaves_AloneAndSad', 'FilthyBird_IdLikeToKnow', 'LittleTybee_TheAlchemist', 'PatternIsMovement_SaveMe', 'CatMartino_IPromise', 'TleilaxEnsemble_MelancholyFlowers', 'Cayetana_MissThing', 'DeadMilkmen_PrisonersCinema', 'FruitCathedral_KeepMeRunnin'])


In [10]:
# Song folder (relative to PATH) whose vox_training / vox_true chunks are loaded below.
test_song_name = 'MidnightBlue_StarsAreScreaming'


def getWaveletTransform(data, song, level=12):
    """Compute a two-channel 'db1' wavelet decomposition for ``data[song]``.

    The entry's ``waveform`` is transposed in place when its first axis has length 2,
    so that channels are indexed along the second axis. Each channel is decomposed
    with ``pywt.wavedec``; every coefficient array is then stretched to the length of
    the longest one with nearest-neighbour resizing.

    Args:
        data: Mapping from song key to an object exposing a ``waveform`` attribute.
        song: Key of the entry to process.
        level: Decomposition level handed to ``pywt.wavedec``.

    Returns:
        The same ``data`` mapping. The processed entry gains ``dwt`` (the unstretched
        left-channel coefficients) and ``tensor_coeffs``, a tensor of shape
        (number of coefficient arrays, longest length, 2).
    """
    entry = data[song]

    # Bring a (2, n) waveform into (n, 2) layout
    if entry.waveform.shape[0] == 2:
        entry.waveform = np.transpose(entry.waveform)
    print(f"Left channel waveform: {entry.waveform[:, 0]}")

    def _decompose(channel):
        # Wavelet decomposition of a single channel
        return pywt.wavedec(entry.waveform[:, channel], 'db1', level=level)

    coeffs_left = _decompose(0)
    coeffs_right = _decompose(1)
    print(f"Left coefficients shape: {[c.shape for c in coeffs_left]}")

    # Longest coefficient array over both channels
    max_len = max(c.shape[0] for c in (*coeffs_left, *coeffs_right))

    def _stretch(band):
        # Resize one coefficient array to max_len samples (nearest neighbour)
        row = band.reshape(1, -1)
        return cv2.resize(row, (max_len, 1), interpolation=cv2.INTER_NEAREST).flatten()

    stretched_coeffs_left = [_stretch(band) for band in coeffs_left]
    stretched_coeffs_right = [_stretch(band) for band in coeffs_right]

    # Channels become the last axis of the stacked array
    stacked_coeffs = np.stack([stretched_coeffs_left, stretched_coeffs_right], axis=-1)

    tensor_coeffs = tf.convert_to_tensor(stacked_coeffs)
    print(f"Tensor coefficients shape: {tensor_coeffs.shape}")

    # Store the results on the entry
    entry.dwt = coeffs_left
    entry.tensor_coeffs = tensor_coeffs

    return data

def makeWaveDict(folder_name):
    """Load every .wav file of a folder into a dict of WaveData objects.

    Args:
        folder_name: Directory to scan, with or without a trailing separator.

    Returns:
        A dict mapping each file path (``folder_name`` joined with the file name) to
        ``WaveData(path, waveform, None)``. Files are loaded in sorted name order so
        the dict order is reproducible. The waveform is stored exactly as
        ``librosa.load(..., mono=False)`` returns it; it is not transposed here.
    """
    wave_dict = {}
    # Sorted because os.listdir gives no ordering guarantee
    wav_names = sorted(name for name in os.listdir(folder_name) if name.endswith('.wav'))
    for name in wav_names:
        # os.path.join adds the separator only when folder_name lacks a trailing one
        filename = os.path.join(folder_name, name)
        print(f"Loading {filename}")
        waveform, _ = librosa.load(filename, sr=SR, mono=False)
        print(f"Waveform shape: {waveform.shape}")
        wave_dict[filename] = WaveData(filename, waveform, None)
    return wave_dict

# vox_training and vox_true directories of the test song; both paths end with "/".
TEST_SONG_PATH_TRAINING = PATH + test_song_name + "/vox_training/"
TEST_SONG_PATH_TRUE = PATH + test_song_name + "/vox_true/"
# Fail early if either directory is missing
assert os.path.isdir(TEST_SONG_PATH_TRAINING)
assert os.path.isdir(TEST_SONG_PATH_TRUE)


# Load the .wav files of both directories into dicts keyed by file path
train_dict = makeWaveDict(TEST_SONG_PATH_TRAINING)
true_dict = makeWaveDict(TEST_SONG_PATH_TRUE)

def make_test_set(train_dict, true_dict, level=12):
    """Pair every training chunk with its ground-truth chunk and collect their wavelet tensors.

    A training file named ``<anything>_<n>.wav`` is matched with the entry of
    ``true_dict`` whose file name is ``vocals_<n>_pred.wav``, where ``<n>`` is the
    complete number after the last underscore.

    Args:
        train_dict: Dict of file path -> entry for the training chunks.
        true_dict: Dict of file path -> entry for the ground-truth chunks.
        level: Decomposition level forwarded to getWaveletTransform.

    Returns:
        ``(y_train, y_true)``: two equally long lists holding the ``tensor_coeffs``
        of each training chunk and of its matching ground-truth chunk.

    Raises:
        ValueError: If a training file name does not end in ``_<number>.wav``.
        KeyError: If the matching ground-truth file is not in ``true_dict``.
    """
    # Look ground-truth entries up by bare file name, independent of directory spelling
    true_by_name = {os.path.basename(path): path for path in true_dict}
    # Several training files share one ground-truth chunk; transform each only once
    transformed_true = set()

    y_train = []
    y_true = []
    for key in train_dict:
        # Parse the whole trailing number; taking only its first digit would map
        # e.g. chunk 16 onto ground-truth chunk 1.
        base_name = os.path.splitext(os.path.basename(key))[0]
        index_text = base_name.rsplit("_", 1)[-1]
        if not index_text.isdigit():
            raise ValueError(f"Cannot read a chunk index from file name {key!r}")
        index = int(index_text)

        true_name = f"vocals_{index}_pred.wav"
        if true_name not in true_by_name:
            raise KeyError(f"No ground-truth file {true_name} for training file {key}")
        true_key = true_by_name[true_name]
        print(true_key)

        getWaveletTransform(train_dict, key, level)
        if true_key not in transformed_true:
            getWaveletTransform(true_dict, true_key, level)
            transformed_true.add(true_key)

        y_train.append(train_dict[key].tensor_coeffs)
        y_true.append(true_dict[true_key].tensor_coeffs)

    return y_train, y_true

# Build the paired coefficient tensors with a level-5 decomposition and inspect the first one
y_train, y_true = make_test_set(train_dict, true_dict, level=5)
print(y_train[0])



# make test set


Loading data/medley_processed/V2/MidnightBlue_StarsAreScreaming/vox_training/vox_training0_1.wav
Waveform shape: (2, 441000)
Loading data/medley_processed/V2/MidnightBlue_StarsAreScreaming/vox_training/vox_training4_16.wav
Waveform shape: (2, 441000)
Loading data/medley_processed/V2/MidnightBlue_StarsAreScreaming/vox_training/vox_training3_23.wav
Waveform shape: (2, 441000)
Loading data/medley_processed/V2/MidnightBlue_StarsAreScreaming/vox_training/vox_training2_3.wav
Waveform shape: (2, 441000)
Loading data/medley_processed/V2/MidnightBlue_StarsAreScreaming/vox_training/vox_training1_26.wav
Waveform shape: (2, 441000)
Loading data/medley_processed/V2/MidnightBlue_StarsAreScreaming/vox_training/vox_training4_5.wav
Waveform shape: (2, 441000)
Loading data/medley_processed/V2/MidnightBlue_StarsAreScreaming/vox_training/vox_training4_4.wav
Waveform shape: (2, 441000)
Loading data/medley_processed/V2/MidnightBlue_StarsAreScreaming/vox_training/vox_training2_2.wav
Waveform shape: (2, 44100

In [11]:
# Combine the per-chunk tensors into one tensor each.
# NOTE(review): test_tensor is built from y_true, i.e. it holds the targets rather than a test split.
train_tensor = tf.convert_to_tensor(y_train)
test_tensor = tf.convert_to_tensor(y_true)


# Convert TensorFlow tensors to NumPy arrays
train_array = train_tensor.numpy()
test_array = test_tensor.numpy()

# Save the NumPy arrays
# NOTE(review): the "level5" in the file names is hard-coded; keep it in sync with the
# level passed to make_test_set.
np.save("y_train_level5.npy", train_array)
np.save("y_true_level5.npy", test_array)
