In [1]:
# __INIT__ 

import medleydb as mdb
import librosa
import numpy as np
import matplotlib.pyplot as plt
import librosa.display
import pandas as pd
import IPython.display as ipd
import os
from os import path
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
from sklearn.preprocessing import MultiLabelBinarizer

# All data directories live under the current user's home directory.
HOME_PATH = path.expanduser("~")

# Root directory for everything written by the pre-processing step.
DATA_PATH = path.join(HOME_PATH, 'data', 'preprocessed')
# Per-track source-activation CSV files ("<track_id>.csv").
SOURCE_PATH = path.join(DATA_PATH, 'source_ids')
# One sub-directory per non-MedleyDB mix.
MIX_PATH = path.join(HOME_PATH, 'data', 'mixes')

# exist_ok=True avoids the check-then-create race of `if not exists: makedirs`
# and also creates any missing parent directories.
os.makedirs(DATA_PATH, exist_ok=True)
os.makedirs(SOURCE_PATH, exist_ok=True)
os.makedirs(MIX_PATH, exist_ok=True)

# Change to this in a package structure
# DATA_PATH = path.join(path.dirname(__file__), 'data', 'preprocessed')

# Based off of OpenMICs taxonomy discussions and the MedleyDB taxonomy yaml.
# Maps each OpenMIC instrument class to the MedleyDB instrument labels it covers.
# The insertion order of this dict defines the label column order (INSTRUMENT_INDEX).
OPENMIC_TO_MEDLEY = {"drums" : ["drum set"],
                     "bass" : ["electric bass", "double bass"],
                     "guitar" : ["distorted electric guitar", "clean electric guitar", "acoustic guitar"],
                     "voice" : ["male singer", "female singer", "male speaker", "female speaker",
                                "male rapper", "female rapper", "beatboxing", "vocalists", "choir",
                                "male screamer", "female screamer"],
                     "piano" : ["piano", "tack piano", "electric piano"],
                     "synthesizer" : ["synthesizer", "fx/processed sound"],
                     #"organ" : ["pipe organ", "electric organ"],
                     #"accordion" : ["accordion"],
                     #"banjo" : ["banjo"],
                     "cello" : ["cello", "cello section"],
                     "clarinet" : ["clarinet", "clarinet section", "bass clarinet"],
                     "cymbals" : ["cymbal"],
                     "flute" : ["flute", "dizi", "flute section", "piccolo",
                                "bamboo flute", "panpipes", "recorder"],
                     "mallet_percussion" : ["xylophone", "vibraphone", "glockenspiel", "marimba"],
                     "mandolin" : ["mandolin"],
                     "saxophone": ["alto saxophone", "baritone saxophone", "tenor saxophone", "soprano saxophone"],
                     "trombone": ["trombone", "trombone section"],
                     "trumpet" : ["trumpet", "trumpet section"],
                     #"ukulele" : ["ukulele"],
                     # Was misspelled "violin seciton", so violin-section stems were never mapped.
                     "violin" : ["violin", "violin section"]}

# OpenMIC class names (a live view of the dict keys, in label-column order).
INSTRUMENTS = OPENMIC_TO_MEDLEY.keys()
# OpenMIC class name -> label column index.
INSTRUMENT_INDEX = {key: i for i, key in enumerate(OPENMIC_TO_MEDLEY)}
# MedleyDB instrument label -> OpenMIC class name.
MEDLEY_TO_OPENMIC = {v: k for k, v_list in OPENMIC_TO_MEDLEY.items() for v in v_list}
# MedleyDB instrument label -> label column index.
MEDLEY_TO_INDEX = {k: INSTRUMENT_INDEX[v] for k, v in MEDLEY_TO_OPENMIC.items()}
# Label column index -> OpenMIC class name.
REV_INSTRUMENT_INDEX = {v: k for k, v in INSTRUMENT_INDEX.items()}

/home/carlnys/data/medleydb/medleydb/data/Metadata


  INST_TAXONOMY = yaml.load(fhandle)
  MIXING_COEFFICIENTS = yaml.load(fhandle)


In [4]:
class Track(object):
    """A track wrapper independent of MedleyDB's MultiTrack, holding what pre-processing needs.

    The minimal information needed for pre-processing is:
        a track ID,
        a dictionary mapping stems to instruments,
        a file path to the mix .wav file,
        and a path to the activation_conf file.

    If a source file doesn't exist yet, ``source_path`` is None and the file is
    created in the pre-processing step.
    """
    def __init__(self, track):
        """
        Args:
            track (medleydb.MultiTrack or str): Either a MultiTrack, or the name of a
                mix directory located under MIX_PATH.

        Raises:
            AssertionError: if a required file is missing, or a MultiTrack contains
                no instrument known to MEDLEY_TO_OPENMIC.
            Exception: if a MultiTrack has neither activation_conf file on disk.
        """
        if isinstance(track, mdb.MultiTrack):
            self.track_id = track.track_id

            self.instruments, self.stem_instruments = self.extract_instruments(track)
            assert len(self.instruments) > 0, "no valid instruments in song"

            assert os.path.exists(track.mix_path), "%s does not exist" % track.mix_path
            self.mix_path = track.mix_path

            # Prefer the v2 activation confidences when they are available.
            if os.path.exists(track.activation_conf_v2_fpath):
                self.activation_conf_path = track.activation_conf_v2_fpath
            elif os.path.exists(track.activation_conf_fpath):
                self.activation_conf_path = track.activation_conf_fpath
            else:
                raise Exception("no activation_conf files found")

            self.source_path = self._existing_source_path()
            self.is_medley = True

        else:
            self.track_id = track
            track_dir = os.path.join(MIX_PATH, track)

            # TODO
            # Check the file that is actually loaded (the previous check looked for
            # "metadata.npy" while loading "stem_instruments.npy").
            stem_instruments_path = os.path.join(track_dir, "stem_instruments.npy")
            assert os.path.exists(stem_instruments_path), "%s does not exist" % stem_instruments_path
            # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
            # files that were produced by this project.
            self.stem_instruments = np.load(stem_instruments_path, allow_pickle=True).item()
            # Keep the attribute set consistent with the MedleyDB branch.
            # Assumes stem_instruments maps stem keys to lists of instrument labels,
            # as in the MedleyDB branch — TODO confirm for mix metadata.
            self.instruments = {label for labels in self.stem_instruments.values() for label in labels}

            # TODO
            mix_path = os.path.join(track_dir, "%s.wav" % track)
            assert os.path.exists(mix_path), "%s does not exist" % mix_path
            self.mix_path = mix_path

            # TODO
            activation_conf_path = os.path.join(track_dir, "activation_conf.csv")
            assert os.path.exists(activation_conf_path), "%s does not exist" % activation_conf_path
            self.activation_conf_path = activation_conf_path

            # TODO
            self.source_path = self._existing_source_path()
            self.is_medley = False

    def _existing_source_path(self):
        """Return the path of this track's source CSV under SOURCE_PATH, or None if it is missing."""
        # If no source, we create it in preprocessing step
        candidate = os.path.join(SOURCE_PATH, "%s.csv" % self.track_id)
        return candidate if os.path.exists(candidate) else None

    def medleys_to_openmic(self, instruments):
        """Translate MedleyDB instrument labels to OpenMIC classes, dropping unmapped labels."""
        return [MEDLEY_TO_OPENMIC[i] for i in instruments if i in MEDLEY_TO_OPENMIC]

    def extract_instruments(self, t):
        """Collect the OpenMIC instruments of a MultiTrack.

        Args:
            t (medleydb.MultiTrack): the multitrack to inspect.

        Returns:
            tuple: (set of OpenMIC classes present in the track,
                    dict mapping stem key such as "S01" to a list of OpenMIC classes).
                    Stems without any mapped instrument are left out of the dict.
        """
        instruments = {MEDLEY_TO_OPENMIC[i] for i in t.stem_instruments if i in MEDLEY_TO_OPENMIC}
        stem_instruments = {
            self.generate_stem_key(k): self.medleys_to_openmic(v.instrument)
            for k, v in t.stems.items()
            if any(i in MEDLEY_TO_OPENMIC for i in v.instrument)
        }
        return instruments, stem_instruments

    def generate_stem_key(self, x):
        """Format a stem index as a zero-padded key, e.g. 3 -> "S03", 12 -> "S12"."""
        return "S%02d" % x

In [21]:
def load_track(track_id):
    """Load the single MedleyDB multitrack identified by ``track_id``.

    Returns the first item produced by ``mdb.load_multitracks`` for that id.
    """
    return next(mdb.load_multitracks([track_id]))

def generate_id(x):
    """Return ``str(x)`` left-padded with zeros to at least five characters."""
    return format(str(x), "0>5")

def generate_stem_key(x):
    """Build a stem key from a stem index: "S0<x>" below ten, "S<x>" otherwise."""
    prefix = "S0" if x < 10 else "S"
    return "%s%d" % (prefix, x)

def create_spectrogram(track, spectrogram_params):
    """Cut a track's mix into fixed-length segments and compute a dB mel spectrogram for each.

    Args:
        track: object exposing ``mix_path`` (audio file) and ``track_id``.
        spectrogram_params (dict): must contain 'spectrogram_len' (segment length in
            seconds), 'n_fft', 'hop_length', 'n_mels' and 'sample_rate'.

    Returns:
        numpy array with one float32 spectrogram per segment, stacked along axis 0.

    Raises:
        AssertionError: if the track's length in whole seconds is not greater than
            'spectrogram_len'.
    """
    segment_seconds = spectrogram_params['spectrogram_len']
    mel_options = {'n_fft': spectrogram_params['n_fft'],
                   'hop_length': spectrogram_params['hop_length'],
                   'n_mels': spectrogram_params['n_mels']}

    y, sr = librosa.load(track.mix_path, sr=spectrogram_params['sample_rate'])
    print("Length of track %s is: %d" % (track.track_id, len(y)//sr))
    assert len(y)//sr > segment_seconds

    segment_samples = int(sr*segment_seconds)
    segment_count = len(y)//segment_samples
    # Drop the trailing samples that do not fill a whole segment.
    y = y[:segment_count*segment_samples]

    def to_db_mel(chunk):
        # Power mel spectrogram, converted to dB relative to the segment's own maximum.
        power = librosa.feature.melspectrogram(y=chunk, sr=sr, **mel_options)
        return librosa.power_to_db(power, ref=np.max).astype(np.float32)

    segments = [to_db_mel(y[start:start+segment_samples])
                for start in range(0, len(y), segment_samples)]
    return np.stack(segments, axis=0)
    
def create_labels(t, num_spectrograms, spectrogram_params):
    """Build one binary instrument-label row per spectrogram from the track's source CSV.

    Args:
        t: track whose ``source_path`` points at a CSV with the columns
            'start_time', 'end_time' and 'instrument_label'.
        num_spectrograms (int): number of spectrogram segments of the track.
        spectrogram_params (dict): only 'spectrogram_len' (seconds per segment) is read.

    Returns:
        numpy array of shape (num_spectrograms, len(INSTRUMENTS)); entry [i][j] is 1 for
        every segment i in range(start_segment, end_segment) of an activation of
        instrument j, where both bounds are the activation times floor-divided by the
        segment length (the end segment itself is excluded).
    """
    segment_seconds = spectrogram_params['spectrogram_len']
    last_second = num_spectrograms*segment_seconds
    # Rows = which spectrogram, columns = which instrument
    labels = np.zeros((num_spectrograms, len(INSTRUMENTS)))
    annotations = pd.read_csv(t.source_path)
    activations = zip(annotations["instrument_label"],
                      annotations["start_time"],
                      annotations["end_time"])
    for instrument, begin, end in activations:
        if instrument not in INSTRUMENTS:
            continue
        # Activations running past the last full segment are cut off there.
        end = min(end, last_second)
        first_segment = int(begin//segment_seconds)
        stop_segment = int(end//segment_seconds)
        column = INSTRUMENT_INDEX[instrument]
        for segment in range(first_segment, stop_segment):
            labels[segment, column] = 1
    return labels
    
def create_source_ids(t):
    """Derive per-instrument activation intervals from a track's activation-confidence file.

    Reads ``t.activation_conf_path`` (first column: time; remaining columns: one
    confidence series per stem), thresholds every stem listed in
    ``t.stem_instruments`` and writes the resulting intervals to
    ``SOURCE_PATH/<track_id>.csv`` with the columns 'start_time', 'end_time' and
    'instrument_label'. ``t.source_path`` is set to the written file.

    An interval that is still active on the last row is closed at the last
    timestamp (previously such intervals were silently dropped).

    Args:
        t: track exposing ``activation_conf_path``, ``stem_instruments`` and ``track_id``.
    """
    # Subtracted from the first inactive timestamp to get the interval end.
    # Presumably the time step between rows of the activation file — TODO confirm.
    frame_step = 0.0464
    # A stem counts as active at or above this confidence.
    active_threshold = 0.5

    conf_df = pd.read_csv(t.activation_conf_path)
    columns = conf_df.columns
    stem_labels = t.stem_instruments
    # Column 0 is the time axis; skip stems not included after relabeling.
    tracked_columns = [i for i in range(1, len(columns)) if columns[i] in stem_labels]

    open_since = {}  # column index -> start time of the currently open interval
    source_activations = []
    last_time = None
    # Plain tuples give positional access without the per-row Series overhead.
    for row in conf_df.itertuples(index=False, name=None):
        time = row[0]
        last_time = time
        for col in tracked_columns:
            if row[col] >= active_threshold:
                if col not in open_since:
                    open_since[col] = time
            elif col in open_since:
                begin = open_since.pop(col)
                for instrument_label in stem_labels[columns[col]]:
                    source_activations.append((begin, time - frame_step, instrument_label))

    # Close intervals that are still active when the file ends.
    for col in tracked_columns:
        if col in open_since:
            for instrument_label in stem_labels[columns[col]]:
                source_activations.append((open_since[col], last_time, instrument_label))

    source_df = pd.DataFrame(source_activations, columns=['start_time', 'end_time', 'instrument_label'])
    source_df = source_df.sort_values(by=['instrument_label'])

    source_path = os.path.join(SOURCE_PATH, "%s.csv" % t.track_id)
    source_df.to_csv(path_or_buf = source_path, index=False, float_format='%.4f')
    t.source_path = source_path

def create_matrix_labels(t, num_spectrograms, spectrogram_params, bins_per_spectrogram):
    """Build a binary (instrument x time-bin) label matrix from the track's source CSV.

    Args:
        t: track whose ``source_path`` points at a CSV with the columns
            'start_time', 'end_time' and 'instrument_label'.
        num_spectrograms (int): number of spectrogram segments of the track.
        spectrogram_params (dict): only 'spectrogram_len' (seconds per segment) is read.
        bins_per_spectrogram (int): number of time bins in each spectrogram.

    Returns:
        numpy array of shape (len(INSTRUMENTS), int(num_spectrograms*bins_per_spectrogram));
        entry [j][b] is 1 for every bin b in range(start_bin, end_bin) of an activation
        of instrument j, where both bounds are the activation times floor-divided by
        the bin length.
    """
    segment_seconds = spectrogram_params['spectrogram_len']
    last_second = num_spectrograms*segment_seconds
    seconds_per_bin = segment_seconds / bins_per_spectrogram
    total_bins = int(num_spectrograms*bins_per_spectrogram)
    labels = np.zeros((len(INSTRUMENTS), total_bins))
    annotations = pd.read_csv(t.source_path)
    activations = zip(annotations["instrument_label"],
                      annotations["start_time"],
                      annotations["end_time"])
    for instrument, begin, end in activations:
        if instrument not in INSTRUMENTS:
            continue
        # Activations running past the last full segment are cut off there.
        end = min(end, last_second)
        first_bin = int(begin//seconds_per_bin)
        stop_bin = int(end//seconds_per_bin)
        row = INSTRUMENT_INDEX[instrument]
        for time_bin in range(first_bin, stop_bin):
            labels[row, time_bin] = 1
    return labels

# Test matrix labels
# t = Track(load_track('MusicDelta_Shadows'))
# y, sr = librosa.load(t.mix_path)
# window_length = int(0.0464 * sr)
# hop_length = int(0.0116 * sr)
# spectrograms = create_spectrogram(t, 1, window_length, hop_length, 96)
# number_of_spectrograms = spectrograms.shape[0]
# bins_per_spectrogram = spectrograms.shape[2]
# labels = create_matrix_labels(t, number_of_spectrograms, 1, bins_per_spectrogram) 
# import sys
# np.set_printoptions(threshold=sys.maxsize)
# print(2871/spectrograms.shape[2])
# print(0.0116*2871)
# print(iad.shape)
# matrix_labels = True
# for i in range(number_of_spectrograms):
#     spectrogram = spectrograms[i]
#     label = labels[i*bins_per_spectrogram:i*bins_per_spectrogram+bins_per_spectrogram] if matrix_labels else labels[i]
#     print(label.shape)

def preprocess_track(track, dataset_type, spectrogram_params, matrix_labels=False, recalculate_everything=False):
    """Compute and save the spectrograms and labels of one track.

    Files are written as ``<track_id>_<5-digit segment index>.npy`` below
    ``DATA_PATH/<len>_<n_fft>_<hop>_<n_mels>/<dataset_type>/`` in the sub-directories
    "input" and either "labels" or "matrix_labels".

    Args:
        track: Track to process.
        dataset_type (str): sub-directory name of the split, e.g. "train".
        spectrogram_params (dict): 'spectrogram_len', 'n_fft', 'hop_length',
            'n_mels' and 'sample_rate'.
        matrix_labels (bool): save per-bin label matrices instead of one label
            vector per spectrogram.
        recalculate_everything (bool): regenerate the source CSV even if it exists.

    Returns:
        None. A track whose spectrograms cannot be computed (e.g. too short) is
        skipped after printing the reason.
    """
    try:
        spectrograms = create_spectrogram(track, spectrogram_params)
    except Exception as err:
        # Best-effort skip, but say why; a bare `except:` would also swallow
        # KeyboardInterrupt and make a long run impossible to abort.
        print("Skipping track %s: %s: %s" % (track.track_id, type(err).__name__, err))
        return

    number_of_spectrograms = spectrograms.shape[0]

    if track.source_path is None or recalculate_everything:
        create_source_ids(track)

    if matrix_labels:
        bins_per_spectrogram = spectrograms.shape[2]
        labels = create_matrix_labels(track, number_of_spectrograms, spectrogram_params, bins_per_spectrogram)
    else:
        labels = create_labels(track, number_of_spectrograms, spectrogram_params)

    # NOTE: %d truncates a fractional 'spectrogram_len' (8.191 -> "8") in the folder name.
    base_path = os.path.join(DATA_PATH, "%d_%d_%d_%d" % (spectrogram_params['spectrogram_len'],
                                                         spectrogram_params['n_fft'],
                                                         spectrogram_params['hop_length'],
                                                         spectrogram_params['n_mels']))

    input_path = os.path.join(base_path, dataset_type, "input")
    label_path = os.path.join(base_path, dataset_type, "matrix_labels" if matrix_labels else "labels")

    os.makedirs(input_path, exist_ok=True)
    os.makedirs(label_path, exist_ok=True)

    for i in range(number_of_spectrograms):
        if matrix_labels:
            # Columns of the label matrix that belong to spectrogram i.
            first_bin = i*bins_per_spectrogram
            label = labels[:, first_bin:first_bin+bins_per_spectrogram]
        else:
            label = labels[i]
        file_name = "%s_%s.npy" % (track.track_id, generate_id(i))
        np.save(os.path.join(input_path, file_name), spectrograms[i])
        np.save(os.path.join(label_path, file_name), label)

def create_train_val_test_split(tracks, test_size = 0.2, val_size = 0.2):
    """Split tracks into train/validation/test sets, stratified on their instrument sets.

    The test set is split off first; the validation set is then taken from the
    remaining tracks, so ``val_size`` is a fraction of the non-test tracks.
    Both splits use ``random_state=0``.

    Args:
        tracks (iterable): tracks exposing an ``instruments`` collection.
        test_size (float): fraction of all tracks used for testing.
        val_size (float): fraction of the remaining tracks used for validation.

    Returns:
        tuple: (train, val, test) numpy object arrays of tracks.
    """
    X = np.array(list(tracks))
    # `classes` must be passed by keyword: current scikit-learn rejects it positionally.
    mlb = MultiLabelBinarizer(classes=list(INSTRUMENTS))
    Y = mlb.fit_transform([list(t.instruments) for t in X])

    # n_splits is kept at 2 so the generated split matches earlier runs; only the
    # first split is consumed.
    test_train = MultilabelStratifiedShuffleSplit(n_splits=2, test_size = test_size, random_state=0)
    train_index, test_index = next(test_train.split(X, Y))
    X_train, test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    train_val = MultilabelStratifiedShuffleSplit(n_splits=2, test_size = val_size, random_state=0)
    train_index, val_index = next(train_val.split(X_train, Y_train))
    train, val = X_train[train_index], X_train[val_index]
    train_labels, val_labels = Y_train[train_index], Y_train[val_index]

    print("Training sample length:", train.shape[0])
    print("Distribution:", np.sum(train_labels, axis=0))
    print("Validation sample length:", val.shape[0])
    print("Distribution:", np.sum(val_labels, axis=0))
    print("Test sample length", test.shape[0])
    print("Distribution:", np.sum(Y_test, axis=0))

    return train, val, test

def create_tracks_from_medley(dataset_version):
    """Wrap every loadable MedleyDB multitrack in a Track.

    Args:
        dataset_version: passed unchanged to ``mdb.load_all_multitracks``.

    Returns:
        tuple: (list of Track objects, list of track ids that could not be wrapped).
    """
    tracks = []
    unloaded_tracks = []
    song_count = 0
    for t in mdb.load_all_multitracks(dataset_version):
        song_count += 1
        try:
            tracks.append(Track(t))
        except Exception:
            # Track() fails for songs without usable instruments or files; those are
            # reported to the caller. `except Exception` (instead of a bare except)
            # lets KeyboardInterrupt through.
            unloaded_tracks.append(t.track_id)
    print("Loaded %s/%s tracks" % (len(tracks), song_count))
    return tracks, unloaded_tracks

    
def test_dimensions_given_params(matrix = True):
    """Print spectrogram and matrix-label shapes for 'MusicDelta_Shadows' as a sanity check.

    Args:
        matrix (bool): True selects the matrix-label parameter preset (16 kHz, 8.191 s
            segments), False the preset used for per-spectrogram labels (22143 Hz,
            1 s segments). Matrix labels are generated in both cases.
    """
    import sys

    if matrix:
        spectrogram_params = {'sample_rate' : 16000,
                              'spectrogram_len' : 8.191,
                              'n_fft' : 1024,
                              'hop_length' : 512,
                              'n_mels' : 128}
    else:
        spectrogram_params = {'sample_rate' : 22143,
                              'spectrogram_len' : 1,
                              'n_fft' : 173*4,
                              'hop_length' : 173,
                              'n_mels' : 128}

    # Test matrix labels
    t = Track(load_track('MusicDelta_Shadows'))
    # The label step reads the source CSV; create it if this track has none yet.
    if t.source_path is None:
        create_source_ids(t)

    # The audio is loaded inside create_spectrogram; a separate librosa.load here was
    # unused and passed `sr` positionally, which recent librosa versions reject.
    spectrograms = create_spectrogram(t, spectrogram_params)
    print(spectrograms.shape)
    number_of_spectrograms = spectrograms.shape[0]
    bins_per_spectrogram = spectrograms.shape[2]
    labels = create_matrix_labels(t, number_of_spectrograms, spectrogram_params, bins_per_spectrogram)
    np.set_printoptions(threshold=sys.maxsize)
    print(labels.shape)
    for i in range(number_of_spectrograms):
        # Columns of the label matrix that belong to spectrogram i.
        first_bin = i*bins_per_spectrogram
        label = labels[:, first_bin:first_bin+bins_per_spectrogram]
        print(label.shape)

def preprocess_tracks(spectrogram_params = None,
                      test_size = 0.2,
                      val_size = 0.2,
                      matrix_labels = False,
                      dataset_version = ['V1', 'V2'],
                      path_to_mixes = None):
    """Pre-process every loadable MedleyDB track into train/validation/test folders.

    Args:
        spectrogram_params (dict or None): spectrogram settings; when None a preset
            is chosen depending on ``matrix_labels``.
        test_size (float): forwarded to create_train_val_test_split.
        val_size (float): forwarded to create_train_val_test_split.
        matrix_labels (bool): save per-bin label matrices instead of label vectors.
        dataset_version: forwarded to create_tracks_from_medley.
        path_to_mixes: currently unused.
    """
    # Matrix-label preset:
    # samplerate = 16000
    # window = 1024
    # hop = 512
    # duration = 8.191  # (8.192 - 0.001 because librosa rounds up)
    # assert duration * samplerate / hop == 256

    # Label-vector preset:
    # samplerate = 22143, (22144 - 1, because librosa rounds up)
    # window = 173 * 4
    # hop = 173
    # duration = 1  # seconds
    # assert duration * samplerate / hop == 128

    if spectrogram_params is None:
        if matrix_labels:
            spectrogram_params = {'sample_rate' : 16000,
                                  'spectrogram_len' : 8.191,
                                  'n_fft' : 1024,
                                  'hop_length' : 512,
                                  'n_mels' : 128}
        else:
            spectrogram_params = {'sample_rate' : 22143,
                                  'spectrogram_len' : 1,
                                  'n_fft' : 173*4,
                                  'hop_length' : 173,
                                  'n_mels' : 128}

    tracks, _ = create_tracks_from_medley(dataset_version)
    # TODO: APPEND MIX-TRACKS

    train, val, test = create_train_val_test_split(tracks, test_size = test_size, val_size = val_size)

    splits = (("train", train), ("validation", val), ("test", test))
    total = len(tracks)
    count = 0
    for dataset_type, split_tracks in splits:
        for t in split_tracks:
            preprocess_track(t, dataset_type, spectrogram_params, matrix_labels = matrix_labels)
            count += 1
            print("Processing: %s/%s" % (count, total), end="\r")

    # TODO: PREPROCESS MIXES

In [22]:
# Sanity check: print spectrogram and label shapes using the matrix-label preset.
test_dimensions_given_params(True)

Length of track MusicDelta_Shadows is: 33
(4, 128, 256)
(16, 1024)
(16, 256)
(16, 256)
(16, 256)
(16, 256)


In [12]:
# Inspect the stem -> instrument mapping derived for one MedleyDB track.
t = load_track('MusicDelta_Shadows')
tr = Track(t)
print(tr.stem_instruments)
# help() prints the documentation itself and returns None, so wrapping it in
# print() only appended a stray "None".
help(t)

{'S01': ['bass'], 'S02': ['drums'], 'S03': ['guitar'], 'S04': ['guitar']}
Help on MultiTrack in module medleydb.multitrack object:

class MultiTrack(builtins.object)
 |  MultiTrack(track_id)
 |  
 |  MultiTrack Class definition.
 |  
 |  This class loads all available metadata, annotations, and filepaths for a
 |  given multitrack directory.
 |  
 |  Parameters
 |  ----------
 |  track_id : str
 |      Track id in format 'Artist_Title'.
 |  
 |  Attributes
 |  ----------
 |  artist : str
 |      The artist of the multitrack
 |  title : str
 |      The title of the multitrack
 |  track_id : str
 |      The unique identifier of the multitrack. In the form 'Artist_Title'
 |  annotation_dir : str
 |      Path to multitrack's annotation directory
 |  audio_path : str
 |      Path to multitrack's top level audio directory
 |  mix_path : str
 |      Path to multitrack's mix file.
 |  melody_rankings : dictionary
 |      Dictionary of melody rankings keyed by stem id
 |  melody1_fpath : str
 |

In [13]:
#test_dimensions_given_params()
# Run the whole pre-processing pipeline, saving per-bin (matrix) labels.
preprocess_tracks(matrix_labels = True)

  metadata = yaml.load(f_in)


Loaded 144/196 tracks
Training sample length: 90
Distribution: [55 61 49 52 40 18  8  7  5 11  8  6  4  2  8 11]
Validation sample length: 23
Distribution: [14 15 12 13 10  5  2  2  1  3  2  1  1  1  2  3]
Test sample length 31
Distribution: [18 22 19 16 12  6  3  2  1  3  2  2  1  1  2  4]
Length of track AClassicEducation_NightOwl is: 171
Processing: 1/144



Length of track AimeeNorwich_Child is: 1
Length of track AimeeNorwich_Flying is: 217
Length of track AmarLal_Rest is: 184
Length of track AmarLal_SpringDay1 is: 133
Length of track AvaLuna_Waterduct is: 259
Length of track BigTroubles_Phantom is: 146
Length of track BrandonWebster_DontHearAThing is: 171
Length of track ChrisJacoby_PigsFoot is: 367
Length of track ClaraBerryAndWooldog_AirTraffic is: 173
Length of track ClaraBerryAndWooldog_Boys is: 153
Length of track ClaraBerryAndWooldog_TheBadGuys is: 254
Length of track Debussy_LenfantProdigue is: 223
Length of track DreamersOfTheGhetto_HeavyLove is: 294
Length of track EthanHein_1930sSynthAndUprightBass is: 287
Length of track EthanHein_BluesForNofi is: 166
Length of track EthanHein_GirlOnABridge is: 130
Length of track EthanHein_HarmonicaFigure is: 150
Length of track FamilyBand_Again is: 201
Length of track HeladoNegro_MitadDelMundo is: 181
Length of track HezekiahJones_BorrowedHeart is: 241
Length of track InvisibleFamiliars_Dist

In [2]:
# Needs the __INIT__ cell (which imports medleydb as mdb) to have run first;
# on a fresh kernel this cell fails with a NameError.
# NOTE(review): despite its name, `fx_tracks` holds the results for 'viola'.
# NOTE(review): presumably get_files_for_instrument yields stem file paths while
# load_multitracks expects track ids — verify against the medleydb API.
fx_tracks = mdb.get_files_for_instrument('viola')
tracks = mdb.load_multitracks(fx_tracks)

NameError: name 'mdb' is not defined

In [1]:
#t = next(fx_tracks)
#play_stem(t)

In [85]:
# Show the current value of `t`; it comes from kernel state left by an earlier
# cell, so the output depends on execution order.
print(t)

/home/carlnys/data/medleydb/medleydb/Audio/EthanHein_GirlOnABridge/EthanHein_GirlOnABridge_STEMS/EthanHein_GirlOnABridge_STEM_04.wav
