In [6]:
%load_ext autoreload

In [7]:
%autoreload 2

In [5]:
from train_utils import make_pump

Using TensorFlow backend.


## Train and Eval
- [ ] generate test set indices
- [ ] train with new data
- [ ] Evaluation

## Data Augment
- [ ] Data augmentation on a single data pair
- [ ] Generalize data augmentation

## GuitarSet Data
- [x] How to use comp and solo?
- [x] Evaluate on solo, and mix separately
- [x] Regenerate data for mixture via prepare


## Prepare A-MAPS
- [x] Download A-MAPS
- [x] Prepare them into audio files and JAMS files
- [x] Run through prepare
- [x] Test current models (madmom and crema) on A-MAPS and get some numbers

In [12]:
# Root folder of the local A-MAPS copy; audio, MIDI and JAMS file names from the
# index are joined onto this path.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
AMAPS_ROOT = '/Users/tom/Music/A-MAPS/'

In [302]:
import glob, os, json
import pandas as pd
import pretty_midi as pm
import numpy as np
import jams
import librosa
import tqdm
import soundfile

In [264]:
def make_amaps_index(amaps_root):
    """Build a track index for the A-MAPS files found in ``amaps_root``.

    Every ``*.mid`` file directly inside ``amaps_root`` defines one track whose
    id is the file name without its ``.mid`` extension.

    Parameters
    ----------
    amaps_root : str
        Directory that is searched (non-recursively) for ``*.mid`` files.

    Returns
    -------
    dict
        Maps each track id (inserted in sorted order) to a dict with the keys
        ``'audio'``, ``'midi'`` and ``'jams'`` holding the file names
        ``<track_id>.wav``, ``<track_id>.mid`` and ``<track_id>.jams``.
        The names are derived from the track id only; the audio and JAMS
        files are not checked for existence.
    """
    midi_files = glob.glob(os.path.join(amaps_root, '*.mid'))
    # splitext removes only the trailing extension, so ids that contain dots
    # stay intact and '<track_id>.mid' still names the file that was found
    track_ids = sorted(os.path.splitext(os.path.basename(f))[0]
                       for f in midi_files)

    return {
        track_id: {
            'audio': '{}.wav'.format(track_id),
            'midi': '{}.mid'.format(track_id),
            'jams': '{}.jams'.format(track_id)
        }
        for track_id in track_ids
    }

In [265]:
# Index every MIDI file found directly under AMAPS_ROOT.
amaps_index = make_amaps_index(AMAPS_ROOT)

In [267]:
# The dict-of-dicts becomes a frame with one column per track id; transposing
# gives one row per track and one column per file kind.
amaps_by_track = pd.DataFrame(amaps_index)
amaps_df = amaps_by_track.transpose()
amaps_df.to_json(path_or_buf='dataset_indecies/amaps_index.json')

In [268]:
# Group track ids by piece: the text after the last '_' is the version tag,
# everything before it is the piece name.
pieces = {}
for track_id in amaps_df.index:
    piece_name, sep, version_tag = track_id.rpartition('_')
    pieces.setdefault(piece_name, []).append(version_tag)

In [269]:
# Number of distinct pieces (one piece can have several versions).
len(pieces)

158

In [270]:
# One shuffled split that holds out 20% of the items, with a fixed random_state.
# NOTE(review): ShuffleSplit is not imported in any visible cell — presumably
# sklearn.model_selection.ShuffleSplit; confirm and add the import.
spliter = ShuffleSplit(n_splits=1, test_size=0.2, random_state=20190921)

In [271]:
# Split by piece rather than by track, so all versions of a piece end up on
# the same side. The positional indices select from the sorted piece names.
sorted_piece_names = np.asarray(sorted(pieces.keys()))
train_idx, test_idx = next(spliter.split(pieces))
train_pieces = sorted_piece_names[train_idx]
test_pieces = sorted_piece_names[test_idx]

In [272]:
# Expand the piece-level split back into full track ids ("<piece>_<version>").
amaps_test_idx = ['_'.join([piece_name, version_tag])
                  for piece_name in test_pieces
                  for version_tag in pieces[piece_name]]

amaps_train_idx = ['_'.join([piece_name, version_tag])
                   for piece_name in train_pieces
                   for version_tag in pieces[piece_name]]

In [273]:
# (train, held-out) track counts after expanding pieces back into versions.
len(amaps_train_idx), len(amaps_test_idx)

(216, 50)

In [274]:
# Persist both id lists; the held-out ids are stored in the "val" file.
for split_path, split_ids in (
        ('dataset_indecies/amaps_train_idx.json', amaps_train_idx),
        ('dataset_indecies/amaps_val_idx.json', amaps_test_idx)):
    with open(split_path, 'w') as fp:
        json.dump(split_ids, fp, indent=2)

### Clean up guitarset_index

In [146]:
# Load the GuitarSet index and transpose it; the row labels are then what the
# 'solo' / 'comp' filters in the next cells match against.
gs_df = pd.read_json('dataset_indecies/guitarset_index.json').T

In [155]:
# Solo rows only, restricted to the mic audio, mix audio and annotation columns.
# Columns get a '_solo' suffix, and the last five characters of each row label
# (the length of '_solo') are dropped so solo and comp rows can share an index.
gs_solo_df = (
    gs_df
    .filter(like='solo', axis=0)
    .filter(items=['audio_mic', 'audio_mix', 'jams'])
    .rename(columns={'audio_mic': 'audio_mic_solo',
                     'audio_mix': 'audio_mix_solo',
                     'jams': 'jams_solo'},
            index=lambda row_label: row_label[:-5])
    # keep element 0 of every cell (presumably one-element lists — confirm)
    .applymap(lambda cell: cell[0])
)

In [156]:
# Comp rows only, restricted to the mic audio, mix audio and annotation columns.
# Columns get a '_comp' suffix, and the last five characters of each row label
# (the length of '_comp') are dropped so comp and solo rows can share an index.
gs_comp_df = (
    gs_df
    .filter(like='comp', axis=0)
    .filter(items=['audio_mic', 'audio_mix', 'jams'])
    .rename(columns={'audio_mic': 'audio_mic_comp',
                     'audio_mix': 'audio_mix_comp',
                     'jams': 'jams_comp'},
            index=lambda row_label: row_label[:-5])
    # keep element 0 of every cell (presumably one-element lists — confirm)
    .applymap(lambda cell: cell[0])
)

In [159]:
# Put the comp and solo columns side by side, joined on the shared row labels.
# Note: this rebinds gs_df from the loaded index to the merged table.
gs_df = gs_comp_df.join(gs_solo_df)

Unnamed: 0,audio_mic_comp,audio_mix_comp,jams_comp,audio_mic_solo,audio_mix_solo,jams_solo
00_BN1-129-Eb,audio_mono-mic/00_BN1-129-Eb_comp_mic.wav,audio_mono-pickup_mix/00_BN1-129-Eb_comp_mix.wav,annotation/00_BN1-129-Eb_comp.jams,audio_mono-mic/00_BN1-129-Eb_solo_mic.wav,audio_mono-pickup_mix/00_BN1-129-Eb_solo_mix.wav,annotation/00_BN1-129-Eb_solo.jams
00_BN1-147-Gb,audio_mono-mic/00_BN1-147-Gb_comp_mic.wav,audio_mono-pickup_mix/00_BN1-147-Gb_comp_mix.wav,annotation/00_BN1-147-Gb_comp.jams,audio_mono-mic/00_BN1-147-Gb_solo_mic.wav,audio_mono-pickup_mix/00_BN1-147-Gb_solo_mix.wav,annotation/00_BN1-147-Gb_solo.jams
00_BN2-131-B,audio_mono-mic/00_BN2-131-B_comp_mic.wav,audio_mono-pickup_mix/00_BN2-131-B_comp_mix.wav,annotation/00_BN2-131-B_comp.jams,audio_mono-mic/00_BN2-131-B_solo_mic.wav,audio_mono-pickup_mix/00_BN2-131-B_solo_mix.wav,annotation/00_BN2-131-B_solo.jams
00_BN2-166-Ab,audio_mono-mic/00_BN2-166-Ab_comp_mic.wav,audio_mono-pickup_mix/00_BN2-166-Ab_comp_mix.wav,annotation/00_BN2-166-Ab_comp.jams,audio_mono-mic/00_BN2-166-Ab_solo_mic.wav,audio_mono-pickup_mix/00_BN2-166-Ab_solo_mix.wav,annotation/00_BN2-166-Ab_solo.jams
00_BN3-119-G,audio_mono-mic/00_BN3-119-G_comp_mic.wav,audio_mono-pickup_mix/00_BN3-119-G_comp_mix.wav,annotation/00_BN3-119-G_comp.jams,audio_mono-mic/00_BN3-119-G_solo_mic.wav,audio_mono-pickup_mix/00_BN3-119-G_solo_mix.wav,annotation/00_BN3-119-G_solo.jams
...,...,...,...,...,...,...
05_SS1-68-E,audio_mono-mic/05_SS1-68-E_comp_mic.wav,audio_mono-pickup_mix/05_SS1-68-E_comp_mix.wav,annotation/05_SS1-68-E_comp.jams,audio_mono-mic/05_SS1-68-E_solo_mic.wav,audio_mono-pickup_mix/05_SS1-68-E_solo_mix.wav,annotation/05_SS1-68-E_solo.jams
05_SS2-107-Ab,audio_mono-mic/05_SS2-107-Ab_comp_mic.wav,audio_mono-pickup_mix/05_SS2-107-Ab_comp_mix.wav,annotation/05_SS2-107-Ab_comp.jams,audio_mono-mic/05_SS2-107-Ab_solo_mic.wav,audio_mono-pickup_mix/05_SS2-107-Ab_solo_mix.wav,annotation/05_SS2-107-Ab_solo.jams
05_SS2-88-F,audio_mono-mic/05_SS2-88-F_comp_mic.wav,audio_mono-pickup_mix/05_SS2-88-F_comp_mix.wav,annotation/05_SS2-88-F_comp.jams,audio_mono-mic/05_SS2-88-F_solo_mic.wav,audio_mono-pickup_mix/05_SS2-88-F_solo_mix.wav,annotation/05_SS2-88-F_solo.jams
05_SS3-84-Bb,audio_mono-mic/05_SS3-84-Bb_comp_mic.wav,audio_mono-pickup_mix/05_SS3-84-Bb_comp_mix.wav,annotation/05_SS3-84-Bb_comp.jams,audio_mono-mic/05_SS3-84-Bb_solo_mic.wav,audio_mono-pickup_mix/05_SS3-84-Bb_solo_mix.wav,annotation/05_SS3-84-Bb_solo.jams


In [162]:
# Save the merged table. This overwrites the index file gs_df was loaded from,
# so re-running the load cell afterwards reads the new layout.
gs_df.to_json(path_or_buf='dataset_indecies/guitarset_index.json')

In [163]:
# Write the A-MAPS index to its JSON file (same target as the earlier save).
amaps_df.to_json(path_or_buf='dataset_indecies/amaps_index.json')

### Do GuitarSet Splits

In [175]:
# Reuse the same splitter for GuitarSet; the positional indices select from
# the sorted row labels.
gs_sorted_labels = np.asarray(sorted(gs_df.index))
gs_train_idx, gs_test_idx = next(spliter.split(gs_df.index))
gs_train_pieces = gs_sorted_labels[gs_train_idx]
gs_test_pieces = gs_sorted_labels[gs_test_idx]

In [177]:
# Persist both label lists; the held-out labels are stored in the "val" file.
# list(...) converts each numpy array into something json.dump can serialise.
for split_path, split_labels in (
        ('dataset_indecies/gs_train_idx.json', gs_train_pieces),
        ('dataset_indecies/gs_val_idx.json', gs_test_pieces)):
    with open(split_path, 'w') as fp:
        json.dump(list(split_labels), fp, indent=2)

### Make JAMS files from A-MAPS MIDI

In [287]:
# Spot check: look up the audio file name of one track by its id.
amaps_df.audio['MAPS_MUS-alb_esp2_AkPnCGdD']

'MAPS_MUS-alb_esp2_AkPnCGdD.wav'

In [275]:
def midi_to_key_jams(amaps_piece, amaps_df):
    """Write a JAMS file holding the key signatures of one A-MAPS track.

    The key signature changes are read from the track's MIDI file and stored
    as a ``key_mode`` annotation. Each key lasts until the next change, and
    the last one lasts until the end of the audio file.

    Parameters
    ----------
    amaps_piece
        Track id: a row label of ``amaps_df``. It is also stored as the
        title in the JAMS file metadata.
    amaps_df : pandas.DataFrame
        Index table with ``audio``, ``midi`` and ``jams`` columns holding
        file names relative to the module-level ``AMAPS_ROOT``.

    Returns
    -------
    None
        The JAMS file is saved to the path taken from the ``jams`` column.
    """
    audio_path = os.path.join(AMAPS_ROOT, amaps_df.audio[amaps_piece])
    midi_path = os.path.join(AMAPS_ROOT, amaps_df.midi[amaps_piece])
    jams_path = os.path.join(AMAPS_ROOT, amaps_df.jams[amaps_piece])

    audio_dur = librosa.get_duration(filename=audio_path)
    midi = pm.PrettyMIDI(midi_file=midi_path)
    ann = jams.Annotation('key_mode', duration=audio_dur)

    # build lists for annotation construction
    start_times = []
    values = []
    for key_sig in midi.key_signature_changes:
        start_times.append(key_sig.time)
        # key name comes back as "<tonic> <mode>"; store it as "<tonic>:<mode>"
        key, mode = pm.key_number_to_key_name(key_sig.key_number).split(' ')
        values.append(':'.join([key, mode.lower()]))

    # each key ends where the next begins; the last ends with the audio
    end_times = start_times[1:] + [audio_dur]

    # build annotation
    for st, et, val in zip(start_times, end_times, values):
        ann.append(time=st, duration=et-st, value=val)

    # build and save jams
    jam = jams.JAMS(annotations=[ann])
    jam.file_metadata.duration = ann.duration
    # title comes from the argument, not from whatever a global `piece` holds
    jam.file_metadata.title = amaps_piece
    jam.save(jams_path)

In [276]:
# Write a key JAMS file for every track in the A-MAPS index, with a progress bar.
for piece in tqdm.tqdm(amaps_df.index):
    midi_to_key_jams(piece, amaps_df)

100%|██████████| 266/266 [00:32<00:00,  3.55it/s]


### Prepare A-MAPS into JAMS and h5 files

In [279]:
from jams.util import smkdirs
import pumpp
import crema.utils

In [322]:
def convert(aud, jam, pump, outdir):
    """Run ``pump`` on one audio/annotation pair and save the result as h5.

    Parameters
    ----------
    aud
        Audio input, passed to ``pump.transform`` and to ``crema.utils.base``
        (presumably a file path — confirm against caller).
    jam
        Annotation input passed to ``pump.transform``.
    pump
        Object providing ``transform(aud, jam)``; the mapping it returns is
        unpacked as keyword arguments into ``crema.utils.save_h5``.
    outdir : str
        Directory of the output file ``<base(aud)>.h5``.
    """
    features = pump.transform(aud, jam)
    h5_stem = os.path.join(outdir, crema.utils.base(aud))
    h5_path = os.path.extsep.join([h5_stem, 'h5'])
    crema.utils.save_h5(h5_path, **features)
    
def get_ann_audio_amaps(data_home, index_json_path):
    """List the (audio path, JAMS path) pairs named by an A-MAPS index file.

    Parameters
    ----------
    data_home : str
        Directory that the file names in the index are relative to.
    index_json_path : str
        JSON index readable by ``pandas.read_json`` with ``audio`` and
        ``jams`` columns and one row per track.

    Returns
    -------
    list of tuple
        One ``(audio_path, jams_path)`` tuple per row, in index order.

    Raises
    ------
    AssertionError
        If an audio or JAMS file of a track does not exist. The message
        names the track and the missing path(s).
    """
    index_df = pd.read_json(index_json_path)

    paired = []
    for piece in index_df.index:
        aud_path = os.path.join(data_home, index_df.audio[piece])
        jams_path = os.path.join(data_home, index_df.jams[piece])
        # name the offending path(s) so a broken index entry can be located
        missing = [p for p in (aud_path, jams_path) if not os.path.isfile(p)]
        assert not missing, 'missing file(s) for {!r}: {}'.format(
            piece, ', '.join(missing))
        paired.append((aud_path, jams_path))

    return paired

In [292]:
# Convert every A-MAPS (audio, JAMS) pair into a feature file under output_path.
# NOTE(review): machine-specific absolute paths — adjust before running elsewhere.
output_path = '/Users/tom/Music/A-MAPS_feature/'
data_home = '/Users/tom/Music/A-MAPS/'
index_json = 'dataset_indecies/amaps_index.json'
smkdirs(output_path)

# make_pump is imported by name (`from train_utils import make_pump`); the
# module name `train_utils` is never bound, so call the function directly.
pump = make_pump()
stream = tqdm.tqdm(get_ann_audio_amaps(data_home, index_json),
                   desc='Converting audio data')
for aud, jam in stream:
    convert(aud, jam, pump, output_path)

Converting audio data: 100%|██████████| 266/266 [43:01<00:00, 17.87s/it] 


### Generate mixture set

In [294]:
# Reload the saved GuitarSet index and list its columns.
index_df = pd.read_json('dataset_indecies/guitarset_index.json')
index_df.keys()

Index(['audio_mic_comp', 'audio_mix_comp', 'jams_comp', 'audio_mic_solo',
       'audio_mix_solo', 'jams_solo'],
      dtype='object')

In [295]:
# Peek at the mic-audio paths of the comp recordings.
index_df.audio_mic_comp

00_BN1-129-Eb    audio_mono-mic/00_BN1-129-Eb_comp_mic.wav
00_BN1-147-Gb    audio_mono-mic/00_BN1-147-Gb_comp_mic.wav
00_BN2-131-B      audio_mono-mic/00_BN2-131-B_comp_mic.wav
00_BN2-166-Ab    audio_mono-mic/00_BN2-166-Ab_comp_mic.wav
00_BN3-119-G      audio_mono-mic/00_BN3-119-G_comp_mic.wav
                                   ...                    
05_SS1-68-E        audio_mono-mic/05_SS1-68-E_comp_mic.wav
05_SS2-107-Ab    audio_mono-mic/05_SS2-107-Ab_comp_mic.wav
05_SS2-88-F        audio_mono-mic/05_SS2-88-F_comp_mic.wav
05_SS3-84-Bb      audio_mono-mic/05_SS3-84-Bb_comp_mic.wav
05_SS3-98-C        audio_mono-mic/05_SS3-98-C_comp_mic.wav
Name: audio_mic_comp, Length: 180, dtype: object

In [309]:
# Load the same index file again under the name gs_index.
gs_index = pd.read_json('dataset_indecies/guitarset_index.json')

In [310]:
# Columns available in the index.
gs_index.keys()

Index(['audio_mic_comp', 'audio_mix_comp', 'jams_comp', 'audio_mic_solo',
       'audio_mix_solo', 'jams_solo'],
      dtype='object')

In [315]:
# Derive the duo file path from each solo path by replacing the
# second-to-last '_'-separated field with 'duo'.
duo_paths = []
for solo_path in gs_index['audio_mic_solo']:
    path_fields = solo_path.rsplit('_', 2)
    path_fields[-2] = 'duo'
    duo_paths.append('_'.join(path_fields))
    

In [316]:
# New column: the duo mixture path, alongside the existing solo/comp paths.
gs_index['audio_mic_duo'] = duo_paths

In [318]:
# Write the index, now including audio_mic_duo, back over the file it came from.
gs_index.to_json(path_or_buf='dataset_indecies/guitarset_index.json')

In [320]:
def get_ann_audio_gs_duo(data_home, index_json_path):
    """List the (duo audio path, solo JAMS path) pairs named by a GuitarSet index.

    Parameters
    ----------
    data_home : str
        Directory that the paths in the index are relative to.
    index_json_path : str
        JSON index readable by ``pandas.read_json`` with ``audio_mic_duo``
        and ``jams_solo`` columns and one row per excerpt.

    Returns
    -------
    list of tuple
        One ``(audio_path, jams_path)`` tuple per row, in index order. The
        duo audio is paired with the annotation of the solo recording.

    Raises
    ------
    AssertionError
        If a duo audio or solo JAMS file does not exist. The message names
        the excerpt and the missing path(s).
    """
    index_df = pd.read_json(index_json_path)

    pair = []
    for piece in index_df.index:
        aud_path = os.path.join(data_home, index_df.audio_mic_duo[piece])
        jams_path = os.path.join(data_home, index_df.jams_solo[piece])
        # name the offending path(s) so a broken index entry can be located
        missing = [p for p in (aud_path, jams_path) if not os.path.isfile(p)]
        assert not missing, 'missing file(s) for {!r}: {}'.format(
            piece, ', '.join(missing))
        pair.append((aud_path, jams_path))

    return pair
    
def make_duo(aud_solo, aud_comp, outdir):
    """Sum a solo and a comp recording and write the mixture as a WAV file.

    The output name is the base name of ``aud_solo`` with its second-to-last
    '_'-separated field replaced by ``'duo'``, placed in ``outdir``.

    Parameters
    ----------
    aud_solo : str
        Path of the solo recording; also determines the output file name.
    aud_comp : str
        Path of the comp recording.
    outdir : str
        Directory the mixture is written to.

    Notes
    -----
    Both files are loaded at their native sample rate, and the rate reported
    for the comp file is used when writing.
    NOTE(review): assumes both recordings share sample rate and length — confirm.
    """
    solo_signal, sr = librosa.load(aud_solo, sr=None)
    comp_signal, sr = librosa.load(aud_comp, sr=None)

    name_fields = crema.utils.base(aud_solo).rsplit('_', 2)
    name_fields[-2] = 'duo'
    duo_stem = os.path.join(outdir, '_'.join(name_fields))
    duo_path = os.path.extsep.join([duo_stem, 'wav'])

    mixture = solo_signal + comp_signal
    soundfile.write(duo_path, mixture, sr)

In [323]:
# Convert every GuitarSet (duo audio, solo JAMS) pair into a feature file under
# output_path. This rebinds output_path, data_home, index_json and pump.
# NOTE(review): machine-specific absolute paths — adjust before running elsewhere.
output_path = '/Users/tom/Music/GS_features/'
data_home = '/Users/tom/Music/GuitarSet/'
index_json = 'dataset_indecies/guitarset_index.json'
smkdirs(output_path)

# make_pump is imported by name (`from train_utils import make_pump`); the
# module name `train_utils` is never bound, so call the function directly.
pump = make_pump()
stream = tqdm.tqdm(get_ann_audio_gs_duo(data_home, index_json),
                   desc='Converting audio data')

for aud_path, jam in stream:
    convert(aud_path, jam, pump, output_path)





Converting audio data:   0%|          | 0/180 [00:00<?, ?it/s][A[A[A[A



Converting audio data:   1%|          | 1/180 [00:01<04:48,  1.61s/it][A[A[A[A



Converting audio data:   1%|          | 2/180 [00:02<04:27,  1.50s/it][A[A[A[A



Converting audio data:   2%|▏         | 3/180 [00:04<04:34,  1.55s/it][A[A[A[A



Converting audio data:   2%|▏         | 4/180 [00:05<04:23,  1.50s/it][A[A[A[A



Converting audio data:   3%|▎         | 5/180 [00:08<05:13,  1.79s/it][A[A[A[A



Converting audio data:   3%|▎         | 6/180 [00:10<05:21,  1.85s/it][A[A[A[A



Converting audio data:   4%|▍         | 7/180 [00:11<05:03,  1.76s/it][A[A[A[A



Converting audio data:   4%|▍         | 8/180 [00:13<05:19,  1.86s/it][A[A[A[A



Converting audio data:   5%|▌         | 9/180 [00:16<05:36,  1.97s/it][A[A[A[A



Converting audio data:   6%|▌         | 10/180 [00:18<05:54,  2.08s/it][A[A[A[A



Converting audio data:   6%|▌         | 11/180 [00:20<05:

In [324]:
# Scratch check: np.roll shifts elements toward the end and wraps the tail around.
np.roll([1, 2, 3, 4, 5], 2)

array([4, 5, 1, 2, 3])