## Data Augmentation
- [ ] Data augmentation on a single datum (one input/target pair)
- [ ] Generalize data augmentation
- [ ] A generator that transposes randomly

In [109]:
import pumpp
import pescador
import crema.utils
import numpy as np
import librosa

In [20]:
# Load one pre-computed feature file to experiment with.
# NOTE(review): absolute, user-specific path -- will not resolve on another machine.
test_data = crema.utils.load_h5('/Users/tom/Music/GS_features/00_BN1-129-Eb_duo_mic.h5')
# NOTE(review): `make_pump` is not defined or imported in any visible cell; a later
# cell calls `mm.make_pump()` instead. Presumably the same function -- confirm.
pump = make_pump()

In [133]:
class TransSampler(pumpp.sampler.Sampler):
    '''Sampler that applies one random transposition to every patch it draws.

    Each call to `sample` draws a patch through the parent sampler, picks a
    shift of 0-11 semitones from `self.rng`, rolls the pitch-class axis of
    every `'chord'` array by that shift, and rewrites the key annotations
    (`key_tag/tag`, `key_struct/pitch_profile`, `key_struct/tonic`) so that
    they describe the transposed key.

    Parameters are forwarded unchanged to `pumpp.sampler.Sampler`.
    '''

    def __init__(self, n_samples, duration, *ops, **kwargs):
        super(TransSampler, self).__init__(n_samples, duration, *ops, **kwargs)

        # Only the encoder of a sparse key-tag transformer is kept; it maps
        # between key strings ("<tonic>:<mode>") and their encoded labels.
        ktt = pumpp.task.KeyTagTransformer(sparse=True)
        self.tag_encoder = ktt.encoder

    def sample(self, data, interval):
        '''Sample a patch from the data object and transpose it randomly

        Parameters
        ----------
        data : dict
            A data dict as produced by pumpp.Pump.transform

        interval : slice
            The time interval to sample

        Returns
        -------
        data_slice : dict
            `data` restricted to `interval`, with chord arrays and key
            annotations transposed by a random number of semitones.
            The arrays are copies; `data` itself is never modified.

        Raises
        ------
        IndexError
            If a `'chord'` array has neither 12 nor 13 bins along axis 2.
        '''
        patch = super(TransSampler, self).sample(data, interval)

        # The parent sampler may hand back views into `data`.  Everything
        # below writes in place, so work on copies; otherwise each draw
        # would permanently transpose the source arrays.
        data_slice = {key: np.array(patch[key], copy=True) for key in patch}

        transpose_amount = self.rng.randint(0, 12)

        for key in data_slice:
            if 'chord' not in key:
                continue

            n_bins = data_slice[key].shape[2]
            if n_bins == 13:
                # Roll only the first 12 bins; the 13th is left in place
                # (presumably a "no chord"/"no bass" bin -- TODO confirm).
                to_transpose = data_slice[key][:, :, :-1, :]
                data_slice[key][:, :, :-1, :] = np.roll(to_transpose, transpose_amount, 2)
            elif n_bins == 12:
                data_slice[key] = np.roll(data_slice[key], transpose_amount, 2)
            else:
                raise IndexError(
                    'cannot transpose {!r}: expected 12 or 13 bins on axis 2, '
                    'got {}'.format(key, n_bins))

        # Decode the key tags once.  reshape(-1) keeps the array 1-D even
        # when the patch holds a single frame (squeeze() would make it 0-d).
        key_tags = data_slice['key_tag/tag']
        key_labels = self.tag_encoder.inverse_transform(key_tags.reshape(-1))
        tonics = [label.split(':')[0] for label in key_labels]
        modes = [label.split(':')[1] for label in key_labels]
        new_tonics = librosa.midi_to_note(librosa.note_to_midi(tonics) + transpose_amount, octave=False)

        for i, new_tonic in enumerate(new_tonics):
            new_key = ':'.join([new_tonic, modes[i]])
            data_slice['key_tag/tag'][0, i, :] = self.tag_encoder.transform([new_key])
            profile, tonic = pumpp.task.key._encode_key_str(new_key, True)
            data_slice['key_struct/pitch_profile'][0, i, :] = profile
            data_slice['key_struct/tonic'][0, i, :] = tonic

        return data_slice
  

In [134]:
# 12 patches per track, 50 frames each; the pump's ops are unpacked positionally
# before the seed keyword so the call reads in signature order.
trans_sampler = TransSampler(12, 50, *pump.ops, random_state=20190924)

In [135]:
# Take the first patch the transposing sampler yields for the test track.
data_slice = next(trans_sampler(test_data))

In [137]:
# Sanity check: list every field of the sampled patch together with its shape.
for field, values in data_slice.items():
    print(field, values.shape)

chord_struct/bass (1, 50, 13, 1)
chord_struct/pitch (1, 50, 12, 1)
chord_struct/root (1, 50, 13, 1)
key_struct/pitch_profile (1, 50, 12)
key_struct/tonic (1, 50, 1)
key_tag/tag (1, 50, 1)


In [90]:
# Sparse key transformers used by the scratch cells below.
# `ktt.encoder` converts between key strings and encoded tags.
ktt = pumpp.task.KeyTagTransformer(sparse=True)
# NOTE(review): `kt` is not referenced in any visible cell.
kt = pumpp.task.KeyTransformer(sparse=True)

In [98]:
# Decode the first key tag of the slice back to its "<tonic>:<mode>" string.
# (The original line read `ktt.encoder.transform() inverse_transform(...)`,
# which is a syntax error; only the inverse_transform call is intended.)
# NOTE(review): `key_tag_slice` is not defined in any visible cell.
string = ktt.encoder.inverse_transform(key_tag_slice.squeeze())[0]
'D' in string

True

In [97]:
# Encode the key string; the result is displayed as a (pitch profile, tonic) pair.
# The helper is referenced through its module path because the bare name
# `_encode_key_str` is not imported in any visible cell.
pumpp.task.key._encode_key_str(string, True)

(array([1., 0., 1., 1., 0., 1., 0., 1., 1., 0., 1., 0.]), 3)

In [122]:
# Scratch version of the key re-labelling performed in TransSampler.sample.
# NOTE(review): `key_tag_slice` and `transpose_amount` come from other cells.
key_labels = ktt.encoder.inverse_transform(key_tag_slice.squeeze())  # decode once
tonics = [label.split(':')[0] for label in key_labels]
modes = [label.split(':')[1] for label in key_labels]
new_tonics = librosa.midi_to_note(librosa.note_to_midi(tonics) + transpose_amount, octave=False)

for i, new_tonic in enumerate(new_tonics):
    new_key = ':'.join([new_tonic, modes[i]])
    # Store the encoded tag, not the raw key string: the tag array holds
    # encoder labels, so assigning a str such as 'D:major' would fail.
    data_slice['key_tag/tag'][0, i, :] = ktt.encoder.transform([new_key])
    profile, tonic = pumpp.task.key._encode_key_str(new_key, True)
    data_slice['key_struct/pitch_profile'][0, i, :] = profile
    data_slice['key_struct/tonic'][0, i, :] = tonic

In [53]:
# Roll every bin except the last one along axis 2 and write the result back.
# NOTE(review): `bass_slice` and `transpose_amount` are defined in other cells.
to_transpose = bass_slice[:, :, :-1, :]
bass_slice[:, :, :-1, :] = np.roll(to_transpose, shift=transpose_amount, axis=2)
to_transpose.shape

(1, 50, 12, 1)

In [54]:
# Draw a shift in [0, 12) from NumPy's global (unseeded) generator and apply it.
transpose_amount = np.random.randint(low=0, high=12)
bass_slice[:, :, :-1, :] = np.roll(to_transpose, shift=transpose_amount, axis=2)

array([[[[8.92261276e-04],
         [6.07730169e-03],
         [8.18887667e-04],
         [1.22523047e-02],
         [7.60432243e-01],
         [1.34902028e-03],
         [6.89479802e-03],
         [2.88591185e-03],
         [4.94817225e-03],
         [9.45042223e-02],
         [1.23093128e-02],
         [9.63288248e-02]],

        [[9.99674317e-04],
         [5.09656267e-03],
         [7.77397945e-04],
         [1.23472102e-02],
         [7.66087890e-01],
         [1.14939979e-03],
         [7.38017214e-03],
         [3.72485979e-03],
         [5.63454628e-03],
         [8.80874172e-02],
         [1.13477604e-02],
         [9.70177725e-02]],

        [[8.57816776e-04],
         [4.95011592e-03],
         [7.34887202e-04],
         [1.25276456e-02],
         [7.55448163e-01],
         [1.12185394e-03],
         [7.46424403e-03],
         [2.35026679e-03],
         [5.56454016e-03],
         [8.05902779e-02],
         [1.14960736e-02],
         [1.16431311e-01]],

        [[8.16228159e-

###  Now build training_gen and validation_gen with new files

In [173]:
from pescador.maps import buffer_stream, keras_tuples
import pandas as pd
import keras
import os, pickle

import data_pipes as dp
import make_models as mm

In [187]:
# Training configuration.
# NOTE(review): the absolute paths are user-specific.
DATA_HOME = "/Users/tom/Music/GuitarSet/"  # not referenced in the visible cells
OUTPUT_PATH = 'resources'                  # where model_spec.pkl and gs_model.h5 are written
working = '/Users/tom/Music/GS_features/'  # passed to dp.data_generator / dp.val_generator
max_samples = 12    # first argument to dp.train_sampler
duration = 12       # second argument to dp.train_sampler
poisson_rate = 4    # `rate` argument of dp.data_generator
batch_size = 24     # buffer_stream batch size; also given to the TensorBoard callback
epochs = 512        # number of epochs passed to fit_generator
epoch_size = 100    # steps per epoch; also passed to dp.data_generator
reduce_lr = 8       # patience of ReduceLROnPlateau
early_stopping = 32 # patience of EarlyStopping
seed = 20190924     # random state for the sampler and the training data generator

In [188]:
# Build the pump from the project's make_models module; this rebinds the
# `pump` created in the augmentation section above.
pump = mm.make_pump()

In [189]:
# Build the model
# `inputs` and `outputs` are reused below as the `inputs=`/`outputs=` arguments
# of keras_tuples for both the training and the validation streams.
model, inputs, outputs = mm.construct_model(pump)

In [190]:
# Build the sampler
sampler = dp.train_sampler(max_samples, duration, pump, seed)

# Dataset index plus the train/validation split. Column 0 of each split file
# is taken; its values are used below as row labels into `gs_index`.
gs_index = pd.read_json('dataset_indecies/guitarset_index.json')
val_index = pd.read_json('dataset_indecies/gs_val_idx.json')[0]
train_index = pd.read_json('dataset_indecies/gs_train_idx.json')[0]

In [191]:
def _mic_basenames(column, indices):
    '''Return `crema.utils.base` of `gs_index[column]` for each entry of `indices`.'''
    paths = getattr(gs_index, column)
    return [crema.utils.base(paths[idx]) for idx in indices]


# Validation tracks: solo, comp and duo microphone recordings, in that order.
val_solo_basenames = _mic_basenames('audio_mic_solo', val_index)
val_comp_basenames = _mic_basenames('audio_mic_comp', val_index)
val_duo_basenames = _mic_basenames('audio_mic_duo', val_index)
val_track_ids = val_solo_basenames + val_comp_basenames + val_duo_basenames

# Training tracks, assembled the same way.
train_solo_basenames = _mic_basenames('audio_mic_solo', train_index)
train_comp_basenames = _mic_basenames('audio_mic_comp', train_index)
train_duo_basenames = _mic_basenames('audio_mic_duo', train_index)
train_track_ids = train_solo_basenames + train_comp_basenames + train_duo_basenames

In [192]:
# Training pipeline: per-track streamer -> batches of `batch_size` -> Keras tuples.
train_streamer = dp.data_generator(
    working, train_track_ids, sampler, epoch_size,
    augment=False,
    rate=poisson_rate,
    mode='with_replacement',
    random_state=seed,
)

train_batches = buffer_stream(train_streamer(), batch_size, axis=0)

gen_train = keras_tuples(train_batches, inputs=inputs, outputs=outputs)

In [193]:
# Validation pipeline; the streamer's stream count doubles as validation_steps.
val_streamer = dp.val_generator(working, val_track_ids, augment=False)
validation_size = val_streamer.n_streams

gen_val = keras_tuples(val_streamer(), inputs=inputs, outputs=outputs)

In [194]:
# One loss per model output; accuracy is tracked on the key tag only.
loss = {'key_tag': 'sparse_categorical_crossentropy',
        'key_profile': 'binary_crossentropy',
        'key_tonic': 'sparse_categorical_crossentropy'}
metrics = {'key_tag': 'sparse_categorical_accuracy'}

# Validation metric that drives checkpointing, LR reduction and early stopping.
monitor = 'val_key_tag_sparse_categorical_accuracy'

#sgd = K.optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
adam = keras.optimizers.Adam()
model.compile(adam, loss=loss, metrics=metrics)

# The spec and the checkpoints are both written under OUTPUT_PATH; create it
# up front so a fresh checkout does not fail with FileNotFoundError.
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Store the model
model_spec = keras.utils.serialize_keras_object(model)
with open(os.path.join(OUTPUT_PATH, 'model_spec.pkl'), 'wb') as fd:
    pickle.dump(model_spec, fd)

# Construct the weight path
weight_path = os.path.join(OUTPUT_PATH, 'gs_model.h5')

# Build the callbacks
cb = []
cb.append(keras.callbacks.ModelCheckpoint(weight_path,
                                          save_best_only=True,
                                          verbose=1,
                                          monitor=monitor))

cb.append(keras.callbacks.ReduceLROnPlateau(patience=reduce_lr,
                                            verbose=1,
                                            monitor=monitor))

cb.append(keras.callbacks.EarlyStopping(patience=early_stopping,
                                        verbose=1,
                                        monitor=monitor))

cb.append(keras.callbacks.TensorBoard(log_dir='./logs/gs_model', batch_size=batch_size, update_freq='epoch'))

In [None]:
# Train: `epoch_size` steps per epoch for up to `epochs` epochs, validating on
# `validation_size` batches and running the callbacks built above.
history = model.fit_generator(
    gen_train,
    steps_per_epoch=epoch_size,
    epochs=epochs,
    validation_data=gen_val,
    validation_steps=validation_size,
    callbacks=cb,
)

Epoch 1/512

Epoch 00001: val_key_tag_sparse_categorical_accuracy improved from -inf to 0.61180, saving model to resources/gs_model.h5
Epoch 2/512

Epoch 00002: val_key_tag_sparse_categorical_accuracy improved from 0.61180 to 0.63425, saving model to resources/gs_model.h5
Epoch 3/512

Epoch 00003: val_key_tag_sparse_categorical_accuracy improved from 0.63425 to 0.67915, saving model to resources/gs_model.h5
Epoch 4/512

Epoch 00004: val_key_tag_sparse_categorical_accuracy improved from 0.67915 to 0.68295, saving model to resources/gs_model.h5
Epoch 5/512

Epoch 00005: val_key_tag_sparse_categorical_accuracy did not improve from 0.68295
Epoch 6/512

Epoch 00006: val_key_tag_sparse_categorical_accuracy did not improve from 0.68295
Epoch 7/512

Epoch 00007: val_key_tag_sparse_categorical_accuracy improved from 0.68295 to 0.68502, saving model to resources/gs_model.h5
Epoch 8/512

Epoch 00008: val_key_tag_sparse_categorical_accuracy improved from 0.68502 to 0.72678, saving model to resourc