In [1]:
import glob
import math
import os
import random
import sys

import keras
from keras.layers import *
from keras.models import *
from keras.optimizers import *
from keras import backend as K
from keras import metrics
import mido
from mido import Message, MetaMessage, MidiFile, MidiTrack
import numpy
import sklearn.utils

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Print arrays in full instead of summarising them with "...".
# NaN is not a valid threshold (current NumPy raises ValueError for it);
# sys.maxsize is the value the NumPy documentation recommends for
# untruncated output.
numpy.set_printoptions(threshold=sys.maxsize)

In [3]:
# Fix the seeds of both random number generators used in this notebook
# (NumPy's global generator and the standard library's).
numpy.random.seed(0)
random.seed(0)

In [None]:
# Ticks-per-beat resolution that loaded MIDI delta times are rescaled to.
DEFAULT_TICKS = 480
# Number of preceding events the model sees when predicting the next one.
LOOKBACK = 128
# Bits per encoded event:
# 1 on/off flag + 8 note bits + 8 velocity bits + 16 delta-time bits.
INPUT_WIDTH = 1 + 8 + 8 + 16

In [None]:
class VAE(object): # technically not an autoencoder
    """Recurrent variational encoder that predicts the next event from a window of past events.

    ``create`` builds three Keras models on one shared graph:

    * ``encoder``     -- event window -> sampled latent vector
    * ``predictor``   -- latent vector -> per-bit probabilities of the next event
    * ``autoencoder`` -- event window -> per-bit probabilities of the next event
      (the encoder followed by the same predictor layers); this is the model
      that is compiled for training.
    """

    def create(self, vocab_size=INPUT_WIDTH, max_length=LOOKBACK, latent_rep_size=128, lr=0.001):
        """Build the three models and compile ``autoencoder``.

        Args:
            vocab_size: width of one encoded event (last input axis and output width).
            max_length: number of past events in one input window.
            latent_rep_size: dimensionality of the latent vector.
            lr: learning rate handed to the Adam optimizer.
        """
        self.encoder = None
        self.predictor = None
        self.autoencoder = None

        x = Input(shape=(max_length, vocab_size))

        vae_loss, encoded = self._build_encoder(x, latent_rep_size=latent_rep_size, max_length=max_length)
        self.encoder = Model(inputs=x, outputs=encoded)

        # Instantiate the predictor layers once and apply the same instances to
        # both inputs, so the standalone predictor shares its weights with the
        # end-to-end model that actually gets trained.
        predictor_layers = self._make_predictor_layers(vocab_size)

        encoded_input = Input(shape=(latent_rep_size,))
        pred = self._build_predictor(encoded_input, vocab_size, max_length, layers=predictor_layers)
        self.predictor = Model(encoded_input, pred)

        end_to_end = self._build_predictor(encoded, vocab_size, max_length, layers=predictor_layers)
        self.autoencoder = Model(inputs=x, outputs=end_to_end)
        self.autoencoder.compile(optimizer=Adam(lr=lr),
                                 loss=vae_loss,
                                 metrics=['accuracy'])

    def _build_encoder(self, x, latent_rep_size=128, max_length=None, epsilon_std=0.01):
        """Build the encoder graph on top of input tensor ``x``.

        Two stacked bidirectional LSTMs (outputs concatenated), each followed
        by dropout, feed a ReLU dense layer and then two linear heads,
        ``z_mean`` and ``z_log_var``. The latent vector is sampled as
        ``z_mean + exp(z_log_var / 2) * epsilon`` with Gaussian noise of
        standard deviation ``epsilon_std``.

        Returns:
            ``(vae_loss, latent)``: the loss function to compile the
            end-to-end model with, and the sampled latent tensor.
        """
        h = Bidirectional(LSTM(500, return_sequences=True), merge_mode='concat')(x)
        h = Dropout(0.5)(h)
        h = Bidirectional(LSTM(500, return_sequences=False), merge_mode='concat')(h)
        h = Dropout(0.5)(h)
        h = Dense(435, activation='relu')(h)

        def sampling(args):
            # Reparameterisation: draw one noise vector per sample in the batch.
            z_mean_, z_log_var_ = args
            batch_size = K.shape(z_mean_)[0]
            epsilon = K.random_normal(shape=(batch_size, latent_rep_size), mean=0., stddev=epsilon_std)
            return z_mean_ + K.exp(z_log_var_ / 2) * epsilon

        z_mean = Dense(latent_rep_size, name='z_mean', activation='linear')(h)
        z_log_var = Dense(latent_rep_size, name='z_log_var', activation='linear')(h)

        def vae_loss(y, pred):
            # Reconstruction term: binary cross-entropy over the flattened
            # batch, scaled by max_length.
            y = K.flatten(y)
            pred = K.flatten(pred)
            xent_loss = max_length * metrics.binary_crossentropy(y, pred)
            # KL term, averaged over the latent dimensions of each sample.
            kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
            return xent_loss + kl_loss

        return (vae_loss, Lambda(sampling, output_shape=(latent_rep_size,), name='lambda')([z_mean, z_log_var]))

    def _make_predictor_layers(self, vocab_size):
        """Create one fresh set of predictor layers, in application order."""
        return [
            Dense(100, activation='relu'),
            Dropout(0.25),
            Dense(50, activation='relu'),
            # One sigmoid unit per bit of the predicted event. The width follows
            # vocab_size so that it always matches the model's input width.
            Dense(vocab_size, activation='sigmoid', name='pred'),
        ]

    def _build_predictor(self, encoded, vocab_size, max_length, layers=None):
        """Apply the predictor head to the latent tensor ``encoded``.

        Args:
            encoded: latent tensor to predict from.
            vocab_size: number of output units (bits of one event).
            max_length: unused; kept so existing calls keep working.
            layers: optional layer instances to reuse (for weight sharing);
                a fresh set is created when omitted.

        Returns:
            Tensor with ``vocab_size`` sigmoid outputs per sample.
        """
        if layers is None:
            layers = self._make_predictor_layers(vocab_size)
        h = encoded
        for layer in layers:
            h = layer(h)
        return h

In [None]:
# Instantiate the model wrapper, build and compile its Keras models, and
# print the layer summary of the end-to-end model.
model = VAE()
model.create()
model.autoencoder.summary()

In [None]:
def load_midi(midi_file, track_name):
    """Read the note events of a named track from a MIDI file.

    Args:
        midi_file: path of the MIDI file to read.
        track_name: name of the track(s) whose events are extracted.

    Returns:
        A numpy array with one row per ``note_on``/``note_off`` message:
        ``(on_flag, note, velocity, delta_ticks)``. ``on_flag`` is 1 for
        ``note_on`` and 0 for ``note_off``. ``delta_ticks`` is the time since
        the previous note event of the track, rescaled from the file's
        resolution to ``DEFAULT_TICKS`` ticks per beat and truncated to an
        integer.

    Raises:
        AssertionError: if no matching note event is found.
    """
    data = []
    midi = mido.MidiFile(midi_file)
    for track in midi.tracks:
        if track.name != track_name:
            continue
        # A MIDI delta time is relative to the previous message of ANY type.
        # The deltas of skipped (non-note) messages therefore have to be
        # carried over to the next note event; dropping them would make every
        # later note start too early.
        pending_ticks = 0
        for message in track:
            pending_ticks += message.time
            if message.type in ('note_on', 'note_off'):
                data.append((
                    1 if message.type == 'note_on' else 0,
                    message.note,
                    message.velocity,
                    int(pending_ticks * DEFAULT_TICKS / midi.ticks_per_beat),
                ))
                pending_ticks = 0
    assert data
    return numpy.array(data)

In [None]:
def augment(data):
    """Return every transposition of an event array that stays within 0..127.

    Column 1 of ``data`` holds the note numbers. The sequence is shifted so
    that its lowest note lands on 0, 1, 2, ... for as long as its highest
    note does not exceed 127; all other columns are copied unchanged.

    Returns a list of arrays, one per transposition. ``data`` is not modified.
    """
    pitches = data[:, 1]
    lowest = pitches.min()
    highest = pitches.max()

    transposed = []
    for shift in range(128 - (highest - lowest)):
        variant = numpy.copy(data)
        # Re-base the whole pitch column in one vectorised assignment.
        variant[:, 1] = pitches - lowest + shift
        transposed.append(variant)
    return transposed

In [None]:
def encode(data):
    """Convert integer event sequences into binary feature matrices.

    Each event row ``(on_flag, note, velocity, time)`` becomes
    ``INPUT_WIDTH`` 0/1 values: column 0 is the on/off flag, columns 1-8 the
    note as 8 bits, columns 9-16 the velocity as 8 bits and the remaining
    columns the time as 16 bits, most significant bit first.

    Returns a list with one ``(len(sequence), INPUT_WIDTH)`` integer array
    per input sequence.
    """
    def bit_list(value, width):
        # Zero-padded binary digits of ``value`` as a list of ints.
        return [int(digit) for digit in format(value, '0{}b'.format(width))]

    matrices = []
    for sequence in data:
        matrix = numpy.zeros((len(sequence), INPUT_WIDTH), dtype=int)
        # Iterating the matrix yields row views, so writes land in ``matrix``.
        for row, event in zip(matrix, sequence):
            row[0] = event[0]
            row[1:9] = bit_list(event[1], 8)
            row[9:17] = bit_list(event[2], 8)
            row[17:] = bit_list(event[3], 16)
        matrices.append(matrix)
    return matrices

In [None]:
def prepare(data):
    """Turn encoded sequences into (history window, next event) training pairs.

    For every position of every sequence the target is the event at that
    position and the input is the up-to-``LOOKBACK`` events preceding it,
    zero-padded at the front to exactly ``LOOKBACK`` rows (all zeros for the
    first event of a sequence).

    Returns ``(X, Y)``: numpy arrays built from all windows and all targets.
    """
    windows = []
    targets = []
    for sequence in data:
        for position in range(len(sequence)):
            if position:
                history = sequence[max(position - LOOKBACK, 0):position, :]
            else:
                # Nothing precedes the first event: start from one zero row.
                history = numpy.zeros((1, INPUT_WIDTH), dtype=int)
            missing = LOOKBACK - len(history)
            if missing > 0:
                history = numpy.pad(history, [(missing, 0), (0, 0)], mode='constant')
            windows.append(history)
            targets.append(sequence[position, :])
    return numpy.array(windows), numpy.array(targets)

In [None]:
def load_all(midi_dir, track_name):
    """Build the training set from every MIDI file in a directory.

    Each ``*.mid`` / ``*.midi`` file in ``midi_dir`` (in sorted order) is
    loaded, augmented, encoded and windowed. A file that fails at any stage
    is reported together with the reason and skipped, so one bad file does
    not abort the whole load.

    Args:
        midi_dir: directory that is searched (non-recursively) for MIDI files.
        track_name: name of the track to extract from each file.

    Returns:
        ``(X, Y)``: all input windows and all targets as numpy arrays. Both
        are empty arrays when no file could be loaded.
    """
    midi_files = sorted(glob.glob(os.path.join(midi_dir, '*.mid')) + glob.glob(os.path.join(midi_dir, '*.midi')))
    window_chunks = []
    target_chunks = []
    for midi_file in midi_files:
        try:
            windows, targets = prepare(encode(augment(load_midi(midi_file, track_name))))
        except Exception as error:
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # through; printing the error shows why the file was skipped.
            print("Skipping", midi_file, "-", repr(error))
            continue
        window_chunks.append(windows)
        target_chunks.append(targets)
    if not window_chunks:
        return numpy.array([]), numpy.array([])
    # Join the per-file arrays directly rather than extending Python lists
    # row by row and re-stacking them, which needs far more memory.
    return numpy.concatenate(window_chunks), numpy.concatenate(target_chunks)

In [None]:
#data = load_midi('/home/santiago/Projects/ProjectEuterpe/data/midi/1.mid', 'Chords')

In [None]:
#data

In [None]:
#data = augment(data)

In [None]:
#data

In [None]:
#data = encode(data)

In [None]:
#data

In [None]:
#data = prepare(data)

In [None]:
#data

In [None]:
#len(data)

In [None]:
# Directory that is searched for the *.mid / *.midi training files.
midi_dir = '/home/santiago/Projects/ProjectEuterpe/data/midi/'

In [None]:
# Load, augment, encode and window the 'Chords' track of every MIDI file.
# The result is an (X, Y) tuple of input windows and targets.
data = load_all(midi_dir, 'Chords')

In [None]:
# Sanity check: shapes of the input windows and of the targets.
data[0].shape, data[1].shape

In [None]:
def generator(X, Y, batch_size=32, shuffle=True, random_seed=0):
    """Yield ``(X_batch, Y_batch)`` pairs forever, one pass over the data per epoch.

    Args:
        X: numpy array of inputs, indexed along the first axis.
        Y: numpy array of targets with the same length as ``X``.
        batch_size: maximum number of samples per batch; the last batch of an
            epoch may be smaller.
        shuffle: draw a new random sample order for every epoch.
        random_seed: seed of the generator's private random state.

    Yields:
        Tuples of matching slices of ``X`` and ``Y``.

    Raises:
        AssertionError: if ``X`` and ``Y`` differ in length.
        ValueError: if the data is empty (there would be nothing to yield and
            the loop would spin forever).
    """
    assert len(X) == len(Y)
    total = len(X)
    if total == 0:
        raise ValueError("generator() needs at least one sample")

    # Seed ONE random state up front. Re-seeding with the same constant every
    # epoch would apply the same permutation over and over instead of drawing
    # independent shuffles.
    rng = numpy.random.RandomState(random_seed)
    while True:
        # Shuffle an index array rather than the data itself, so no full copy
        # of the dataset is made per epoch and the inputs stay untouched.
        order = rng.permutation(total) if shuffle else None
        for start in range(0, total, batch_size):
            if order is None:
                yield X[start:start + batch_size], Y[start:start + batch_size]
            else:
                picked = order[start:start + batch_size]
                yield X[picked], Y[picked]

In [None]:
# Samples per training batch; also used to derive the steps per epoch.
BATCH_SIZE = 256

In [None]:
# Endless batch generator over the prepared inputs (data[0]) and targets (data[1]).
gen = generator(data[0], data[1], batch_size=BATCH_SIZE)

In [None]:
# Directory where the training loop writes one weights file per epoch.
checkpoint_dir = '/home/santiago/Projects/ProjectEuterpe/checkpoints/chords/vae_binary/'

In [None]:
# Restore previously saved weights into the end-to-end model from an existing checkpoint file.
model.autoencoder.load_weights('/home/santiago/Projects/ProjectEuterpe/checkpoints/chords/vae_binary/epoch90.hdf5')

In [None]:
# Train until interrupted, writing one weights file per epoch.
#
# Numbering continues after the newest 'epoch<N>.hdf5' already present in
# checkpoint_dir. Restarting at 1 after resuming from saved weights would
# overwrite the earlier checkpoints with mislabelled later ones.
os.makedirs(checkpoint_dir, exist_ok=True)
epoch = 0
for checkpoint_path in glob.glob(os.path.join(checkpoint_dir, 'epoch*.hdf5')):
    checkpoint_number = os.path.basename(checkpoint_path)[len('epoch'):-len('.hdf5')]
    if checkpoint_number.isdigit():
        epoch = max(epoch, int(checkpoint_number))

steps_per_epoch = math.ceil(len(data[0]) / BATCH_SIZE)
while True:
    epoch += 1
    print('Epoch', epoch)
    model.autoencoder.fit_generator(gen, steps_per_epoch, epochs=1)
    model.autoencoder.save_weights(os.path.join(checkpoint_dir, 'epoch{}.hdf5'.format(epoch)))

In [None]:
def bits_to_int(bits):
    """Interpret ``bits`` (most significant bit first) as an unsigned integer.

    An empty sequence yields 0.
    """
    value = 0
    for digit in bits:
        # Make room for the next bit, then merge it into the lowest position.
        value <<= 1
        value |= digit
    return value

In [None]:
def to_midi(data, note_offset=0):
    """Convert encoded events back into a single-track MIDI file.

    Args:
        data: iterable of encoded events in the layout written by ``encode``:
            element 0 is the on/off flag, elements 1-8 the note bits,
            elements 9-16 the velocity bits, the rest the delta-time bits.
        note_offset: value added to every decoded note number.

    Returns:
        A ``MidiFile`` holding one track with a ``note_on``/``note_off``
        message per event.
    """
    midi = MidiFile()
    track = MidiTrack()
    midi.tracks.append(track)
    for event in data:
        onoff = 'note_on' if event[0] == 1 else 'note_off'
        # MIDI data bytes are limited to 0..127, but the 8-bit fields (plus
        # the offset) can decode to values outside that range, which mido
        # rejects. Clamp so one bad event does not abort the export.
        note = int(min(max(bits_to_int(event[1:9]) + note_offset, 0), 127))
        velocity = int(min(bits_to_int(event[9:17]), 127))
        time = int(bits_to_int(event[17:]))
        track.append(Message(onoff, note=note, velocity=velocity, time=time))
    return midi

In [None]:
# Generate 400 events autoregressively. Starting from an all-zero window, the
# model predicts the next event from the last LOOKBACK events; the prediction
# is rounded to 0/1 bits and appended to the history.
history = numpy.zeros((1, LOOKBACK, INPUT_WIDTH), dtype=int)
#history = numpy.random.randint(2, size=(1, LOOKBACK, INPUT_WIDTH), dtype=int)
for step in range(400):
    recent = history[:, -LOOKBACK:, :]
    next_event = numpy.round(model.autoencoder.predict(recent)).astype(int)
    history = numpy.concatenate([history, next_event[:, numpy.newaxis, :]], axis=1)
# Drop the batch axis and the initial zero window; keep only generated events.
history = history[0, LOOKBACK:, :]
history

In [None]:
# Decode the generated events to MIDI, adding 40 to every note number.
midi = to_midi(history, 40)

In [None]:
# Inspect the messages of the generated track.
list(midi.tracks[0])

In [None]:
# Write the generated MIDI file to disk.
midi.save('/home/santiago/Projects/ProjectEuterpe/data/test/test_vae_binary15.mid')