In [1]:
import glob
import math
import os
import random
import sys

import keras
from keras.layers import *
from keras.models import *
from keras.optimizers import *
from keras import backend as K
from keras import metrics
import mido
import numpy

Using TensorFlow backend.


In [2]:
# Print arrays in full instead of summarising them with "...".
# numpy rejects NaN as a threshold (its error message suggests sys.maxsize),
# so use the largest integer threshold instead of numpy.nan.
numpy.set_printoptions(threshold=sys.maxsize)

In [3]:
# Sizes shared by the cells below.
notes = 36  # length of the per-chord key vector built by the first encode_chords
beats = 8  # width of the second input/output of the two-input model further down
lookback = 64  # number of history steps per fixed-length window (also the VAE max_length default)
#input_width = 1 + notes + 8 + 17
# One on/off flag + 8 note bits + 8 velocity bits + 16 time bits per event.
input_width = 1 + 8 + 8 + 16

In [None]:
class VAE(object):
    """Variational autoencoder over fixed-length event sequences, built with Keras.

    Call `create` to populate the `encoder`, `decoder`, `sentiment_predictor`
    and `autoencoder` attributes with Keras models.
    """

    def create(self, vocab_size=input_width, max_length=lookback, latent_rep_size=128, lr=0.001):
        """Build all sub-models and compile the joint autoencoder.

        Args:
            vocab_size: width of each time step of the input tensor.
            max_length: number of time steps of the input tensor.
            latent_rep_size: size of the latent vector.
            lr: learning rate passed to the Adam optimizer.

        The autoencoder has two outputs (decoded sequence, predictor output)
        trained with `vae_loss` and binary cross-entropy respectively.
        """
        self.encoder = None
        self.decoder = None
        self.sentiment_predictor = None
        self.autoencoder = None

        x = Input(shape=(max_length, vocab_size))
        #x_embed = Embedding(vocab_size, 64, input_length=max_length)(x)

        vae_loss, encoded = self._build_encoder(x, latent_rep_size=latent_rep_size, max_length=max_length)
        self.encoder = Model(inputs=x, outputs=encoded)

        # Stand-alone models that start from a latent vector input.
        encoded_input = Input(shape=(latent_rep_size,))
        predicted_sentiment = self._build_sentiment_predictor(encoded_input)
        self.sentiment_predictor = Model(encoded_input, predicted_sentiment)

        decoded = self._build_decoder(encoded_input, vocab_size, max_length)
        self.decoder = Model(encoded_input, decoded)

        # NOTE(review): _build_decoder and _build_sentiment_predictor are called a
        # second time here, which creates new layer objects. It looks like
        # self.decoder / self.sentiment_predictor therefore do not share weights
        # with the trained autoencoder - confirm (e.g. weights loaded by name).
        self.autoencoder = Model(inputs=x, outputs=[self._build_decoder(encoded, vocab_size, max_length), self._build_sentiment_predictor(encoded)])
        self.autoencoder.compile(optimizer=Adam(lr=lr),
                                 loss=[vae_loss, 'binary_crossentropy'],
                                 metrics=['accuracy'])
    
    def _build_encoder(self, x, latent_rep_size=128, max_length=None, epsilon_std=0.01):
        """Build the encoder graph on top of the input tensor `x`.

        Args:
            x: input tensor.
            latent_rep_size: size of the latent vector.
            max_length: factor applied to the reconstruction term of the loss.
            epsilon_std: standard deviation of the noise used when sampling.

        Returns:
            A tuple `(vae_loss, z)` where `vae_loss` is a loss function closing
            over `z_mean` / `z_log_var`, and `z` is the sampled latent tensor.
        """
        h = Bidirectional(LSTM(500, return_sequences=True, name='lstm_1'), merge_mode='concat')(x)
        h = Dropout(0.5, name='dropout_1')(h)
        h = Bidirectional(LSTM(500, return_sequences=False, name='lstm_2'), merge_mode='concat')(h)
        h = Dropout(0.5, name='dropout_2')(h)
        h = Dense(435, activation='relu', name='dense_1')(h)

        def sampling(args):
            """Draw z = mean + exp(log_var / 2) * epsilon with Gaussian epsilon."""
            z_mean_, z_log_var_ = args
            batch_size = K.shape(z_mean_)[0]
            epsilon = K.random_normal(shape=(batch_size, latent_rep_size), mean=0., stddev=epsilon_std)
            return z_mean_ + K.exp(z_log_var_ / 2) * epsilon

        z_mean = Dense(latent_rep_size, name='z_mean', activation='linear')(h)
        z_log_var = Dense(latent_rep_size, name='z_log_var', activation='linear')(h)

        def vae_loss(x, x_decoded_mean):
            """Cross-entropy on the flattened tensors scaled by max_length, plus the KL term."""
            x = K.flatten(x)
            x_decoded_mean = K.flatten(x_decoded_mean)
            xent_loss = max_length * metrics.binary_crossentropy(x, x_decoded_mean)
            kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
            return xent_loss + kl_loss

        return (vae_loss, Lambda(sampling, output_shape=(latent_rep_size,), name='lambda')([z_mean, z_log_var]))
    
    def _build_decoder(self, encoded, vocab_size, max_length):
        """Build the decoder graph on top of the latent tensor `encoded`.

        The latent vector is repeated `max_length` times, passed through two
        sequence-returning LSTMs, and projected per time step to `vocab_size`
        sigmoid outputs. Returns the decoded tensor.
        """
        repeated_context = RepeatVector(max_length)(encoded)

        h = LSTM(500, return_sequences=True, name='dec_lstm_1')(repeated_context)
        #h = Dropout(0.5, name='dec_dropout_1')(h)
        h = LSTM(500, return_sequences=True, name='dec_lstm_2')(h)
        #h = Dropout(0.5, name='dec_dropout_2')(h)

        decoded = TimeDistributed(Dense(vocab_size, activation='sigmoid'), name='decoded_mean')(h)

        return decoded
    
    def _build_sentiment_predictor(self, encoded):
        """Build a two-layer dense head on the latent tensor and return its output.

        NOTE(review): the output width is read from the module-level
        `input_width` at call time, not from the `vocab_size` given to `create`.
        """
        h = Dense(100, activation='linear')(encoded)

        return Dense(input_width, activation='sigmoid', name='pred')(h)

In [4]:
# Ticks-per-beat resolution that chords_from_midi rescales delta times to.
default_ticks = 480

In [5]:
# Seed both Python's and numpy's global random generators.
random.seed(0)
numpy.random.seed(0)

In [6]:
def shuffle_list(*ls):
    """Shuffle several parallel sequences with one shared random permutation.

    The sequences are zipped into rows, the rows are shuffled with the global
    `random` generator, and the rows are zipped back into columns.

    Returns:
        A zip iterator yielding one tuple per input sequence.
    """
    rows = [row for row in zip(*ls)]
    random.shuffle(rows)
    columns = zip(*rows)
    return columns

In [7]:
def chords_from_midi(midi_file):
    """Read the note events of every track named 'Chords' from a MIDI file.

    Each note_on / note_off message becomes one row
    `(on_flag, note, velocity, time)`, where `on_flag` is 1 for note_on and 0
    for note_off, and `time` is the message time rescaled from the file's
    ticks_per_beat to `default_ticks` and truncated with `int`.

    Returns:
        A numpy array built from the collected rows.

    Raises:
        AssertionError: if no matching event was found.
    """
    midi = mido.MidiFile(midi_file)
    events = [
        (
            1 if message.type == 'note_on' else 0,
            message.note,
            message.velocity,
            int(message.time * default_ticks / midi.ticks_per_beat),
        )
        for track in midi.tracks
        if track.name == 'Chords'
        for message in track
        if message.type in ['note_on', 'note_off']
    ]
    assert events
    return numpy.array(events)


def encode_chords(sequence):
    """Group raw note events into chords and encode each chord as bit arrays.

    Consecutive events belong to the same chord while the on/off flag and the
    velocity stay the same and the event's delta time is 0. Each chord is
    encoded as:
      * its on/off flag,
      * a length-`notes` vector with a 1 at `note - lowest_note` per key,
      * the velocity as 8 bits,
      * the delta time of the chord's first event as 17 bits.

    Args:
        sequence: 2-D array with rows `(on_flag, note, velocity, time)`.

    Returns:
        `[switches, keys, velocities, times]` as numpy arrays; `switches` has
        a trailing axis of length 1.
    """
    switches = []
    keys = []
    velocities = []
    times = []

    def emit(switch, key, velocity, time):
        # Append one finished chord to the four parallel output lists.
        switches.append(switch)
        keys.append(key)
        velocities.append([int(x) for x in format(velocity, '08b')])
        times.append([int(x) for x in format(time, '017b')])

    switch = None
    key = None
    velocity = None
    time = None
    minimum = min(sequence[:, 1])
    for item in sequence:
        if switch != item[0] or velocity != item[2] or item[3] != 0:
            # A new chord starts here; flush the one collected so far, if any.
            if key is not None:
                emit(switch, key, velocity, time)
            key = numpy.zeros((notes,), dtype=int)
            time = item[3]
        switch = item[0]
        key[item[1] - minimum] = 1
        velocity = item[2]
    # Flush the chord still being collected when the input ends; without this
    # the final chord of every sequence would be silently dropped.
    if key is not None:
        emit(switch, key, velocity, time)
    return [numpy.array(switches)[:, numpy.newaxis], numpy.array(keys), numpy.array(velocities), numpy.array(times)]


def augment_chords(data):
    """Transpose an encoded progression to every offset that fits in `notes` keys.

    Args:
        data: `[switches, keys, velocities, times]` with equal lengths; `keys`
            is a 2-D 0/1 array with `notes` columns.

    Returns:
        A list with one `[switches, shifted_keys, velocities, times]` entry per
        transposition. Only the key array is rebuilt; the other three arrays
        are the input objects themselves.

    Raises:
        AssertionError: if the four arrays differ in length.
    """
    augmented = []
    assert len(set([len(data[0]), len(data[1]), len(data[2]), len(data[3])])) == 1
    events = len(data[0])

    # One past the highest set key over ALL events. The scan has to include
    # event 0: skipping it would truncate the first chord's top keys whenever
    # that chord holds the highest note of the progression.
    maximum = 0
    for event in range(events):
        candidate = notes - numpy.argmax(data[1][event, ::-1])
        if candidate > maximum:
            maximum = candidate

    transpositions = notes - maximum + 1
    for i in range(transpositions):
        # Shift the occupied key columns right by i, padding with zeros.
        progression = numpy.zeros((events, notes), dtype=int)
        if events:
            progression[:, i:i + maximum] = data[1][:, :maximum]
        augmented.append([data[0], progression, data[2], data[3]])
    return augmented


def prepare_chords(data):
    """Build right-aligned, zero-padded history windows for every event.

    Args:
        data: `[switches, keys, velocities, times]` with equal lengths.

    Returns:
        `(x, y)` where `y` is `data` itself and `x` is a list of four integer
        arrays of shape `(events, lookback, width)`. Row `i` of each `x` array
        holds the events before `i`, right-aligned; row 0 stays all zeros.

    Raises:
        AssertionError: if the four arrays differ in length.
    """
    assert len(set([len(data[0]), len(data[1]), len(data[2]), len(data[3])])) == 1
    sequences = len(data[0])
    widths = (1, notes, 8, 17)
    x = [numpy.zeros((sequences, lookback, width), dtype=int) for width in widths]
    y = data
    for i in range(1, sequences):
        # Keep only the `lookback` most recent events. Copying all i previous
        # events fails with a shape mismatch as soon as i exceeds lookback.
        start = max(0, i - lookback)
        for window, source in zip(x, data):
            window[i, start - i:, :] = source[start:i, :]
    return x, y


def load_chords(midi_dir):
    """Load, encode, augment and window every MIDI file found in `midi_dir`.

    Files matching `*.mid` or `*.midi` are processed in sorted order. Loading
    is best-effort: a file whose processing raises is reported and skipped.

    Args:
        midi_dir: directory to scan.

    Returns:
        A list with one `prepare_chords` result per augmented progression.
    """
    all_data = []
    midi_files = sorted(glob.glob(os.path.join(midi_dir, '*.mid')) + glob.glob(os.path.join(midi_dir, '*.midi')))
    for midi_file in midi_files:
        try:
            data = [prepare_chords(x) for x in augment_chords(encode_chords(chords_from_midi(midi_file)))]
        except Exception as error:
            # `Exception` excludes KeyboardInterrupt / SystemExit, so those still
            # propagate. Report why the file was skipped so real bugs are visible.
            print("Skipping", midi_file, "({}: {})".format(type(error).__name__, error))
        else:
            all_data.extend(data)
    #random.shuffle(all_data)
    return all_data


def concat_chords(data):
    """Merge the per-feature arrays of each sequence into single arrays.

    For every `(inputs, targets)` entry, the input arrays are concatenated
    along axis 2 and the target arrays along axis 1.

    Returns:
        A list of `(x, y)` tuples, one per entry of `data`.
    """
    return [
        (
            numpy.concatenate(sequence[0], axis=2),
            numpy.concatenate(sequence[1], axis=1),
        )
        for sequence in data
    ]


def generator(data, shuffle=True):
    """Yield `(inputs, targets)` pairs from `data` forever.

    Before every pass over `data` the list is shuffled in place when `shuffle`
    is true. A new axis is appended after the second axis of the inputs and
    after the first axis of the targets.
    """
    def one_pass():
        for sequence in data:
            features, targets = sequence[0], sequence[1]
            yield (features[:, :, numpy.newaxis], targets[:, numpy.newaxis])

    while True:
        if shuffle:
            random.shuffle(data)
        yield from one_pass()

In [None]:
# batches = 2048

In [None]:
# X = numpy.random.randint(2, size=(batches, lookback, input_width))

In [None]:
# Y = numpy.random.randint(2, size=(batches, input_width))

In [None]:
# model.autoencoder.fit(X, [X, Y], batch_size=128, epochs=100)

In [8]:
# Directory that load_chords scans for *.mid / *.midi files.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
midi_dir = '/home/santiago/Projects/ProjectEuterpe/data/midi/'

In [None]:
# Load every MIDI file and merge each sequence's feature arrays into one (x, y) pair.
data = concat_chords(load_chords(midi_dir))

In [None]:
# Endless, reshuffling stream of samples drawn from `data`.
gen = generator(data, shuffle=True)

In [None]:
# Reload the samples without concatenating the feature arrays (rebinds `data`).
data = load_chords(midi_dir)

In [None]:
data[0][0][3]

In [None]:
# model = VAE()
# model.create()

In [None]:
# model.autoencoder.summary()

In [None]:
# Load all prepared samples from midi_dir.
chords = load_chords(midi_dir)

In [None]:
# Raw (on_flag, note, velocity, time) rows of a single file; rebinds `chords`.
chords = chords_from_midi('/home/santiago/Projects/ProjectEuterpe/data/midi/1.mid')

In [None]:
chords

In [9]:
def augment_chords(data):
    """Return every transposition of a raw event array within the 0..127 note range.

    Column 1 of `data` holds the note numbers. For each offset, a copy of
    `data` is made whose notes are shifted so that the lowest note equals the
    offset; the other columns are copied unchanged.

    Args:
        data: 2-D numpy array with the note number in column 1.

    Returns:
        A list of `128 - (max_note - min_note)` arrays shaped like `data`.
    """
    highest = data.max(axis=0)[1]
    lowest = data.min(axis=0)[1]
    span = highest - lowest

    augmented = []
    for offset in range(128 - span):
        shifted = numpy.copy(data)
        shifted[:, 1] = data[:, 1] - lowest + offset
        augmented.append(shifted)
    return augmented

In [None]:
# All transpositions of the single file loaded into `chords`.
augmented = augment_chords(chords)

In [None]:
augmented

In [10]:
# Bits per encoded event: 1 on/off flag + 8 note bits + 8 velocity bits + 16 time bits.
input_width = 1 + 8 + 8 + 16
input_width

33

In [11]:
def encode_chords(data):
    """Encode each raw event sequence as a 0/1 matrix with `input_width` columns.

    Per event: column 0 is the on/off flag, columns 1-8 are the note as 8
    bits, columns 9-16 the velocity as 8 bits, and the remaining columns the
    time as 16 bits (most significant bit first).

    Args:
        data: iterable of sequences whose rows are `(on_flag, note, velocity, time)`.

    Returns:
        A list with one integer array per input sequence.
    """
    def to_bits(value, spec):
        # Binary digits of `value` formatted with `spec`, as a list of ints.
        return [int(digit) for digit in format(value, spec)]

    encoded = []
    for sequence in data:
        rows = numpy.zeros((len(sequence), input_width), dtype=int)
        for index, event in enumerate(sequence):
            rows[index, 0] = event[0]
            rows[index, 1:9] = to_bits(event[1], '08b')
            rows[index, 9:17] = to_bits(event[2], '08b')
            rows[index, 17:] = to_bits(event[3], '016b')
        encoded.append(rows)
    return encoded

In [None]:
# Bit-encode every transposed sequence.
encoded = encode_chords(augmented)

In [None]:
encoded

In [12]:
def prepare_chords(data):
    """Turn encoded sequences into (history, next_event) training pairs.

    For event `i` of a sequence the history is all rows before `i`; for the
    first event it is a single all-zero row of width `input_width`.

    Args:
        data: iterable of 2-D arrays, one row per encoded event.

    Returns:
        A flat list of `(history, target_row)` tuples over all sequences.
    """
    prepared = []
    for sequence in data:
        for i in range(len(sequence)):
            history = sequence[:i, :] if i != 0 else numpy.zeros((1, input_width), dtype=int)
            target = sequence[i, :]
            prepared.append((history, target))
    return prepared

In [None]:
# (history, next_event) pairs for the encoded sequences.
prepared = prepare_chords(encoded)

In [None]:
len(prepared)

In [13]:
def load_chords(midi_dir):
    """Load every MIDI file in `midi_dir` as (history, next_event) training pairs.

    Files matching `*.mid` or `*.midi` are processed in sorted order through
    chords_from_midi -> augment_chords -> encode_chords -> prepare_chords.
    Loading is best-effort: a file whose processing raises is reported and
    skipped.

    Args:
        midi_dir: directory to scan.

    Returns:
        A flat list of the pairs produced for all files.
    """
    all_data = []
    midi_files = sorted(glob.glob(os.path.join(midi_dir, '*.mid')) + glob.glob(os.path.join(midi_dir, '*.midi')))
    for midi_file in midi_files:
        try:
            data = prepare_chords(encode_chords(augment_chords(chords_from_midi(midi_file))))
        except Exception as error:
            # `Exception` excludes KeyboardInterrupt / SystemExit, so those still
            # propagate. Report why the file was skipped so real bugs are visible.
            print("Skipping", midi_file, "({}: {})".format(type(error).__name__, error))
        else:
            all_data.extend(data)
    #random.shuffle(all_data)
    return all_data

In [14]:
# Full training set: every (history, next_event) pair from every file in midi_dir.
all_data = load_chords(midi_dir)

In [15]:
len(all_data)

125376

In [16]:
def generator(data, shuffle=True):
    """Yield single-sample batches `(history, target)` from `data` forever.

    Before every pass over `data` the list is shuffled in place when `shuffle`
    is true. A leading axis of length 1 is added to both the history and the
    target.
    """
    while True:
        if shuffle:
            random.shuffle(data)
        for sequence in data:
            history, target = sequence[0], sequence[1]
            yield (history[numpy.newaxis, :, :], target[numpy.newaxis, :])

In [None]:
def vae_generator(data, shuffle=True):
    """Yield single-sample batches `(history, target)` from `data` forever.

    Before every pass over `data` the list is shuffled in place when `shuffle`
    is true. A leading axis of length 1 is added to both the history and the
    target.
    """
    def one_pass():
        for sequence in data:
            yield (sequence[0][numpy.newaxis, :, :], sequence[1][numpy.newaxis, :])

    while True:
        if shuffle:
            random.shuffle(data)
        yield from one_pass()

In [17]:
# Endless, reshuffling stream of single-sample batches over the full training set.
gen = generator(all_data, shuffle=True)

In [22]:
# NOTE(review): overrides the 33 computed earlier. The encode_chords defined above
# writes 16 time bits into columns 17 onward, which would not fill a 49-wide row -
# confirm which encoding the model below was trained with.
input_width = 49

In [23]:
# good
# Single-input next-event predictor: a variable-length history of encoded events
# goes through two bidirectional LSTMs with dropout and a dense layer, ending in
# one sigmoid output per bit of the next event.
input_layer = Input((None, input_width))
x = Bidirectional(LSTM(500, return_sequences=True), merge_mode='concat')(input_layer)
x = Dropout(0.5)(x)
# Second recurrent layer returns only its final state.
x = Bidirectional(LSTM(500, return_sequences=False), merge_mode='concat')(x)
x = Dropout(0.5)(x)
x = Dense(100, activation='relu')(x)
output_layer = Dense(input_width, activation='sigmoid')(x)
model = Model(input_layer, output_layer)
model.compile(optimizer=Adam(lr=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, None, 49)          0         
_________________________________________________________________
bidirectional_3 (Bidirection (None, None, 1000)        2200000   
_________________________________________________________________
dropout_3 (Dropout)          (None, None, 1000)        0         
_________________________________________________________________
bidirectional_4 (Bidirection (None, 1000)              6004000   
_________________________________________________________________
dropout_4 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 100)               100100    
_________________________________________________________________
dense_4 (Dense)              (None, 49)                4949      
Total para

In [19]:
# Directory the training loop writes its per-epoch weight files into.
# NOTE(review): absolute, machine-specific path.
checkpoint_dir = '/home/santiago/Projects/ProjectEuterpe/checkpoints/chords/'

In [20]:
# NOTE(review): not referenced by any other visible cell - confirm it is still needed.
time_factor = 15360

In [None]:
# def load_chords(midi_dir):
#     all_data = []
#     midi_files = sorted(glob.glob(os.path.join(midi_dir, '*.mid')) + glob.glob(os.path.join(midi_dir, '*.midi')))
#     for midi_file in midi_files:
#         try:
#             data = chords_from_midi(midi_file)
#             print(midi_file, data)
#             all_data.extend(data)
#         except (KeyboardInterrupt, SystemExit):
#             raise
#         except:
#             print("Skipping", midi_file)
#     #random.shuffle(all_data)
#     return all_data

In [None]:
# Load the training pairs again under the name `data` (all_data holds the same kind of list).
data = load_chords(midi_dir)

In [None]:
# bad
# Two-input / two-output variant: a (1 + notes)-wide input and a beats-wide input
# are concatenated, passed through the same recurrent stack as the single-input
# model, and split into a sigmoid head and a softmax head.
# NOTE(review): running this cell rebinds `model`, replacing the single-input model.
input_notes = Input((None, 1 + notes))
input_beats = Input((None, beats))
x = Concatenate()([input_notes, input_beats])
x = Bidirectional(LSTM(500, return_sequences=True), merge_mode='concat')(x)
x = Dropout(0.5)(x)
x = Bidirectional(LSTM(500, return_sequences=False), merge_mode='concat')(x)
x = Dropout(0.5)(x)
x = Dense(100, activation='relu')(x)
output_notes = Dense(1 + notes, activation='sigmoid')(x)
output_beats = Dense(beats, activation='softmax')(x)
model = Model([input_notes, input_beats], [output_notes, output_beats])
model.compile(optimizer=Adam(lr=0.0001), loss=['binary_crossentropy', 'categorical_crossentropy'], metrics=['accuracy'])
model.summary()

In [None]:
# Train one epoch at a time and save the weights after each epoch.
# The loop has no exit condition; it runs until the kernel is interrupted.
epoch = 0
while True:
    epoch += 1
    print('Epoch', epoch)
    # One step per training pair, since `gen` yields single-sample batches.
    model.fit_generator(gen, len(all_data), epochs=1)
    model.save_weights(os.path.join(checkpoint_dir, 'epoch{}.hdf5'.format(epoch)))

In [None]:
# Manual checkpoint of the current weights into checkpoint_dir.
model.save_weights(os.path.join(checkpoint_dir, 'epoch3_partial.hdf5'))

In [None]:
# Rounded prediction for a single all-zero history step.
# NOTE(review): 49 is hard-coded here; it matches input_width only while that is 49.
numpy.round(model.predict(numpy.zeros((1, 1, 49))))

In [None]:
numpy.round(model.predict(all_data[0][0][numpy.newaxis, :, :]))

In [None]:
all_data[0][0]

In [None]:
all_data[0][1]

In [24]:
# Restore previously saved weights.
# NOTE(review): this reads from a 'lstm_binary' subdirectory, not from the
# checkpoint_dir that the save cells write to - confirm the file was moved there.
model.load_weights('/home/santiago/Projects/ProjectEuterpe/checkpoints/chords/lstm_binary/epoch3_partial.hdf5')

In [25]:
# Generate 100 events autoregressively: start from one all-zero step, predict
# the next event from the whole history so far, round it to bits and append it.
history = numpy.zeros((1, 1, input_width), dtype=int)
for _ in range(100):
    next_event = numpy.round(model.predict(history)).astype(int)
    history = numpy.concatenate([history, next_event[:, numpy.newaxis, :]], axis=1)
# Drop the batch axis and the all-zero seed step.
history = history[0, 1:, :]
history

array([[1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [None]:
#numpy.round(model.autoencoder.predict(data[77][0])[1])

In [26]:
from mido import Message, MetaMessage, MidiFile, MidiTrack

In [27]:
def bits_to_int(bits):
    """Interpret a sequence of 0/1 values as a binary number, most significant bit first.

    Returns 0 for an empty sequence.
    """
    value = 0
    for digit in bits:
        value <<= 1
        value |= digit
    return value

In [28]:
def to_midi(data):
    """Decode bit-encoded events into a single-track MIDI file object.

    Each row of `data` is read as: column 0 the on/off flag (1 means note_on,
    anything else note_off), columns 1-8 the note, columns 9-16 the velocity,
    and the remaining columns the message time. The track starts with a
    set_tempo meta message of 500000.

    Returns:
        The `MidiFile` containing the generated track.
    """
    track = MidiTrack()
    midi = MidiFile()
    midi.tracks.append(track)
    track.append(MetaMessage('set_tempo', tempo=500000, time=0))
    for event in data:
        kind = 'note_off'
        if event[0] == 1:
            kind = 'note_on'
        message = Message(
            kind,
            note=bits_to_int(event[1:9]),
            velocity=bits_to_int(event[9:17]),
            time=bits_to_int(event[17:]),
        )
        track.append(message)
    return midi

In [29]:
# Convert the generated bit rows into a MIDI file object.
midi = to_midi(history)

In [None]:
t = midi.tracks[0]
list(t)

In [None]:
# Open one of the source files for comparison with the generated track.
test = MidiFile('/home/santiago/Projects/ProjectEuterpe/data/midi/1.mid')

In [None]:
list(test.tracks[0])

In [30]:
# Write the generated track to disk. NOTE(review): absolute, machine-specific path.
midi.save('/home/santiago/Projects/ProjectEuterpe/data/chords/lstm_test_2.mid')

In [None]:
# Predict one batch per step from the sample stream.
# NOTE(review): `gen` was built from `all_data`, but the step count uses len(data) -
# confirm both lists have the same length. `gen` also reshuffles, so the
# prediction order does not follow the order of either list.
preds = model.predict_generator(gen, steps=len(data))

In [None]:
# Threshold the sigmoid outputs to 0/1 integers.
rounded = numpy.round(preds).astype(int)

In [None]:
rounded.shape

In [None]:
rounded[10]