In [1]:
from keras import objectives, backend as K
from keras.layers import Bidirectional, Dense, Embedding, Input, Lambda, LSTM, RepeatVector, TimeDistributed
from keras.models import Model
import keras

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
class VAE(object):
    """Sequence variational autoencoder assembled from Keras layers.

    Three models are exposed as attributes:

    * ``encoder``     -- integer token sequence -> sampled latent vector
    * ``decoder``     -- latent vector -> per-step softmax over the vocabulary
    * ``autoencoder`` -- encoder followed by the *same* decoder, compiled with
      the VAE loss and ready for ``fit``

    Args:
        vocab_size: Size of the token vocabulary (embedding input dimension
            and width of the softmax output).
        max_length: Length of every input/output sequence.
        latent_rep_size: Dimensionality of the latent vector.
    """

    def __init__(self, vocab_size=500, max_length=300, latent_rep_size=200):
        self.encoder = None
        self.decoder = None
        self.autoencoder = None

        x = Input(shape=(max_length,))
        x_embed = Embedding(vocab_size, 64, input_length=max_length)(x)

        vae_loss, encoded = self._build_encoder(x_embed, latent_rep_size=latent_rep_size, max_length=max_length)
        self.encoder = Model(inputs=x, outputs=encoded)

        encoded_input = Input(shape=(latent_rep_size,))

        decoded = self._build_decoder(encoded_input, vocab_size, max_length)
        self.decoder = Model(encoded_input, decoded)

        # Call the decoder model on the sampled latent tensor instead of
        # building a second decoder graph. Building it twice creates two
        # independent sets of weights, so training the autoencoder would leave
        # ``self.decoder`` with its random initial weights.
        self.autoencoder = Model(inputs=x, outputs=self.decoder(encoded))
        self.autoencoder.compile(optimizer='Adam',
                                 loss=vae_loss,
                                 metrics=['accuracy'])

    def _build_encoder(self, x, latent_rep_size=200, max_length=300, epsilon_std=0.01):
        """Build the encoder graph on top of the embedded input ``x``.

        Args:
            x: Embedded input tensor.
            latent_rep_size: Dimensionality of the latent vector.
            max_length: Sequence length; scales the reconstruction term.
            epsilon_std: Standard deviation of the noise drawn when sampling.

        Returns:
            A ``(vae_loss, encoded)`` tuple: the loss function (it closes over
            ``z_mean`` / ``z_log_var``) and the sampled latent tensor.
        """
        h = Bidirectional(LSTM(500, return_sequences=True, name='lstm_1'), merge_mode='concat')(x)
        h = Bidirectional(LSTM(500, return_sequences=False, name='lstm_2'), merge_mode='concat')(h)
        h = Dense(435, activation='relu', name='dense_1')(h)

        def sampling(args):
            """Draw z = mean + exp(log_var / 2) * epsilon (reparameterisation)."""
            z_mean_, z_log_var_ = args
            batch_size = K.shape(z_mean_)[0]
            # NOTE(review): the noise uses stddev=epsilon_std (default 0.01),
            # while the KL term below is written against a unit-variance
            # prior -- confirm the small default is intentional.
            epsilon = K.random_normal(shape=(batch_size, latent_rep_size), mean=0., stddev=epsilon_std)
            return z_mean_ + K.exp(z_log_var_ / 2) * epsilon

        z_mean = Dense(latent_rep_size, name='z_mean', activation='linear')(h)
        z_log_var = Dense(latent_rep_size, name='z_log_var', activation='linear')(h)

        def vae_loss(x, x_decoded_mean):
            """Reconstruction cross-entropy (scaled by max_length) plus KL term."""
            x = K.flatten(x)
            x_decoded_mean = K.flatten(x_decoded_mean)
            # NOTE(review): binary cross-entropy is applied to a softmax
            # output; categorical cross-entropy may be the intended
            # reconstruction loss -- left unchanged, confirm before switching.
            xent_loss = max_length * objectives.binary_crossentropy(x, x_decoded_mean)
            kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
            return xent_loss + kl_loss

        return (vae_loss, Lambda(sampling, output_shape=(latent_rep_size,), name='lambda')([z_mean, z_log_var]))

    def _build_decoder(self, encoded, vocab_size, max_length):
        """Build the decoder graph on top of the latent tensor ``encoded``.

        The latent vector is repeated ``max_length`` times, passed through two
        sequence-returning LSTMs and projected at every step onto a softmax
        over ``vocab_size`` tokens.

        Note: every call creates new layers (new weights). Call it once and
        reuse the resulting model when weights must be shared.

        Returns:
            The decoded output tensor.
        """
        repeated_context = RepeatVector(max_length)(encoded)

        h = LSTM(500, return_sequences=True, name='dec_lstm_1')(repeated_context)
        h = LSTM(500, return_sequences=True, name='dec_lstm_2')(h)

        decoded = TimeDistributed(Dense(vocab_size, activation='softmax'), name='decoded_mean')(h)

        return decoded


In [3]:
from keras.callbacks import ModelCheckpoint
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
# from model import VAE
import numpy as np
import os

In [4]:
# Sequence length used for pad_sequences(maxlen=...), the one-hot target tensor
# and the VAE's max_length.
MAX_LENGTH = 300
# Vocabulary cap: passed to the Tokenizer as num_words, used as the one-hot
# depth, and passed to the VAE as vocab_size.
NUM_WORDS = 1000

In [5]:
import glob
import pickle
import numpy
from music21 import converter, instrument, note, chord

In [6]:
def get_notes():
    """ Get all the notes and chords from the midi files in the ./midi_songs directory

    Each note is stored as its pitch string and each chord as its normal-order
    pitch classes joined with '.'.

    Side effects:
        Prints one "Parsing ..." line per file and pickles the flat note list
        to ``data/notes`` (the ``data`` directory is created if missing).

    Returns:
        A ``(notes, songs)`` tuple: ``songs`` is one token list per MIDI file,
        ``notes`` is all of those lists concatenated in parse order.
    """
    notes = []
    songs = []

    for midi_path in glob.glob("midi_songs/*.mid"):
        midi = converter.parse(midi_path)

        print("Parsing %s" % midi_path)

        try:  # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse()
        except Exception:  # file has notes in a flat structure
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still stop a long parsing run.
            notes_to_parse = midi.flat.notes

        song = []
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                song.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                song.append('.'.join(str(n) for n in element.normalOrder))
        songs.append(song)
        notes.extend(song)

    # Make sure the output directory exists; open() does not create it.
    os.makedirs('data', exist_ok=True)
    with open('data/notes', 'wb') as notes_file:
        pickle.dump(notes, notes_file)

    return notes, songs


In [7]:
def prepare_sequences(notes, n_vocab):
    """ Prepare the sequences used by the Neural Network

    Slides a window of 100 tokens over ``notes``; each window is one input
    pattern and the token right after it is the target.

    Args:
        notes: Flat list of note/chord tokens.
        n_vocab: Unused; kept so existing callers keep working.

    Returns:
        ``(network_input, network_output)`` where ``network_input`` is an
        integer array of shape (n_patterns, 100, 1) and ``network_output`` is
        a float32 one-hot array of shape (n_patterns, max_target_index + 1).

    Raises:
        ValueError: if ``notes`` has 100 tokens or fewer, so no pattern fits.
    """
    sequence_length = 100

    # get all pitch names
    pitchnames = sorted(set(notes))

    # create a dictionary to map pitches to integers
    note_to_int = {pitch: number for number, pitch in enumerate(pitchnames)}

    n_patterns = len(notes) - sequence_length
    if n_patterns <= 0:
        raise ValueError(
            "need more than %d notes to build a sequence, got %d"
            % (sequence_length, len(notes)))

    # encode once, then slice windows out of the encoded list
    encoded = [note_to_int[item] for item in notes]

    # create input sequences and the corresponding outputs
    network_input = [encoded[i:i + sequence_length] for i in range(n_patterns)]
    targets = numpy.asarray(encoded[sequence_length:], dtype='int64')

    # reshape the input into a format compatible with LSTM layers
    network_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))

    # One-hot encode the targets with numpy. The original called
    # np_utils.to_categorical, but np_utils is never imported in this
    # notebook. The width matches to_categorical's default: max index + 1.
    network_output = numpy.zeros((n_patterns, int(targets.max()) + 1), dtype='float32')
    network_output[numpy.arange(n_patterns), targets] = 1.0

    return (network_input, network_output)

In [8]:
# Parse every MIDI file: `notes` is the flat token list, `songs` holds one
# token list per file. Also writes the flat list to data/notes.
notes, songs = get_notes()

Parsing midi_songs/bwv782.mid
Parsing midi_songs/bwv783.mid
Parsing midi_songs/bwv781.mid
Parsing midi_songs/bwv780.mid
Parsing midi_songs/bwv784.mid
Parsing midi_songs/bwv785.mid
Parsing midi_songs/bwv778.mid
Parsing midi_songs/bwv786.mid
Parsing midi_songs/bwv779.mid
Parsing midi_songs/bwv774.mid
Parsing midi_songs/bwv775.mid
Parsing midi_songs/bwv777.mid
Parsing midi_songs/bwv776.mid
Parsing midi_songs/bwv772.mid
Parsing midi_songs/bwv773.mid


In [36]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Join each song's tokens with spaces so the Tokenizer can treat a song as one text.
songs_text = [' '.join(song) for song in songs]

# filters='' and lower=False leave tokens untouched: chord tokens contain '.'
# and note names contain upper-case letters.
tokenizer = Tokenizer(num_words=NUM_WORDS, filters='', lower=False)
tokenizer.fit_on_texts(songs_text)
# token -> integer code mapping learned from the songs
note2code = tokenizer.word_index

# Integer-encode every song, then bring all songs to length MAX_LENGTH.
songs_codes = tokenizer.texts_to_sequences(songs_text)
padded_songs = pad_sequences(songs_codes, maxlen=MAX_LENGTH)

In [26]:
# One-hot encode the padded songs into shape (n_songs, MAX_LENGTH, NUM_WORDS).
n_songs = padded_songs.shape[0]
temp = np.zeros((n_songs, MAX_LENGTH, NUM_WORDS))

# Index arrays that broadcast against padded_songs: for every (song, step)
# pair, set the slot given by that step's token code to 1.
song_index = np.arange(n_songs)[:, np.newaxis]
step_index = np.arange(MAX_LENGTH)[np.newaxis, :]
temp[song_index, step_index, padded_songs] = 1

songs_one_hot = temp

In [38]:
def create_model_checkpoint(dir, model_name):
    """Create a ModelCheckpoint callback that saves the model after every epoch.

    Args:
        dir: Directory the checkpoint files are written to; created (with any
            missing parents) if it does not exist.
        model_name: Prefix of the checkpoint file names.

    Returns:
        A ``ModelCheckpoint`` whose file name embeds the epoch number and the
        ``acc`` / ``loss`` values reported for that epoch.
    """
    filepath = dir + '/' + \
               model_name + "-{epoch:02d}-{acc:.2f}-{loss:.2f}.h5"
    directory = os.path.dirname(filepath)

    # makedirs(exist_ok=True) replaces the bare-except stat/mkdir pair: it
    # handles nested paths and lets genuine errors (e.g. permissions) surface.
    os.makedirs(directory, exist_ok=True)

    checkpointer = ModelCheckpoint(filepath=filepath,
                                   verbose=1,
                                   save_best_only=False)

    return checkpointer

In [39]:
# Build the VAE with the same vocabulary size and sequence length used for the data.
model = VAE(vocab_size=NUM_WORDS, max_length=MAX_LENGTH)

# Save a checkpoint under models/ after every epoch.
checkpointer = create_model_checkpoint('models', 'music_vae')

# Inputs are the integer-coded songs; targets are their one-hot encoding.
model.autoencoder.fit(x=padded_songs, y=songs_one_hot,
                      batch_size=1, epochs=100, callbacks=[checkpointer])

Epoch 1/100

Epoch 00001: saving model to models/music_vae-01-0.03-1.95.h5
Epoch 2/100

Epoch 00002: saving model to models/music_vae-02-0.04-1.58.h5
Epoch 3/100

Epoch 00003: saving model to models/music_vae-03-0.04-1.55.h5
Epoch 4/100

Epoch 00004: saving model to models/music_vae-04-0.03-1.55.h5
Epoch 5/100

Epoch 00005: saving model to models/music_vae-05-0.03-1.54.h5
Epoch 6/100

Epoch 00006: saving model to models/music_vae-06-0.03-1.53.h5
Epoch 7/100

Epoch 00007: saving model to models/music_vae-07-0.03-1.53.h5
Epoch 8/100

Epoch 00008: saving model to models/music_vae-08-0.04-1.54.h5
Epoch 9/100

Epoch 00009: saving model to models/music_vae-09-0.04-1.53.h5
Epoch 10/100

Epoch 00010: saving model to models/music_vae-10-0.05-1.53.h5
Epoch 11/100

Epoch 00011: saving model to models/music_vae-11-0.04-1.54.h5
Epoch 12/100

Epoch 00012: saving model to models/music_vae-12-0.03-1.55.h5
Epoch 13/100

Epoch 00013: saving model to models/music_vae-13-0.04-1.53.h5
Epoch 14/100

Epoch 0


Epoch 00052: saving model to models/music_vae-52-0.05-1.51.h5
Epoch 53/100

Epoch 00053: saving model to models/music_vae-53-0.05-1.52.h5
Epoch 54/100

Epoch 00054: saving model to models/music_vae-54-0.05-1.51.h5
Epoch 55/100

Epoch 00055: saving model to models/music_vae-55-0.05-1.51.h5
Epoch 56/100

Epoch 00056: saving model to models/music_vae-56-0.05-1.51.h5
Epoch 57/100

Epoch 00057: saving model to models/music_vae-57-0.05-1.51.h5
Epoch 58/100

Epoch 00058: saving model to models/music_vae-58-0.05-1.51.h5
Epoch 59/100

Epoch 00059: saving model to models/music_vae-59-0.05-1.51.h5
Epoch 60/100

Epoch 00060: saving model to models/music_vae-60-0.05-1.51.h5
Epoch 61/100

Epoch 00061: saving model to models/music_vae-61-0.05-1.51.h5
Epoch 62/100

Epoch 00062: saving model to models/music_vae-62-0.05-1.51.h5
Epoch 63/100

Epoch 00063: saving model to models/music_vae-63-0.05-1.51.h5
Epoch 64/100

Epoch 00064: saving model to models/music_vae-64-0.05-1.51.h5
Epoch 65/100

Epoch 00065

<keras.callbacks.History at 0x128684eb8>

In [40]:
# Reconstruct the first song; np.newaxis adds the leading batch axis predict() expects.
prediction_output = model.autoencoder.predict(padded_songs[np.newaxis, 0])

In [59]:
# Most probable token code at every time step.
prediction_indices = np.argmax(prediction_output, axis=2)
# Invert the tokenizer mapping: integer code -> token.
code2note = {code: name for name, code in note2code.items()}

# Skip codes that have no token. Code 0 is what pad_sequences fills with and
# is never assigned by the Tokenizer, and codes above the learned vocabulary
# are unused; indexing code2note directly would raise KeyError on either.
prediction_song = [code2note[index] for index in prediction_indices[0] if index in code2note]
print(prediction_song)

['A3', 'E4', 'E4', 'E4', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5', 'D5

In [62]:
from music21 import instrument, note, stream, chord

def create_midi(prediction_output, file_path):
    """ Turn a sequence of predicted tokens into music21 notes/chords and
        write them to a midi file.

        A token made only of digits, or containing '.', is treated as a chord
        whose '.'-separated parts are integer pitches; any other token is
        passed to note.Note as a pitch name. Successive elements are placed
        0.5 apart. """
    time_offset = 0
    stream_elements = []

    for token in prediction_output:
        if ('.' in token) or token.isdigit():
            # chord: one Note per '.'-separated integer
            members = []
            for pitch_number in token.split('.'):
                member = note.Note(int(pitch_number))
                member.storedInstrument = instrument.Piano()
                members.append(member)
            element = chord.Chord(members)
        else:
            # single note
            element = note.Note(token)
            element.storedInstrument = instrument.Piano()

        element.offset = time_offset
        stream_elements.append(element)

        # advance so consecutive elements do not stack on the same offset
        time_offset += 0.5

    stream.Stream(stream_elements).write('midi', fp=file_path)

In [63]:
# Write the reconstructed token sequence to a MIDI file.
create_midi(prediction_song, 'test_vae_out.midi')