In [1]:
!git clone https://github.com/masa-ita/keras-composer.git

Cloning into 'keras-composer'...
remote: Enumerating objects: 177, done.[K
remote: Counting objects: 100% (177/177), done.[K
remote: Compressing objects: 100% (166/166), done.[K
remote: Total 177 (delta 16), reused 171 (delta 10), pack-reused 0[K
Receiving objects: 100% (177/177), 1.00 GiB | 16.01 MiB/s, done.
Resolving deltas: 100% (16/16), done.


In [0]:
!mkdir keras-composer/weights

In [1]:
!pip install music21

[33mYou are using pip version 9.0.3, however version 18.0 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import keras
from keras import objectives, backend as K
from keras.layers import Bidirectional, Dense, Embedding, Input, Lambda, LSTM, RepeatVector, TimeDistributed, Dropout
from keras.models import Model
from keras.callbacks import ModelCheckpoint, CSVLogger
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
import numpy as np
import os
import glob
import pickle
from music21 import converter, instrument, note, chord, stream

In [5]:
MAX_LENGTH = 300

In [6]:
class VAE(object):
    def __init__(self, vocab_size=500, max_length=300, latent_rep_size=64):
        self.encoder = None
        self.decoder = None
        self.autoencoder = None

        x = Input(shape=(max_length, vocab_size))

        vae_loss, encoded = self._build_encoder(x, latent_rep_size=latent_rep_size, max_length=max_length)
        self.encoder = Model(inputs=x, outputs=encoded)
        encoder_out = self.encoder(x)

        encoded_input = Input(shape=(latent_rep_size,))

        decoded = self._build_decoder(encoded_input, vocab_size, max_length)
        self.decoder = Model(encoded_input, decoded)
        
        decoder_out = self.decoder(encoder_out)

        self.autoencoder = Model(inputs=x, outputs=decoder_out)
        self.autoencoder.compile(optimizer='Adam',
                                 loss=vae_loss,
                                 metrics=['accuracy'])
        
    def _build_encoder(self, x, latent_rep_size=64, max_length=300, epsilon_std=0.01):
        h = LSTM(512, return_sequences=False, name='lstm_1')(x)
        h = Dropout(0.2)(h)
        h = Dense(256, activation='relu', name='dense_1')(h)

        def sampling(args):
            z_mean_, z_log_var_ = args
            batch_size = K.shape(z_mean_)[0]
            epsilon = K.random_normal(shape=(batch_size, latent_rep_size), mean=0., stddev=epsilon_std)
            return z_mean_ + K.exp(z_log_var_ / 2) * epsilon

        z_mean = Dense(latent_rep_size, name='z_mean', activation='linear')(h)
        z_log_var = Dense(latent_rep_size, name='z_log_var', activation='linear')(h)
    
        def vae_loss(x, x_decoded_mean):
            x = K.flatten(x)
            x_decoded_mean = K.flatten(x_decoded_mean)
            xent_loss = max_length * objectives.binary_crossentropy(x, x_decoded_mean)
            kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
            return xent_loss + kl_loss

        return (vae_loss, Lambda(sampling, output_shape=(latent_rep_size,), name='lambda')([z_mean, z_log_var]))

    def _build_decoder(self, encoded, vocab_size, max_length):
        repeated_context = RepeatVector(max_length)(encoded)
    
        h = LSTM(512, return_sequences=True, name='dec_lstm_1')(repeated_context)
    
        decoded = TimeDistributed(Dense(vocab_size, activation='softmax'), name='decoded_mean')(h)
    
        return decoded


In [7]:
def parse_midi_files():
    """ Get all the notes and chords from the midi files in the ./midi_songs directory """
    notes = []
    songs = []

    for file in glob.glob("midi_songs/*.mid"):
        song = []
        midi = converter.parse(file)

        print("Parsing %s" % file)

        notes_to_parse = None

        try: # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse() 
        except: # file has notes in a flat structure
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                song.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                song.append('.'.join(str(n) for n in element.normalOrder))
        songs.append(song)
        notes += song

    return notes, songs

In [8]:
notes, songs = parse_midi_files()

Parsing midi_songs/bwv782.mid
Parsing midi_songs/bwv783.mid
Parsing midi_songs/bwv781.mid
Parsing midi_songs/bwv780.mid
Parsing midi_songs/bwv784.mid
Parsing midi_songs/bwv785.mid
Parsing midi_songs/bwv778.mid
Parsing midi_songs/bwv786.mid
Parsing midi_songs/bwv779.mid
Parsing midi_songs/bwv774.mid
Parsing midi_songs/bwv775.mid
Parsing midi_songs/bwv777.mid
Parsing midi_songs/bwv776.mid
Parsing midi_songs/bwv772.mid
Parsing midi_songs/bwv773.mid


In [10]:
pitchnames = sorted(set(notes))
n_vocab = len(pitchnames)

note_to_int = dict((note, number) for number, note in enumerate(pitchnames))
int_to_note = dict([[number, note] for note, number in note_to_int.items()])

encoded_songs = [[note_to_int[note] for note in song] for song in songs]

# songs_text = [' '.join(song) for song in songs]

# tokenizer = Tokenizer(num_words=NUM_WORDS, filters='', lower=False)
# tokenizer.fit_on_texts(songs_text)
# note2code = tokenizer.word_index

# songs_codes = tokenizer.texts_to_sequences(songs_text)
padded_songs = pad_sequences(encoded_songs, maxlen=MAX_LENGTH)

In [11]:
temp = np.zeros((padded_songs.shape[0], MAX_LENGTH, n_vocab))
temp[np.expand_dims(np.arange(padded_songs.shape[0]), axis=0).reshape(padded_songs.shape[0], 1), 
           np.repeat(np.array([np.arange(MAX_LENGTH)]), padded_songs.shape[0], axis=0), padded_songs] = 1

one_hot_encoded_songs = temp

In [16]:
def create_model_checkpoint(dir, model_name):
    filepath = dir + '/' + \
               model_name + "-{epoch:02d}-{acc:.2f}-{loss:.2f}.h5"
    directory = os.path.dirname(filepath)

    try:
        os.stat(directory)
    except:
        os.mkdir(directory)

    checkpointer = ModelCheckpoint(filepath=filepath,
                                                              monitor='loss',
                                                              verbose=1,
                                                              save_best_only=True)

    return checkpointer

In [None]:
model = VAE(vocab_size=n_vocab, latent_rep_size=2, max_length=MAX_LENGTH)

In [17]:
checkpointer = create_model_checkpoint('vae_output', 'music_vae')
csv_logger = CSVLogger(os.path.join('vae_output', 'music_vae_log.csv'))

history = model.autoencoder.fit(x=one_hot_encoded_songs, 
                                                        y=one_hot_encoded_songs,
                                                        batch_size=1, 
                                                        epochs=8000, 
                                                        callbacks=[checkpointer, csv_logger], 
                                                        verbose=0)


Epoch 00001: loss improved from inf to 13.34625, saving model to vae_output/music_vae-01-0.04-13.35.h5

Epoch 00002: loss improved from 13.34625 to 12.45865, saving model to vae_output/music_vae-02-0.04-12.46.h5

Epoch 00003: loss improved from 12.45865 to 12.38944, saving model to vae_output/music_vae-03-0.03-12.39.h5

Epoch 00004: loss improved from 12.38944 to 12.32700, saving model to vae_output/music_vae-04-0.05-12.33.h5

Epoch 00005: loss improved from 12.32700 to 12.30000, saving model to vae_output/music_vae-05-0.04-12.30.h5

Epoch 00006: loss improved from 12.30000 to 12.26549, saving model to vae_output/music_vae-06-0.05-12.27.h5

Epoch 00007: loss did not improve from 12.26549

Epoch 00008: loss improved from 12.26549 to 12.26047, saving model to vae_output/music_vae-08-0.05-12.26.h5


KeyboardInterrupt: 

In [0]:
prediction_output = model.autoencoder.predict(songs_one_hot[np.newaxis, 0])

In [52]:
prediction_indices = np.argmax(prediction_output, axis=2)
code2note = dict([[code, note] for note, code in note2code.items()])

prediction_song = [code2note[index] for index in prediction_indices[0]]
print(prediction_song)

['5.11', '1.4', '2.5', '2.5', '4.7', '9.1', 'B-3', 'D5', 'E3', 'E5', 'F3', 'D5', 'C#5', 'G3', 'D5', 'E5', 'A3', 'F5', 'E5', 'F5', 'E5', 'F5', 'A2', 'E5', 'D5', 'D3', 'D5', 'C5', 'B-4', 'A4', '10.2', '0.4', '5.9', '7.10', '9', '7.10', '5.7', 'C4', 'E4', 'B-3', '0.2', 'C3', 'C5', 'B-4', 'F3', 'A4', 'G4', '3.9', '11.2', 'E-3', 'C5', 'B4', 'F3', 'C5', 'G3', 'E-3', 'F3', '3.9', '11.2', '0', 'B2', 'D5', 'C5', 'G#3', 'D5', 'G3', 'F3', 'E-3', '2.7', '9.0', '11.2', '0.3', '2.5', '3.7', '8.0', 'B2', 'D5', 'A2', '11.0', 'G2', 'B4', 'A4', 'G4', 'F5', 'G2', 'E-5', 'D5', '8.0', '7.10', '5.8', '7', 'G#2', 'F4', 'E-5', 'A2', 'D5', 'C5', '7.10', '5.9', '3.7', '7.9', 'B-2', 'F5', 'E-5', 'B2', 'D5', 'F5', '0.3', '2.7', '9.0', '11.2', '0.3', '2.5', '3.7', '8.0', 'D3', 'B4', 'A4', 'B4', 'C3', 'B2', 'D5', 'A2', 'G2', 'F3', 'E-3', 'F4', 'D3', 'C3', 'E-4', 'B-2', 'G#2', 'C5', 'G2', 'F2', 'D4', 'E-2', 'F2', 'C5', 'B4', 'G2', 'A4', 'B4', '0', '2.7', '0.3', '2.5', 'G3', 'E-5', 'D5', 'G#3', 'E-5', 'B-3', 'G3', 'G

In [0]:
def create_midi(prediction_output, file_path):
    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)

    midi_stream.write('midi', fp=file_path)

In [0]:
create_midi(prediction_song, 'test_vae_onehot_out4.midi')

In [0]:
from google.colab import files

files.download('test_vae_onehot_out4.midi')

In [0]:
decoder_input = np.array([[-0.0001, 0.0001]])
decoder_predicted = model.decoder.predict(decoder_input)

In [78]:
prediction_indices = np.argmax(decoder_predicted, axis=2)
code2note = dict([[code, note] for note, code in note2code.items()])

prediction_song = [code2note[index] for index in prediction_indices[0]]
print(prediction_song)

['0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6', '0.6'

In [0]:
files.download('weights/music_vae-4128-0.90-1.45.h5')