In [1]:
from init import *
from music21 import converter, stream, instrument, note, chord
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Activation
from keras.layers import BatchNormalization as BatchNorm
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

autotime loaded.


Using TensorFlow backend.


In [2]:
class Music:
    """Learn note sequences from MIDI files with an LSTM and generate new ones.

    Typical flow: ``get_training_sequences`` -> ``compile_model`` -> ``train``
    -> ``generate_notes``. ``compile_model`` and ``predict`` read
    ``self.unique_notes`` / ``self.inverse_pitchdict``, which are only set by
    ``get_encoded_notes`` (called internally by ``get_training_sequences``).
    """

    def __init__(self, filenames):
        """Remember the files to learn from.

        Args:
            filenames: iterable of paths handed to ``converter.parse``.
        """
        self.files = filenames

    def get_musical_notes(self):
        """Parse every file and return its notes/chords as a flat list of strings.

        A ``note.Note`` becomes ``str(el.pitch)``; a ``chord.Chord`` becomes its
        ``normalOrder`` values joined with ``'.'``. Every other element is
        skipped. Files are concatenated in the order of ``self.files``.

        Returns:
            list[str]: one token per note or chord.
        """
        notes = []
        for filename in self.files:
            song = converter.parse(filename)
            parts = instrument.partitionByInstrument(song)
            # When the score could be partitioned, only its first part is read;
            # otherwise fall back to the flattened note stream.
            notes_to_parse = parts.parts[0].recurse() if parts else song.flat.notes

            for el in notes_to_parse:
                if isinstance(el, note.Note):
                    notes.append(str(el.pitch))
                elif isinstance(el, chord.Chord):
                    notes.append('.'.join(str(n) for n in el.normalOrder))
        return notes

    def get_encoded_notes(self):
        """Build the vocabulary and return the corpus as integer indices.

        Side effects: sets ``self.unique_notes`` (sorted vocabulary),
        ``self.pitchdict`` (token -> index) and ``self.inverse_pitchdict``
        (index -> token).

        Returns:
            list[int]: index of every token returned by ``get_musical_notes``.
        """
        notes = self.get_musical_notes()
        self.unique_notes = sorted(set(notes))
        # ``token`` rather than ``note`` so the music21 ``note`` module is not
        # shadowed for the reader.
        self.pitchdict = {token: number for number, token in enumerate(self.unique_notes)}
        self.inverse_pitchdict = dict(enumerate(self.unique_notes))
        return [self.pitchdict[token] for token in notes]

    def get_training_sequences(self, sequence_length=100):
        """Slice the encoded corpus into sliding windows and next-note targets.

        Args:
            sequence_length: number of consecutive notes in each input window.

        Returns:
            tuple: ``(ins, out)`` where ``ins`` has shape
            ``(n_windows, sequence_length, 1)`` with indices divided by the
            vocabulary size, and ``out`` is the one-hot next note with
            ``len(self.unique_notes)`` columns.

        Raises:
            ValueError: if the corpus has no more notes than ``sequence_length``
                (no window/target pair can be formed).
        """
        notes = self.get_encoded_notes()
        n_vocab = len(self.unique_notes)
        n_windows = len(notes) - sequence_length
        if n_windows <= 0:
            raise ValueError(
                'Need more than %d notes to build training sequences, got %d.'
                % (sequence_length, len(notes))
            )

        ins = [notes[i:i + sequence_length] for i in range(n_windows)]
        out = [notes[i + sequence_length] for i in range(n_windows)]
        ins = np.expand_dims(np.array(ins), axis=2) / float(n_vocab)
        # Pin the one-hot width to the vocabulary size. Without num_classes the
        # width is inferred from max(out) + 1, which is too narrow for the
        # softmax layer whenever the highest index never occurs as a target.
        out = to_categorical(out, num_classes=n_vocab)
        return ins, out

    def compile_model(self, inputs, latent_dim=256):
        """Build and compile the stacked-LSTM classifier; store it on ``self.model``.

        Args:
            inputs: array whose ``shape[1]`` and ``shape[2]`` define the LSTM
                input shape (timesteps, features).
            latent_dim: width of the first/last LSTM and the hidden Dense layer;
                the middle LSTM uses twice this width.

        Returns:
            The compiled ``Sequential`` model (also kept as ``self.model``).
        """
        shape = (inputs.shape[1], inputs.shape[2])
        model = Sequential()
        model.add(LSTM(latent_dim, input_shape=shape, recurrent_dropout=0.3, return_sequences=True))
        model.add(LSTM(2 * latent_dim, recurrent_dropout=0.3, return_sequences=True))
        model.add(LSTM(latent_dim))
        model.add(BatchNorm())
        model.add(Dropout(0.3))
        model.add(Dense(latent_dim))
        model.add(Activation('relu'))
        model.add(BatchNorm())
        model.add(Dropout(0.3))
        # One output unit per vocabulary entry.
        model.add(Dense(len(self.unique_notes)))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
        self.model = model
        return model

    def train(self, X, y, checkpoint='best_model.h5', epochs=100, batch_size=128, split=0.01, verbose=0):
        """Fit ``self.model`` and checkpoint the weights with the lowest ``val_loss``.

        Args:
            X, y: inputs and one-hot targets passed to ``model.fit``.
            checkpoint: path the best model is saved to.
            epochs, batch_size, verbose: forwarded to ``model.fit``.
            split: forwarded as ``validation_split``.

        Returns:
            The history object returned by ``model.fit`` (also ``self.history``).
        """
        mc = ModelCheckpoint(checkpoint, monitor='val_loss', mode='min', verbose=1, save_best_only=True)
        self.history = self.model.fit(X, y, epochs=epochs, batch_size=batch_size,
                                      validation_split=split, verbose=verbose,
                                      callbacks=[mc])
        return self.history

    def predict(self, sequence, model_name='best_model.h5', num_notes=500):
        """Generate note tokens autoregressively from a seed window.

        Args:
            sequence: seed window. Either a normalised window as returned by
                ``get_training_sequences`` (all values < 1) or raw vocabulary
                indices; raw indices are normalised here.
            model_name: path of the saved model to load.
            num_notes: how many tokens to generate.

        Returns:
            list[str]: generated tokens looked up in ``self.inverse_pitchdict``.

        Raises:
            ValueError: if ``sequence`` is empty.
        """
        n_vocab = float(len(self.unique_notes))
        # ravel (not squeeze) so a one-element seed still yields a list.
        window = [float(value) for value in np.ravel(sequence)]
        if not window:
            raise ValueError('Seed sequence must contain at least one note.')
        # Normalised values are index / n_vocab and therefore always < 1, so any
        # value >= 1 means the caller passed raw indices. Scaling exactly once
        # keeps the seed on the same scale the model was trained on.
        if max(window) >= 1.0:
            window = [value / n_vocab for value in window]

        model = load_model(model_name)
        output = []
        for _ in range(num_notes):
            inputs = np.reshape(window, (1, len(window), 1))
            prediction = model.predict(inputs, verbose=0)
            index = int(np.argmax(prediction))
            output.append(self.inverse_pitchdict[index])
            # Feed the prediction back in normalised form and slide the window.
            window.append(index / n_vocab)
            window = window[1:]
        return output

    def generate_notes(self, sequence, model_name='best_model.h5', outfile='test_output.mid', num_notes=500):
        """Generate tokens from ``sequence`` and write them to a MIDI file.

        Tokens containing ``'.'`` or consisting only of digits are rebuilt as
        chords from their integer components; anything else is treated as a
        single pitch name. Every element is placed 0.5 offset units after the
        previous one.

        Args:
            sequence: seed window, see ``predict``.
            model_name: path of the saved model to load.
            outfile: path of the MIDI file to write.
            num_notes: how many tokens to generate.
        """
        out_notes = self.predict(sequence, model_name, num_notes)
        output_notes = []
        offset = 0
        for pattern in out_notes:
            if ('.' in pattern) or pattern.isdigit():
                chord_notes = []
                for current_note in pattern.split('.'):
                    new_note = note.Note(int(current_note))
                    new_note.storedInstrument = instrument.Piano()
                    chord_notes.append(new_note)
                new = chord.Chord(chord_notes)
            else:
                new = note.Note(pattern)
                new.storedInstrument = instrument.Piano()
            new.offset = offset
            output_notes.append(new)
            offset += 0.5
        midi_stream = stream.Stream(output_notes)
        midi_stream.write('midi', fp=outfile)
        print('New music has been generated.')

time: 24.8 ms


In [3]:
# Hyper-parameters and paths for this run.
epochs = 50
batch_size = 256
latent_dim = 256
sequence_length = 100
checkpoint = 'best_model.h5'

# glob returns matches in arbitrary, OS-dependent order. Sort them so the
# concatenated note stream, the training windows and the validation tail are
# the same on every run.
files = sorted(glob.glob('./songs/*.mid'))
music = Music(files)

# NOTE: get_training_sequences() encodes the corpus again internally, so the
# files are parsed twice here; `notes` is kept for later inspection.
notes = music.get_encoded_notes()
ins, out = music.get_training_sequences(sequence_length)
model = music.compile_model(ins, latent_dim)

# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line.
model.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 100, 256)          264192    
_________________________________________________________________
lstm_2 (LSTM)                (None, 100, 512)          1574912   
_________________________________________________________________
lstm_3 (LSTM)                (None, 256)               787456    
_________________________________________________________________
batch_normalization_1 (Batch (None, 256)               1024      
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_______________________________________

In [4]:
# Train with a 1% validation hold-out; the best val_loss model is written to `checkpoint`.
hist = music.train(
    ins,
    out,
    checkpoint=checkpoint,
    epochs=epochs,
    batch_size=batch_size,
    split=0.01,
    verbose=1,
)


Train on 56506 samples, validate on 571 samples
Epoch 1/50

Epoch 00001: val_loss improved from inf to 4.38825, saving model to best_model.h5
Epoch 2/50

Epoch 00002: val_loss did not improve from 4.38825
Epoch 3/50

Epoch 00003: val_loss did not improve from 4.38825
Epoch 4/50

Epoch 00004: val_loss improved from 4.38825 to 4.37649, saving model to best_model.h5
Epoch 5/50

Epoch 00005: val_loss did not improve from 4.37649
Epoch 6/50

Epoch 00006: val_loss improved from 4.37649 to 4.25812, saving model to best_model.h5
Epoch 7/50

Epoch 00007: val_loss did not improve from 4.25812
Epoch 8/50

Epoch 00008: val_loss did not improve from 4.25812
Epoch 9/50

Epoch 00009: val_loss improved from 4.25812 to 4.19251, saving model to best_model.h5
Epoch 10/50

Epoch 00010: val_loss did not improve from 4.19251
Epoch 11/50

Epoch 00011: val_loss did not improve from 4.19251
Epoch 12/50

Epoch 00012: val_loss improved from 4.19251 to 4.19157, saving model to best_model.h5
Epoch 13/50

Epoch 00

In [7]:
# Seed generation with the third training window and write the result as MIDI.
music.generate_notes(sequence=ins[2], outfile='new_music.mid')

New music has been generated.
time: 58.4 s
