In [22]:
# 3rd-Party Modules 
import music21
import tqdm
from keras.utils import np_utils
import numpy as np
from tensorflow.keras.callbacks import ModelCheckpoint
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import BatchNormalization as BatchNorm
from keras.layers import LSTM
from keras.layers import Activation
from keras.models import Sequential

# Built-In Modules 
import glob
import pickle

Based on this repository: https://github.com/Skuldur/Classical-Piano-Composer

## Prepare the Data
Loop over the MIDI files and convert each one to a music21 object so that its notes and chords can be extracted and passed to our model. Parsing takes a while, so I've included a .pkl file with the extracted notes that can be loaded in a later step instead.

In [2]:
# Flat list of every note/chord token, in the order the files are read
notes = []

# glob returns matches in arbitrary, OS-dependent order. Sort them so the
# resulting note stream (and every training sequence cut from it) is the
# same on every run and on every machine.
files = sorted(glob.glob("input_data/*/*.mid"))
for file in tqdm.tqdm(files):
    # Convert the file into a music21 object
    midi = music21.converter.parse(file)

    # Separate out the different instrument parts, if there are any
    parts = music21.instrument.partitionByInstrument(midi)
    if parts:
        # File has instrument parts: only the first part is used
        notes_to_parse = parts.parts[0].recurse()
    else:
        # File has no instrument parts: use the notes of the flattened stream
        # NOTE(review): newer music21 releases prefer .flatten() over .flat;
        # confirm against the installed version before changing it
        notes_to_parse = midi.flat.notes

    # Loop over the elements we extracted
    for element in notes_to_parse:
        if isinstance(element, music21.note.Note):
            # A Note object -> store its pitch as a string
            notes.append(str(element.pitch))
        elif isinstance(element, music21.chord.Chord):
            # A Chord object -> store the normal-order IDs of its notes, joined by '.'
            notes.append('.'.join(str(n) for n in element.normalOrder))

100%|██████████| 292/292 [02:29<00:00,  1.95it/s]


In [6]:
# Optional: cache the extracted notes on disk so the slow MIDI parsing cell
# can be skipped next time
with open('checkpoints/notes_pickle.pkl', 'wb') as pickle_file:
    pickle.dump(notes, pickle_file)

## Process the data
Now we have our input data stored as a mix of words and numbers. ML models work with numbers, so let's encode our data by creating a mapping from words to integers.

In [None]:
# Optional: restore the previously pickled notes instead of re-parsing the MIDI files.
# Only unpickle files you trust: loading a pickle can execute arbitrary code.
with open('checkpoints/notes_pickle.pkl', 'rb') as pickle_file:
    notes = pickle.load(pickle_file)

In [9]:
# Variable sequence_length: how many preceding notes the network is shown
# before it has to predict the next note
sequence_length = 100

# Every distinct note/chord token, sorted so the integer encoding is stable between runs
pitchnames = sorted(set(notes))

# Number of 'vocab' words is the number of distinct notes
n_vocab = len(pitchnames)

# Map each token to its index in the sorted vocabulary
note_to_int = {note: number for number, note in enumerate(pitchnames)}

# Capture our input/output
network_input = []
network_output = []

# Create input sequences and the corresponding outputs: a sliding window of
# sequence_length tokens is the input, the token right after it is the target
for i in range(len(notes) - sequence_length):
    sequence_in = notes[i:i + sequence_length]
    sequence_out = notes[i + sequence_length]
    network_input.append([note_to_int[char] for char in sequence_in])
    network_output.append(note_to_int[sequence_out])
n_patterns = len(network_input)

# Reshape the input into (n_patterns, sequence_length, 1) for the LSTM layers
network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))

# Normalize the input (i.e. each value divided by the number of words)
network_input = network_input / float(n_vocab)

# One-hot encode the targets. num_classes is passed explicitly because
# to_categorical otherwise infers the width from the largest label present;
# if the highest-indexed token never occurs as a target, the width would be
# smaller than n_vocab and no longer match the model's Dense(n_vocab) output.
network_output = np_utils.to_categorical(network_output, num_classes=n_vocab)

## Create the Model

In [14]:
def create_model(network_input, n_vocab):
    """
    Build and compile the stacked-LSTM network used for training.

    :param network_input: Training input; only its .shape is read
        (shape[1] and shape[2] become the input_shape of the first LSTM)
    :param Integer n_vocab: The number of vocab words, used as the size of
        the final Dense (softmax) layer
    :return: The compiled Sequential model
    """
    input_shape = (network_input.shape[1], network_input.shape[2])

    # Layers in the order they are stacked
    layers = [
        LSTM(512, input_shape=input_shape, recurrent_dropout=0.3, return_sequences=True),
        LSTM(512, return_sequences=True, recurrent_dropout=0.3),
        LSTM(512),
        BatchNorm(),
        Dropout(0.3),
        Dense(256),
        Activation('relu'),
        BatchNorm(),
        Dropout(0.3),
        Dense(n_vocab),
        Activation('softmax'),
    ]

    network = Sequential()
    for layer in layers:
        network.add(layer)

    network.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return network

# Build the network for the prepared training input and vocabulary size
model = create_model(network_input=network_input, n_vocab=n_vocab)

## Train Model

In [24]:
# Checkpoint file name pattern; the epoch number and training loss are filled in on save.
# NOTE(review): the notes pickle is stored under 'checkpoints/' while these
# files go to 'check_points/' — confirm the two directories are intentional.
filepath = "check_points/weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"

# Only write a checkpoint when the training loss improves. Without
# save_best_only the callback writes a file after every one of the 200
# epochs, whether or not the loss improved. No validation data is passed to
# fit(), so the monitored quantity is the training 'loss'.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=0,
    save_best_only=True,
    mode='min',
)
callbacks_list = [checkpoint]

model.fit(network_input, network_output, epochs=200, batch_size=128, callbacks=callbacks_list)

Epoch 1/200
   9/4029 [..............................] - ETA: 4:11:36 - loss: 6.0661

KeyboardInterrupt: 