# Requirements

In [None]:
!pip install music21

# Data processing

In [4]:
import glob
from music21 import converter, instrument, note, chord, stream
import pickle as pk
import numpy as np
from keras.utils import to_categorical
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
# Collect all MIDI files in the data folder.
# Data can be downloaded from "http://www.piano-midi.de/mozart.htm"
# glob.glob returns matches in a filesystem-dependent order, so sort them to
# make the order of files (and hence of the extracted notes) reproducible.
midi_files = sorted(glob.glob("./mozart/*.mid"))

In [5]:
# Flatten every MIDI file into one list of string tokens:
#   - a single note is stored as its pitch (str(val.pitch))
#   - a chord is stored as its normal-order integers joined by '.'
notes = []
for midi_path in tqdm(midi_files):  # Iterate through each file
    data = converter.parse(midi_path)  # Parse the midi file into notes, chords, etc.

    # Group the score by instrument (in this data set only piano).
    # partitionByInstrument may return None when no partition is available;
    # in that case walk the whole parsed score instead of failing on `.parts`.
    partitioned = instrument.partitionByInstrument(data)
    if partitioned is not None:
        elements = partitioned.parts[0].recurse()
    else:
        elements = data.recurse()

    for val in elements:
        if isinstance(val, note.Note):
            notes.append(str(val.pitch))
        elif isinstance(val, chord.Chord):
            notes.append('.'.join(str(n) for n in val.normalOrder))

100%|██████████| 21/21 [02:48<00:00,  8.04s/it]


In [7]:
# Show a sample of the extracted tokens (indices 1..99; index 0 is skipped by
# the slice) followed by the total number of tokens collected.
print(notes[1:100])
print("\nTotal number of notes: {}".format(len(notes)))

['4.7', '0.4', '9.0', '9.0', '2.5', '5.9', '2.5', '11.2', '7.11', '7.11', 'C5', '4.7', 'D5', 'E5', 'C5', 'A4', 'F3', 'B4', 'C5', 'C#5', 'D5', 'E5', 'F5', 'D3', 'D5', 'C5', 'G3', 'B4', 'A4', 'G4', '4.7', '4.7', '0.4', '9.0', '9.0', '5.9', '2.5', '5.9', '11.2', '7.11', '7.11', 'A5', '0.4', 'G5', 'F5', 'E5', 'C#5', '5.9', 'D5', 'E5', 'F5', 'A4', 'C5', 'B4', '5.7', 'D5', 'C5', '0.4', 'C3', '4.7', '4.7', '0.4', '9.0', '9.0', '2.5', '5.9', '2.5', '11.2', '7.11', '7.11', 'C5', '4.7', 'D5', 'E5', 'C5', 'A4', 'F3', 'B4', 'C5', 'C#5', 'D5', 'E5', 'F5', 'D3', 'D5', 'C5', 'G3', 'B4', 'A4', 'G4', '4.7', '4.7', '0.4', '9.0', '9.0', '5.9', '2.5', '5.9', '11.2']

Total number of notes: 59618



## Prepare data for network



In [8]:
# Vocabulary: the distinct note/chord tokens, in sorted order
vals_list = sorted(set(notes))

In [9]:
# Report the vocabulary size (number of distinct note/chord tokens)
print("No of unique notes: {}".format(len(vals_list)))

No of unique notes: 197


In [10]:
# Lookup table from each token to its position in the sorted vocabulary
notes_to_index = {token: position for position, token in enumerate(vals_list)}

In [None]:
sequence_length = 50  # length of input sequence to network
n_vocab = len(vals_list)

# Without at least one full window plus its target there is nothing to train
# on; fail with a clear message instead of a NameError further down.
if len(notes) <= sequence_length:
    raise ValueError(
        "Need more than {} notes to build training sequences, got {}".format(
            sequence_length, len(notes)))

# Convert the whole corpus to integer indices once, rather than re-encoding
# every overlapping window.
encoded_notes = [notes_to_index[token] for token in notes]

# Create input sequences and the corresponding outputs:
# window i covers positions i .. i+sequence_length-1, target is position i+sequence_length.
network_input = []
network_output = []
for i in range(len(encoded_notes) - sequence_length):
    network_input.append(encoded_notes[i:i + sequence_length])
    network_output.append(encoded_notes[i + sequence_length])

n_patterns = len(network_input)

# reshape the input into a format compatible with LSTM layers
network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
# normalize input: indices 0..n_vocab-1 become values in [0, 1)
network_input = network_input / float(n_vocab)
# One-hot encode the targets. num_classes is fixed to n_vocab so the width
# always matches the Dense(n_vocab) output layer, even if the highest-index
# token never occurs as a target.
network_output = to_categorical(network_output, num_classes=n_vocab)

In [12]:
from keras.models import Model
from keras.layers import Dense, Activation, Dropout,Flatten
from keras.layers import LSTM, Input, Bidirectional
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

In [23]:
# Base LSTM model
def create_model_v1():
    """Build the baseline stacked-LSTM next-note classifier.

    Layers, in order: LSTM(256) -> LSTM(512) -> LSTM(256) -> Dropout(0.5)
    -> Dense(256, relu) -> Dropout(0.5) -> Dense(n_vocab) -> softmax.

    Uses the notebook-level ``sequence_length`` and ``n_vocab``.

    Returns:
        An uncompiled ``Model`` taking inputs of shape (sequence_length, 1).
    """
    seq_input = Input(shape=(sequence_length, 1))

    # Recurrent stack; only the last LSTM collapses the time dimension.
    x = LSTM(256, return_sequences=True)(seq_input)
    x = LSTM(512, return_sequences=True)(x)
    x = LSTM(256, return_sequences=False)(x)
    x = Dropout(rate=0.5)(x)

    x = Dense(256, activation='relu')(x)
    x = Dropout(rate=0.5)(x)

    # One output unit per unique token, turned into probabilities by softmax.
    scores = Dense(n_vocab)(x)
    probabilities = Activation('softmax')(scores)

    return Model(inputs=seq_input, outputs=probabilities)


In [24]:
# LSTM model with more dropout and nodes
def create_model_v2():
    """Build the stacked-LSTM variant with dropout after every layer.

    Layers, in order: LSTM(256) -> Dropout(0.2) -> LSTM(512) -> Dropout(0.2)
    -> LSTM(512) -> Dropout(0.5) -> Dense(256, relu) -> Dropout(0.2)
    -> Dense(n_vocab) -> softmax.

    Uses the notebook-level ``sequence_length`` and ``n_vocab``.

    Returns:
        An uncompiled ``Model`` taking inputs of shape (sequence_length, 1).
    """
    seq_input = Input(shape=(sequence_length, 1))

    x = LSTM(256, return_sequences=True)(seq_input)
    x = Dropout(rate=0.2)(x)

    x = LSTM(512, return_sequences=True)(x)
    x = Dropout(rate=0.2)(x)

    # Last recurrent layer returns only its final state.
    x = LSTM(512, return_sequences=False)(x)
    x = Dropout(rate=0.5)(x)

    x = Dense(256, activation='relu')(x)
    x = Dropout(rate=0.2)(x)

    # One output unit per unique token, turned into probabilities by softmax.
    scores = Dense(n_vocab)(x)
    probabilities = Activation('softmax')(scores)

    return Model(inputs=seq_input, outputs=probabilities)






In [25]:
# Base Bidirectional LSTM model
def create_model_v3():
    """Build the bidirectional counterpart of the baseline model.

    Layers, in order: Bidirectional(LSTM(256)) -> Bidirectional(LSTM(512))
    -> Bidirectional(LSTM(256)) -> Dropout(0.5) -> Dense(256, relu)
    -> Dropout(0.5) -> Dense(n_vocab) -> softmax.

    Uses the notebook-level ``sequence_length`` and ``n_vocab``.

    Returns:
        An uncompiled ``Model`` taking inputs of shape (sequence_length, 1).
    """
    seq_input = Input(shape=(sequence_length, 1))

    # Recurrent stack; only the last wrapped LSTM collapses the time dimension.
    x = Bidirectional(LSTM(256, return_sequences=True))(seq_input)
    x = Bidirectional(LSTM(512, return_sequences=True))(x)
    x = Bidirectional(LSTM(256, return_sequences=False))(x)
    x = Dropout(rate=0.5)(x)

    x = Dense(256, activation='relu')(x)
    x = Dropout(rate=0.5)(x)

    # One output unit per unique token, turned into probabilities by softmax.
    scores = Dense(n_vocab)(x)
    probabilities = Activation('softmax')(scores)

    return Model(inputs=seq_input, outputs=probabilities)


In [26]:
# modified v2 LSTM model
def create_model_v4():
    """Build the stacked-LSTM variant with a single, lighter dropout layer.

    Layers, in order: LSTM(256) -> LSTM(512) -> LSTM(256) -> Dropout(0.3)
    -> Dense(256, relu) -> Dense(n_vocab) -> softmax.

    Uses the notebook-level ``sequence_length`` and ``n_vocab``.

    Returns:
        An uncompiled ``Model`` taking inputs of shape (sequence_length, 1).
    """
    seq_input = Input(shape=(sequence_length, 1))

    # Recurrent stack; only the last LSTM collapses the time dimension.
    x = LSTM(256, return_sequences=True)(seq_input)
    x = LSTM(512, return_sequences=True)(x)
    x = LSTM(256, return_sequences=False)(x)
    x = Dropout(rate=0.3)(x)

    x = Dense(256, activation='relu')(x)

    # One output unit per unique token, turned into probabilities by softmax.
    scores = Dense(n_vocab)(x)
    probabilities = Activation('softmax')(scores)

    return Model(inputs=seq_input, outputs=probabilities)






In [30]:
# Instantiate the v4 architecture (not yet compiled)
model = create_model_v4()

In [31]:
# Print the layer-by-layer summary of the selected model
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         (None, 50, 1)             0         
_________________________________________________________________
lstm_19 (LSTM)               (None, 50, 256)           264192    
_________________________________________________________________
lstm_20 (LSTM)               (None, 50, 512)           1574912   
_________________________________________________________________
lstm_21 (LSTM)               (None, 256)               787456    
_________________________________________________________________
dropout_15 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 256)               65792     
_________________________________________________________________
dense_13 (Dense)             (None, 197)               50629     
__________

In [None]:
# Categorical cross-entropy matches the one-hot encoded targets in network_output
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
# Write a checkpoint to 'best_v4.h5' only when the monitored training loss
# ('loss', mode='min') improves on the best value seen so far.
checkpoint = ModelCheckpoint('best_v4.h5',save_best_only=True,verbose=1,monitor='loss',mode='min')

In [None]:
# Train for 110 epochs in batches of 128 with the checkpoint callback attached;
# the returned History object is kept for plotting the loss curve.
history = model.fit(network_input,network_output,batch_size=128,epochs=110,verbose=1,callbacks=[checkpoint])

In [None]:
# Training-loss curve, one point per epoch
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.set_title('Model loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
plt.show()



In [None]:
# Start from a randomly chosen training window.
# randint's upper bound is exclusive, so this can select any window.
start = np.random.randint(0, len(network_input))

# dictionary that maps indices back to note/chord tokens
index_to_notes = {index: token for index, token in enumerate(vals_list)}

# network_input was already divided by n_vocab when it was prepared, so the
# seed is kept in that normalized scale as a flat list of floats. It must not
# be divided by n_vocab a second time.
pattern = [float(step[0]) for step in network_input[start]]
prediction_output = []

n_generate = 200  # number of notes/chords to generate
for _ in range(n_generate):
    # get proper shape: (batch=1, timesteps, features=1)
    prediction_input = np.reshape(pattern, (1, len(pattern), 1))
    prediction = model.predict(prediction_input, verbose=0)
    index = int(np.argmax(prediction))
    prediction_output.append(index_to_notes[index])

    # Slide the window: append the prediction in the same normalized scale as
    # the rest of the pattern and drop the oldest step.
    pattern.append(index / float(n_vocab))
    pattern = pattern[1:]

In [None]:
# Turn the predicted tokens into music21 objects.
# A chord token is a '.'-joined list of integers (or a single integer) and is
# rebuilt as a chord.Chord; any other token is a pitch name for one note.Note.

offset = 0
output_notes = []
# Loop/local names are chosen so they do not overwrite the notebook-level
# `notes` (training tokens) and `pattern` (generation seed) variables.
for token in prediction_output:
    # token is a chord
    if ('.' in token) or token.isdigit():
        chord_notes = []
        for current_note in token.split('.'):
            new_note = note.Note(int(current_note))
            new_note.storedInstrument = instrument.Piano()
            chord_notes.append(new_note)
        new_chord = chord.Chord(chord_notes)
        new_chord.offset = offset
        output_notes.append(new_chord)
    # token is a single note
    else:
        new_note = note.Note(token)
        new_note.offset = offset
        new_note.storedInstrument = instrument.Piano()
        output_notes.append(new_note)
    # increase offset each iteration so that notes do not stack
    offset += 0.5

In [None]:
# Save the generated notes as a MIDI file
# NOTE(review): the file name says "model_v2" but the notebook builds the model
# with create_model_v4() and checkpoints to 'best_v4.h5' — confirm the intended name.
midi_stream = stream.Stream(output_notes)
midi_stream.write('midi', fp='len_50_model_v2.mid')