In [134]:
from music21 import converter, instrument, note, chord, key, tempo, duration, stream

import numpy as np
import pandas as pd

from os import listdir, path
from sys import maxsize, getsizeof

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

from keras.models import Sequential, load_model
from keras.layers import LSTM, Dropout, Dense, Activation, LeakyReLU
from keras.callbacks import ModelCheckpoint

In [142]:
#defining function to read MIDI files
def read_midi(file, time_tol = 1.e-3):
    """This function reads a midi file for the notes, offsets, and durations for each piano part, separately.
    The data needs to be further processed because there are many notes occurring at identical offsets (meaning 
    they start at the same time) but have different durations. I convert this sequence into one in which there is
    no overlap, i.e. any time there is a change in the state of the piano keyboard, a new element is created 
    describing this state along with a duration for that state. This format will be much easier to use for encoding 
    the data into the neural network input later on. These sequences, separated by part, are  returned along with 
    the musical key of the song (we need this because we will eventually transpose everything to the key of C.)"""
    
    print("Loading Music File:",file)
    notes_by_part = []          # MIDI files can contain multiple parts. I will focus on piano, but even here
                                # there may be different files for the right hand and left hand, for example

    midi = converter.parse(file)
    parts = instrument.partitionByInstrument(midi)  # will extract the parsed data all instrument instructions
                                                    # separately
    keys_by_song = []           # An array, to be returned, containing elements of keys (below) for each song
    notes_by_part = {}          # A dict for storing the processes sequences by part (the keys of the dict are
                                # the part names)

    for part in parts:
        notes_to_parse = part.recurse()
        notes = []        # This list will form the values of notes_by_part
        keys = []         # A list will contain the musical keys (one element per part)
        
        bpm = None        # Beats per minute, this changes a lot throughout the song.
                          # will use to scale durations. Actually, to keep it simpler, we
                          # will ignore this for now. The durations will just be in units
                          # of quarter notes (AKA beats)
                    
        offset = 0        # the offset is the number (float value) of beats into a song
        last_offset = 0   # the notes we are currently reading in start at.
        
        all_notes_ato = []  # All notes At This Offset (ATO). Tracks the notes that need to
                            # be added in between two offsets
        for element in notes_to_parse:
            if (isinstance(element, instrument.Piano) or isinstance(element, instrument.Instrument)):
                continue
            if (isinstance(element, tempo.MetronomeMark)):    # Update bpm
                    bpm = element.number
            elif (isinstance(element, key.Key)):    # Musical Key
                keys.append(str(element)) 
            elif (isinstance(element, note.Rest)):  # Ignore rests in the file, I will infer them from the
                continue                            # offsets and durations
            else:
                if (bpm is None):
                    print('bpm is None before first note, skipping part')
                    break
                    
                if (element.offset == last_offset):   # We're still at this offset, so keep adding to all_notes_ato
                    if (isinstance(element, note.Note)):     # Note
                        all_notes_ato.append((str(element.pitch), \
                                                         element.duration.quarterLength))
                    elif (isinstance(element, chord.Chord)):
                        all_notes_ato.append(('.'.join(str(n) for n in element.pitches), \
                                                           element.duration.quarterLength))                                   
                else:    # a new offset, we need to write all the different piano states
                         # that occurred in this offset interval, and add a rest if the
                         # offset interval is longer than the durations of these distinct states
                    offset = element.offset
                    cur_offset = last_offset
                    if (all_notes_ato):   # We have notes to write at this offset
                        all_notes_ato.sort(key = lambda x: x[1])
                        while(cur_offset < offset):  
                            shortest_duration = all_notes_ato[0][1]
                            if (shortest_duration < (offset - cur_offset)):    # write some intermediate
                                                                               # lines, for those notes
                                                                               # whose durations fall in
                                                                               # this offset interval
                                notes.append(('.'.join(n[0] for n in all_notes_ato), \
                                            all_notes_ato[0][1]))
                                cur_offset += shortest_duration
                                while(all_notes_ato and all_notes_ato[0][1] == shortest_duration):
                                    all_notes_ato.pop(0)
                                if (not all_notes_ato):
                                    notes.append(('rest', offset - cur_offset))
                                    cur_offset = offset
                                    break
                            elif (all_notes_ato[0][1] > ((offset - cur_offset) + time_tol)):  
                                # All notes leftover should be transferred, but with 
                                # their durations shortened.
                                # Added tolerance because of rounding errors.
                                corrected = []
                                for i in range(len(all_notes_ato)):
                                    corrected.append((all_notes_ato[i][0], all_notes_ato[i][1] \
                                                    - (offset - cur_offset)))
                                all_notes_ato = corrected
                                cur_offset = offset
                            else:  # they are equal (or close enough!)
                                cur_offset = offset
                                notes.append(('.'.join(n[0] for n in all_notes_ato), \
                                                all_notes_ato[0][1]))
                                all_notes_ato.clear()  # get ready for next offset interval
                    if (isinstance(element, note.Note)):
                        all_notes_ato.append((str(element.pitch), element.duration.quarterLength))
                    elif (isinstance(element, chord.Chord)):
                        all_notes_ato.append(('.'.join(str(n) for n in element.pitches), \
                                                           element.duration.quarterLength))
                    last_offset = element.offset

        # Add info from that song to the return variables
        notes_by_part[part.partName] = notes    
        keys_by_song.append(list(set(keys)))
    
    return keys_by_song, notes_by_part


In [144]:
directory = './composers'
sub_dir = directory + '/chopin'
songs = []
for filename in listdir(sub_dir):
    file = path.join(sub_dir, filename)
    songs.append(read_midi(file))

Loading Music File: ./composers/chopin/chpn_op23.mid
Loading Music File: ./composers/chopin/chpn-p19.mid
Loading Music File: ./composers/chopin/chpn_op7_2.mid
Loading Music File: ./composers/chopin/chpn-p18.mid
Loading Music File: ./composers/chopin/chpn-p24.mid
Loading Music File: ./composers/chopin/chpn_op7_1.mid
Loading Music File: ./composers/chopin/chpn-p23.mid
Loading Music File: ./composers/chopin/chpn-p9.mid
Loading Music File: ./composers/chopin/chpn-p8.mid
Loading Music File: ./composers/chopin/chpn-p22.mid
Loading Music File: ./composers/chopin/chpn-p20.mid
Loading Music File: ./composers/chopin/chpn-p21.mid
Loading Music File: ./composers/chopin/chp_op18.mid
Loading Music File: ./composers/chopin/chpn_op35_4.mid
Loading Music File: ./composers/chopin/chpn_op33_2.mid
Loading Music File: ./composers/chopin/chp_op31.mid
Loading Music File: ./composers/chopin/chpn_op25_e4.mid
Loading Music File: ./composers/chopin/chpn_op35_2.mid
bpm is None before first note, skipping part
Loa

In [145]:
drop_indices = []   # Drop songs where there is a key change

keys_by_song = []
notes_by_song = []
for i in range(len(songs)):
    keys, notes_by_part = songs[i]
    print(i, keys)
    new_keys = None
    for k in keys:
        if (k):
            new_keys = k
            break
    if (len(new_keys) > 1):
        drop_indices.append(i)
    else:
        keys_by_song.append(new_keys[0])
        cur_notes_by_part = []
        for notes in notes_by_part.values():
            if (notes):
                cur_notes_by_part.append(notes)   # We won't label the parts, they are
        notes_by_song.append(cur_notes_by_part)   # all piano and we will merge them
                                                  # as if they were all played on one piano
print(drop_indices)
print(keys_by_song)

0 [[], ['B- major'], []]
1 [[], ['E- major'], []]
2 [[], ['A major', 'C major'], []]
3 [[], ['A- major'], []]
4 [[], ['F major'], []]
5 [[], ['B- major'], []]
6 [[], ['F major'], []]
7 [[], ['E major'], [], []]
8 [[], ['A major'], [], []]
9 [[], ['B- major'], []]
10 [[], ['E- major'], []]
11 [[], ['B- major'], []]
12 [[], ['D- major', 'E- major'], []]
13 [[], ['D- major'], []]
14 [[], ['D major', 'B- major'], []]
15 [[], ['A major', 'D- major'], []]
16 [[], ['C major'], []]
17 [['G- major'], [], ['G- major'], []]
18 [[], ['A- major'], [], []]
19 [[], ['D major', 'B major', 'B- major'], []]
20 [[], ['A- major', 'E major'], []]
21 [[], ['D- major'], []]
22 [[], ['D- major', 'B- major'], []]
23 [[], ['A- major'], []]
24 [[], ['B major', 'F major'], []]
25 [[], ['E- major'], []]
26 [[], ['G- major'], []]
27 [[], ['C major'], []]
28 [[], ['D- major', 'E major'], []]
29 [[], ['E major'], []]
30 [[], ['E- major'], []]
31 [[], ['D major'], []]
32 [[], ['A major'], []]
33 [[], ['B major'], []]


In [5]:
# For index 17, now index 13 (after dropping those songs with multiple keys)
# i may need to merge the two different parts:
# Let's add up the durations to see if they're equal
# Wow, purty close. I wonder why. Maybe need to look at offsets
index = 13
sum_0 = 0
for i in range(len(notes_by_song[index][0])):
    sum_0 += notes_by_song[index][0][i][1]
sum_1 = 0
for i in range(len(notes_by_song[index][1])):
    sum_1 += notes_by_song[index][1][i][1]
print(sum_0, sum_1)

693.0 905.5000000000001


In [146]:
# Is this because i left out all the rests? I thought inferring the rests from offset and 
# durations would work
# Let's drop it for now and see if we have done the reading/outputting correctly by comparing
# the input and output midis. Update: It works!
notes_by_song.pop(index);

In [7]:
# Have a look at the data
notes_by_song[0]

[[('C3.C2', 2.5),
  ('E-3.E-2', 0.5),
  ('G#3.G#2', 0.5),
  ('B-3.B-2', 0.5),
  ('C4.C3', 0.5),
  ('G#3.G#2', 0.5),
  ('E-4.E-3', 0.5),
  ('B-4.B-3', 0.5),
  ('C5.C4', 0.5),
  ('G#4.G#3', 0.5),
  ('E-5.E-4', 0.5),
  ('B-5.B-4', 0.5),
  ('C6.C5', 0.5),
  ('G5.G4', 0.5),
  ('B-5.B-4', 0.5),
  ('G#5.G#4', 0.5),
  ('G5.G4', 1.0),
  ('F#5.F#4', 0.5),
  ('rest', 1.0),
  ('F#5.F#4', 0.5),
  ('G5.G4', 0.5),
  ('F#5.F#4', 0.5),
  ('F5.F4', 0.5),
  ('F#5.F#4', 0.5),
  ('A5.A4', Fraction(1, 3)),
  ('rest', 0.0),
  ('G5.G4', Fraction(1, 3)),
  ('E-5.E-4', Fraction(1, 3)),
  ('rest', 0.0),
  ('E-5.E-4', 0.75),
  ('D5.D4', 0.25),
  ('F5.F4', Fraction(1, 3)),
  ('E-5.E-4', Fraction(1, 3)),
  ('D5.D4', Fraction(1, 3)),
  ('D5.D4', 1.0),
  ('rest', 3.0),
  ('C5.G3.E-3.C4', 1.5),
  ('G4', 0.5),
  ('D3.G3.E-4.B-4', 3.5),
  ('D2', 0.5),
  ('F#4.C4.D4', 0.5),
  ('B-4', 0.5),
  ('A4', 0.5),
  ('B-3.D4.G2.G4', 1.0),
  ('B-3.D4.G2', 1.0),
  ('D4.G4.B-3.D5', 1.0),
  ('D4.G4.B-3', 1.0),
  ('C4.E-4.G4.A3.C5', 1.

In [147]:
rel_offset = {'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11}  # relative offsets from
                                                                        # C in the right-dir
piano_offset = 3  # C1 is the 4th key on the piano (index of 3)

max_octave = 8    # Last key on the piano is C8
notes_in_octave = 12

def note_to_piano_idx(a_note):
    """Simply convert a note in the format {Letter}{Octave} to the 0-based index number of its corresponding
    position on the piano"""
    a_note, octave = a_note[:-1], int(a_note[-1])
    if (int(octave) > max_octave):
        print("WARNING: octave = ", octave)
        return np.array([])
    if (len(a_note) > 1):  
        if (a_note[1] == '-'): # a flat!
            return piano_offset + rel_offset[a_note[0]] + notes_in_octave * (octave - 1) - 1
        elif (a_note[1] == '#'): # a sharp!
            return piano_offset + rel_offset[a_note[0]] + notes_in_octave * (octave - 1) + 1
        else:
            print("Waring: note = ", a_note)
    return piano_offset + rel_offset[a_note[0]] + notes_in_octave * (octave - 1)

In [149]:
n_keys_piano = 88

def transpose_sequence(sequence, transposition):
    """ Perform a right-shift on the keys' part of the vectors
      Effectively, this outputs a new sequence repesenting
      a song but transposed.
      The size of the shift is transposition"""
    if (transposition == 0):
        return sequence
    shift = transposition
    sequence, durations = sequence[:, :-1], sequence[:, -1]
    for i in range(len(sequence)):
        sequence[i] = np.concatenate((sequence[i][-shift:], \
                                      sequence[i][:-shift]))
    return np.insert(sequence, len(sequence[0]), durations, axis = 1)

def songs_to_sequences(songs, augmentation_count = None):
    """Converts the sequences within songs to vector format (an 89 element NumPy Ndarray 
    where the last element is the normalized duration (in quarter notes), and the rest of 
    the elements are 1 for key on and 0 for key off.)"""
    sequences = []
    indices = None
    for song in songs:
        sequence = []
        durations = []
        for element in song[0]:
            vector = np.zeros(n_keys_piano)  # The current boolean array with which keys
                                             # are being pressed. Will add an additional
                                             # 89th element for the duration (normalized
                                             # to be the fraction of this notes duration
                                             # to that of the longest in the file)
            cur_note, duration = element  # in units of quarter-notes
            if ('.' in cur_note): # chord
                notes = cur_note.split('.')
                for cur_note in notes:   
                    vector[note_to_piano_idx(cur_note)] = 1   # chords are already formatted with piano index
            elif (cur_note != 'rest'): # a note
                vector[note_to_piano_idx(cur_note)] = 1
            sequence.append(vector)
            durations.append(float(duration))
        durations /= np.max(durations)  # normalize durations to the maximum in the song
        sequence = np.array(sequence)
        sequence = np.insert(sequence, len(sequence[0]), durations, axis = 1)
        if (augmentation_count):   # Perform data augmentation as in Hewahi, AlSaigal, and AlJanahi 2019
            transpositions = np.random.permutation(11)[:augmentation_count] 
            transpositions = np.insert(transpositions, 0, 0)
            # 11 possible key transpositions
            for transposition in transpositions:
                sequences.append(transpose_sequence(sequence, transposition))
        else:
            sequences.append(sequence)        
                              
    return np.array(sequences)

In [150]:
# Call songs_to_sequences on (notes_by_song)
np.set_printoptions(threshold=maxsize)
chopin_sequences = songs_to_sequences(notes_by_song)

In [154]:
# We need to compactify multiple rests occurring in sequence, as well
# as remove rests of zero duration

def compactify_sequences(sequences):
    compacted_sequences = []
    for sequence in sequences:
        total_duration = 0
        song_sequences = []
        last_was_rest = False # bool to control the updates to total_duration
                              # and appends to song_sequences
        for vector in sequence:
            if (vector[:-1].sum() == 0): # rest
                if (last_was_rest):
                    total_duration += vector[-1]
                else:
                    total_duration = vector[-1]
                    last_was_rest = True
            else:
                if (last_was_rest):
                    if (total_duration != 0):  # ignore rests of zero duration
                        song_sequences.append(np.concatenate((np.zeros(88), [total_duration])))
                    last_was_rest = False
                song_sequences.append(vector)
        compacted_sequences.append(song_sequences)
    
    return compacted_sequences      

In [155]:
print('Before: {} entries for index: {}'.format(len(chopin_sequences[index]), index))
compacted_sequences = compactify_sequences(chopin_sequences)
print('After: {} entries for index: {}'.format(len(compacted_sequences[index]), index)) 

Before: 627 entries for index: 3
After: 622 entries for index: 3


In [158]:
# Okay, let's use the keys to transpose
def transpose_sequences(sequences, keys_by_song):
    transposed_sequences = []  # All sequences transposed to the key of C major
    for i in range(len(sequences)):
        if (keys_by_song[i][0] == 'C'):
            transposed_sequences.append(sequences[i])
        else:
            notes, durations = sequences[i][:, :-1], sequences[i][:, -1]
            transposition = ord('C') - ord(keys_by_song[i][0])
            transposed_sequence = transpose_sequence(notes, transposition)
            transposed_sequences.append(np.insert(transposed_sequence,\
                                                n_keys_piano, durations, axis = 1))   
    return transposed_sequences

In [160]:
transposed_chopin_sequences = transpose_sequences(chopin_sequences, keys_by_song)

In [168]:
# Now would be a good time to convert some sequences back into MIDI format and listen to them
# We can make sure the timing is the same (relative) between the original and the new, and
# confirm that the transpose worked

def convert_to_midi(sequence, output_file = 'music.mid'):
   
    offset = 0
    output_notes = []
    
    all_notes = ['A', 'A#', 'B', 'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#']

    # create note, chord, and rest objects
    for vector in sequence:
        converted_duration = duration.Duration()
        converted_duration.quarterLength = vector[-1]     # conveted from seconds, assuming 
                                                          #  bpm = 60 (so beats are eqaul to seconds)
        if (np.sum(vector[:-1]) > 1):  # chord
            indices_in_chord = np.argsort(vector[:-1])[-int(np.sum(vector[:-1])):]
            notes_in_chord = [all_notes[i % 12] + str((i // 12) + 1) for i in indices_in_chord]
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(current_note)
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            new_chord.duration = converted_duration
            output_notes.append(new_chord)
            
        elif (np.sum(vector[:-1]) == 1):   # note
            index = np.argmax(vector[:-1])
            new_note = all_notes[index % 12] + str((index // 12) + 1)
            new_note = note.Note(new_note)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            new_note.duration = converted_duration
            output_notes.append(new_note)
        
        elif (np.sum(vector[:-1]) == 0):   # rest
            new_rest = note.Rest()
            new_rest.offset = offset
            new_rest.duration = converted_duration
            output_notes.append(new_rest)
        offset += vector[-1]
    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp = output_file)

In [169]:
convert_to_midi(chopin_sequences[0])

Indeed, this midi file is exactly the first few bars of the first imported song (but transposed to C).

Now to apply a window function to get the data into the shape needed for LSTM input

In [162]:
def sequences_to_inputs(sequences, window_size = 16):
    
    X = []
    y = []
    
    for i in range(len(sequences)):
        if (len(sequences[i]) < window_size + 1):
            print("Skipping index ", i, " because the song is too short. Try a shorter window_size to include it.")
            continue
        for j in range(len(sequences[i]) - window_size - 1):
            X.append(sequences[i][j:j + window_size])
            y.append(sequences[i][j + window_size + 1])

    return np.array(X), np.array(y)

In [163]:
X, y = sequences_to_inputs(transposed_chopin_sequences)

# let's shuffle these inputs
X, y = shuffle(X, y)

In [164]:
y.shape

(20241, 89)

In [165]:
X.shape

(20241, 16, 89)

In [166]:
getsizeof(X)

230585600

In [171]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

In [172]:
def lstm(n_lstm_layers = 4, n_dense_layers = 3, n_lstm_nodes = 512, dropout_rate = 0.6, leaky_alpha = None):
    model = Sequential()
    for i in range(n_lstm_layers - 1):
        model.add(LSTM(n_lstm_nodes, return_sequences = True))
        model.add(Dropout(dropout_rate))
    model.add(LSTM(n_lstm_nodes))
    model.add(Dropout(dropout_rate))
    model.add(Dense(n_lstm_nodes // 2))
    if (leaky_alpha):
        model.add(LeakyReLU(alpha = leaky_alpha))   # default is 0.3
    else:
        model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))
    for i in range(n_dense_layers - 1):
        model.add(Dense(n_lstm_nodes // 2))
        model.add(Dropout(0.6))
    model.add(Dense(89))
    model.add(Activation('sigmoid'))
    model.compile(loss = 'binary_crossentropy', optimizer = 'RMSProp', metrics = ['accuracy'])
    return model

In [173]:
X_train.shape

(14168, 16, 89)

In [125]:
model = lstm(n_lstm_layers = 3, n_dense_layers = 2, n_lstm_nodes = 512, dropout_rate = 0.4)
mc = ModelCheckpoint('models/chopin/best_chopin_model_3_2_512_pt4.h5', monitor = 'val_loss', mode = 'min', save_best_only = True, verbose = 1)

In [126]:
history = model.fit(X_train, y_train, batch_size = 512, epochs = 50, \
                    validation_data = (X_test, y_test), verbose = 2, callbacks = [mc])

Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.12283, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 156s - loss: 0.2324 - accuracy: 0.0148 - val_loss: 0.1228 - val_accuracy: 0.0141
Epoch 2/50

Epoch 00002: val_loss did not improve from 0.12283
29/29 - 140s - loss: 0.1446 - accuracy: 0.0172 - val_loss: 0.1243 - val_accuracy: 0.0141
Epoch 3/50

Epoch 00003: val_loss improved from 0.12283 to 0.11949, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 141s - loss: 0.1353 - accuracy: 0.0152 - val_loss: 0.1195 - val_accuracy: 0.0141
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.11949
29/29 - 146s - loss: 0.1304 - accuracy: 0.0159 - val_loss: 0.1260 - val_accuracy: 0.0141
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.11949
29/29 - 143s - loss: 0.1288 - accuracy: 0.0141 - val_loss: 0.1210 - val_accuracy: 0.0141
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.11949
29/29 - 141s - loss: 0.1264 - accuracy: 0.0143 - val_loss: 0.1197 - val_a

29/29 - 140s - loss: 0.0953 - accuracy: 0.0705 - val_loss: 0.0939 - val_accuracy: 0.0655
Epoch 44/50

Epoch 00044: val_loss did not improve from 0.09394
29/29 - 136s - loss: 0.0947 - accuracy: 0.0746 - val_loss: 0.0940 - val_accuracy: 0.0721
Epoch 45/50

Epoch 00045: val_loss improved from 0.09394 to 0.09316, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 136s - loss: 0.0943 - accuracy: 0.0747 - val_loss: 0.0932 - val_accuracy: 0.0698
Epoch 46/50

Epoch 00046: val_loss did not improve from 0.09316
29/29 - 135s - loss: 0.0938 - accuracy: 0.0787 - val_loss: 0.0934 - val_accuracy: 0.0829
Epoch 47/50

Epoch 00047: val_loss did not improve from 0.09316
29/29 - 143s - loss: 0.0933 - accuracy: 0.0859 - val_loss: 0.0933 - val_accuracy: 0.0764
Epoch 48/50

Epoch 00048: val_loss improved from 0.09316 to 0.09295, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 151s - loss: 0.0928 - accuracy: 0.0872 - val_loss: 0.0930 - val_accuracy: 0.0663
Epoch 49/50

Epoch 00049: val_loss did

In [127]:
history = model.fit(X_train, y_train, batch_size = 512, epochs = 20, \
                    validation_data = (X_test, y_test), verbose = 2, callbacks = [mc])

Epoch 1/20

Epoch 00001: val_loss did not improve from 0.09193
29/29 - 135s - loss: 0.0911 - accuracy: 0.1010 - val_loss: 0.0923 - val_accuracy: 0.0980
Epoch 2/20

Epoch 00002: val_loss improved from 0.09193 to 0.09156, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 135s - loss: 0.0908 - accuracy: 0.1016 - val_loss: 0.0916 - val_accuracy: 0.0991
Epoch 3/20

Epoch 00003: val_loss did not improve from 0.09156
29/29 - 133s - loss: 0.0900 - accuracy: 0.1080 - val_loss: 0.0923 - val_accuracy: 0.0866
Epoch 4/20

Epoch 00004: val_loss improved from 0.09156 to 0.09138, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 134s - loss: 0.0894 - accuracy: 0.1102 - val_loss: 0.0914 - val_accuracy: 0.0945
Epoch 5/20

Epoch 00005: val_loss did not improve from 0.09138
29/29 - 137s - loss: 0.0892 - accuracy: 0.1124 - val_loss: 0.0917 - val_accuracy: 0.1028
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.09138
29/29 - 134s - loss: 0.0883 - accuracy: 0.1166 - val_loss: 0.0918 - v

Not bad. Next, we'll see if adding more layers and dropout (effectively using the same hyperparameters as in the best model from Hewahi, AlSaigal, and AlJanahi et al. 2019 ([link](https://www.tandfonline.com/doi/pdf/10.1080/25765299.2019.1649972?needAccess=true))

In [128]:
model = lstm(n_lstm_layers = 4, n_dense_layers = 3, n_lstm_nodes = 512, dropout_rate = 0.6)
mc = ModelCheckpoint('models/chopin/best_chopin_model_recreate.h5', monitor = 'val_loss', mode = 'min', save_best_only = True, verbose = 1)

In [129]:
history = model.fit(X_train, y_train, batch_size = 512, epochs = 75, \
                    validation_data = (X_test, y_test), verbose = 2, callbacks = [mc])

Epoch 1/75

Epoch 00001: val_loss improved from inf to 0.13469, saving model to best_chopin_model_recreate.h5
29/29 - 206s - loss: 0.2863 - accuracy: 0.0139 - val_loss: 0.1347 - val_accuracy: 0.0141
Epoch 2/75

Epoch 00002: val_loss improved from 0.13469 to 0.12133, saving model to best_chopin_model_recreate.h5
29/29 - 188s - loss: 0.1558 - accuracy: 0.0166 - val_loss: 0.1213 - val_accuracy: 0.0141
Epoch 3/75

Epoch 00003: val_loss did not improve from 0.12133
29/29 - 186s - loss: 0.1417 - accuracy: 0.0165 - val_loss: 0.1636 - val_accuracy: 0.0141
Epoch 4/75

Epoch 00004: val_loss did not improve from 0.12133
29/29 - 190s - loss: 0.1369 - accuracy: 0.0150 - val_loss: 0.1378 - val_accuracy: 0.0141
Epoch 5/75

Epoch 00005: val_loss did not improve from 0.12133
29/29 - 186s - loss: 0.1299 - accuracy: 0.0126 - val_loss: 0.1279 - val_accuracy: 0.0141
Epoch 6/75

Epoch 00006: val_loss did not improve from 0.12133
29/29 - 188s - loss: 0.1268 - accuracy: 0.0137 - val_loss: 0.1235 - val_accurac

29/29 - 190s - loss: 0.1158 - accuracy: 0.0155 - val_loss: 0.1139 - val_accuracy: 0.0151
Epoch 50/75

Epoch 00050: val_loss did not improve from 0.11393
29/29 - 185s - loss: 0.1156 - accuracy: 0.0157 - val_loss: 0.1147 - val_accuracy: 0.0141
Epoch 51/75

Epoch 00051: val_loss did not improve from 0.11393
29/29 - 186s - loss: 0.1158 - accuracy: 0.0159 - val_loss: 0.1149 - val_accuracy: 0.0141
Epoch 52/75

Epoch 00052: val_loss did not improve from 0.11393
29/29 - 186s - loss: 0.1153 - accuracy: 0.0158 - val_loss: 0.1141 - val_accuracy: 0.0147
Epoch 53/75

Epoch 00053: val_loss did not improve from 0.11393
29/29 - 188s - loss: 0.1151 - accuracy: 0.0149 - val_loss: 0.1140 - val_accuracy: 0.0147
Epoch 54/75

Epoch 00054: val_loss did not improve from 0.11393
29/29 - 188s - loss: 0.1153 - accuracy: 0.0152 - val_loss: 0.1152 - val_accuracy: 0.0141
Epoch 55/75

Epoch 00055: val_loss did not improve from 0.11393
29/29 - 188s - loss: 0.1157 - accuracy: 0.0154 - val_loss: 0.1141 - val_accuracy: 

Still learning but looks like it may be vanishing gradient. Let's see what happens when we take the last model and apply it to the longer window.

In [174]:
X_32, y_32 = sequences_to_inputs(transposed_chopin_sequences, window_size = 32)

# let's shuffle these inputs
X_32, y_32 = shuffle(X, y)

In [175]:
X_32_train, X_32_test, y_32_train, y_32_test = train_test_split(X_32, y_32, test_size = 0.3, random_state = 42)

In [130]:
model = lstm(n_lstm_layers = 3, n_dense_layers = 2, n_lstm_nodes = 512, dropout_rate = 0.4)
mc = ModelCheckpoint('models/chopin/best_chopin_model_32_3_2_512_pt4.h5', monitor = 'val_loss', mode = 'min', save_best_only = True, verbose = 1)

In [131]:
history = model.fit(X_32_train, y_32_train, batch_size = 512, epochs = 50, \
                    validation_data = (X_32_test, y_32_test), verbose = 2, callbacks = [mc])

Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.12155, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 155s - loss: 0.2410 - accuracy: 0.0146 - val_loss: 0.1216 - val_accuracy: 0.0131
Epoch 2/50

Epoch 00002: val_loss improved from 0.12155 to 0.11923, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 137s - loss: 0.1441 - accuracy: 0.0170 - val_loss: 0.1192 - val_accuracy: 0.0131
Epoch 3/50

Epoch 00003: val_loss did not improve from 0.11923
29/29 - 135s - loss: 0.1340 - accuracy: 0.0179 - val_loss: 0.1234 - val_accuracy: 0.0131
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.11923
29/29 - 136s - loss: 0.1294 - accuracy: 0.0148 - val_loss: 0.1204 - val_accuracy: 0.0131
Epoch 5/50

Epoch 00005: val_loss improved from 0.11923 to 0.11754, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 134s - loss: 0.1272 - accuracy: 0.0146 - val_loss: 0.1175 - val_accuracy: 0.0131
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.11754
29/29 - 137s - loss

29/29 - 335s - loss: 0.0977 - accuracy: 0.0520 - val_loss: 0.0955 - val_accuracy: 0.0476
Epoch 45/50

Epoch 00045: val_loss did not improve from 0.09549
29/29 - 251s - loss: 0.0974 - accuracy: 0.0524 - val_loss: 0.0956 - val_accuracy: 0.0488
Epoch 46/50

Epoch 00046: val_loss did not improve from 0.09549
29/29 - 217s - loss: 0.0969 - accuracy: 0.0562 - val_loss: 0.0961 - val_accuracy: 0.0600
Epoch 47/50

Epoch 00047: val_loss improved from 0.09549 to 0.09495, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 222s - loss: 0.0964 - accuracy: 0.0605 - val_loss: 0.0949 - val_accuracy: 0.0552
Epoch 48/50

Epoch 00048: val_loss improved from 0.09495 to 0.09458, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 210s - loss: 0.0959 - accuracy: 0.0593 - val_loss: 0.0946 - val_accuracy: 0.0520
Epoch 49/50

Epoch 00049: val_loss did not improve from 0.09458
29/29 - 205s - loss: 0.0955 - accuracy: 0.0595 - val_loss: 0.0954 - val_accuracy: 0.0652
Epoch 50/50

Epoch 00050: val_loss did

In [132]:
history = model.fit(X_32_train, y_32_train, batch_size = 512, epochs = 20, \
                    validation_data = (X_32_test, y_32_test), verbose = 2, callbacks = [mc])

Epoch 1/20

Epoch 00001: val_loss did not improve from 0.09458
29/29 - 178s - loss: 0.0950 - accuracy: 0.0638 - val_loss: 0.0947 - val_accuracy: 0.0628
Epoch 2/20

Epoch 00002: val_loss improved from 0.09458 to 0.09324, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 178s - loss: 0.0943 - accuracy: 0.0688 - val_loss: 0.0932 - val_accuracy: 0.0741
Epoch 3/20

Epoch 00003: val_loss did not improve from 0.09324
29/29 - 167s - loss: 0.0938 - accuracy: 0.0706 - val_loss: 0.0936 - val_accuracy: 0.0605
Epoch 4/20

Epoch 00004: val_loss did not improve from 0.09324
29/29 - 180s - loss: 0.0936 - accuracy: 0.0710 - val_loss: 0.0936 - val_accuracy: 0.0674
Epoch 5/20

Epoch 00005: val_loss improved from 0.09324 to 0.09277, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 178s - loss: 0.0930 - accuracy: 0.0763 - val_loss: 0.0928 - val_accuracy: 0.0748
Epoch 6/20

Epoch 00006: val_loss improved from 0.09277 to 0.09227, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 175s - 

In [133]:
history = model.fit(X_32_train, y_32_train, batch_size = 512, epochs = 20, \
                    validation_data = (X_32_test, y_32_test), verbose = 2, callbacks = [mc])

Epoch 1/20

Epoch 00001: val_loss improved from 0.08975 to 0.08878, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 183s - loss: 0.0845 - accuracy: 0.1332 - val_loss: 0.0888 - val_accuracy: 0.1148
Epoch 2/20

Epoch 00002: val_loss did not improve from 0.08878
29/29 - 174s - loss: 0.0839 - accuracy: 0.1385 - val_loss: 0.0898 - val_accuracy: 0.1071
Epoch 3/20

Epoch 00003: val_loss did not improve from 0.08878
29/29 - 179s - loss: 0.0834 - accuracy: 0.1433 - val_loss: 0.0890 - val_accuracy: 0.1254
Epoch 4/20

Epoch 00004: val_loss improved from 0.08878 to 0.08871, saving model to best_chopin_model_3_2_512_pt4.h5
29/29 - 172s - loss: 0.0828 - accuracy: 0.1443 - val_loss: 0.0887 - val_accuracy: 0.1174
Epoch 5/20

Epoch 00005: val_loss did not improve from 0.08871
29/29 - 175s - loss: 0.0822 - accuracy: 0.1491 - val_loss: 0.0894 - val_accuracy: 0.1174
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.08871
29/29 - 180s - loss: 0.0813 - accuracy: 0.1554 - val_loss: 0.0898 - v

KeyboardInterrupt: 

With a longer window, we got to a slightly lower loss (.08871 vs .08950) but the training took a bit longer (best after Epoch 74 compared to Epoch 65 before). I am a bit disappointed but I still like a window of 32 better (and possibly even longer) since in music, the memory should last longer than 16 vectors, which at normal BPM (Beats Per Minute) amounts to about a few seconds. 

I am also surprised by how fast each the training time per Epoch was. It was about the same as before. Why is this this the case?

Next, I will try using the Leaky ReLU activation function to combat the vanishing gradient problem. This function has just one parameter, alpha, which controls the multiplier (alpha = 0 is the same as regular ReLU). I will begin with keras' default value of 0.3

In [137]:
# Trying LeakyReLU with default alpha = 0.3 with 4 LSTM and 3 Dense layers again:
model = lstm(n_lstm_layers = 4, n_dense_layers = 3, n_lstm_nodes = 512, dropout_rate = 0.6, leaky_alpha = 0.3)
mc = ModelCheckpoint('best_chopin_model_LRLpt3_recreate.h5', monitor = 'val_loss', mode = 'min', save_best_only = True, verbose = 1)

In [138]:
history = model.fit(X_train, y_train, batch_size = 512, epochs = 50, \
                    validation_data = (X_test, y_test), verbose = 2, callbacks = [mc])

Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.15351, saving model to best_chopin_model_LRLpt3_recreate.h5
29/29 - 219s - loss: 0.3453 - accuracy: 0.0140 - val_loss: 0.1535 - val_accuracy: 0.0141
Epoch 2/50

Epoch 00002: val_loss did not improve from 0.15351
29/29 - 206s - loss: 0.1595 - accuracy: 0.0181 - val_loss: 0.1536 - val_accuracy: 0.0141
Epoch 3/50

Epoch 00003: val_loss improved from 0.15351 to 0.12294, saving model to best_chopin_model_LRLpt3_recreate.h5
29/29 - 210s - loss: 0.1437 - accuracy: 0.0173 - val_loss: 0.1229 - val_accuracy: 0.0141
Epoch 4/50

Epoch 00004: val_loss improved from 0.12294 to 0.12121, saving model to best_chopin_model_LRLpt3_recreate.h5
29/29 - 215s - loss: 0.1350 - accuracy: 0.0146 - val_loss: 0.1212 - val_accuracy: 0.0141
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.12121
29/29 - 210s - loss: 0.1279 - accuracy: 0.0137 - val_loss: 0.1215 - val_accuracy: 0.0141
Epoch 6/50

Epoch 00006: val_loss improved from 0.12121 to 0.11992, sa

KeyboardInterrupt: 

The model seems to perform even worse. Now let's try with alpha = 0.1

In [140]:
model = lstm(n_lstm_layers = 4, n_dense_layers = 3, n_lstm_nodes = 512, dropout_rate = 0.6, leaky_alpha = 0.1)
mc = ModelCheckpoint('best_chopin_model_LRLpt1_recreate.h5', monitor = 'val_loss', mode = 'min', save_best_only = True, verbose = 1)

In [141]:
history = model.fit(X_train, y_train, batch_size = 512, epochs = 50, \
                    validation_data = (X_test, y_test), verbose = 2, callbacks = [mc])

Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.17031, saving model to best_chopin_model_LRLpt1_recreate.h5
29/29 - 215s - loss: 0.2997 - accuracy: 0.0156 - val_loss: 0.1703 - val_accuracy: 0.0141
Epoch 2/50

Epoch 00002: val_loss improved from 0.17031 to 0.12196, saving model to best_chopin_model_LRLpt1_recreate.h5
29/29 - 204s - loss: 0.1539 - accuracy: 0.0156 - val_loss: 0.1220 - val_accuracy: 0.0141
Epoch 3/50

Epoch 00003: val_loss did not improve from 0.12196
29/29 - 201s - loss: 0.1408 - accuracy: 0.0166 - val_loss: 0.1503 - val_accuracy: 0.0141
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.12196
29/29 - 199s - loss: 0.1331 - accuracy: 0.0147 - val_loss: 0.1247 - val_accuracy: 0.0141
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.12196
29/29 - 208s - loss: 0.1283 - accuracy: 0.0145 - val_loss: 0.1225 - val_accuracy: 0.0141
Epoch 6/50

Epoch 00006: val_loss improved from 0.12196 to 0.12183, saving model to best_chopin_model_LRLpt1_recreate.h5
29/29 -

KeyboardInterrupt: 

This model performs even worse. Could try to increase alpha but that will decrease the effect of having a non-linear
activation function. Better to try something else now.

The best model is still best_chopin_model_3_2_512_pt4.h5. Let's go to even fewer layers.

In [47]:
def generate_musical_sequence(model, no_of_timesteps = 16, index = None, threshold = 0.5):
    if (index is None):
        index = np.random.randint(0, len(X_test) - 1)
        print('random index is ', index)
    random_music = X_test[index]
    original_random_music = random_music.copy()
    predictions_new = []
    for i in range(no_of_timesteps):
        random_music = random_music.reshape(1, no_of_timesteps, n_keys_piano + 1)                    
        prob = model.predict(random_music)[0]
        y_pred = [0 if p < threshold else 1 for p in prob[:-1]]
        y_pred = np.insert(y_pred, len(y_pred), prob[-1])
        # print(prob)
        predictions_new.append(y_pred)
        random_music = np.insert(random_music, len(random_music), y_pred, axis = 0)[1:, :]
    
    return original_random_music, np.array(predictions_new).astype(np.float64)

In [48]:
random_chopin_sequence, new_chopin_sequence = generate_musical_sequence(model)
convert_to_midi(random_chopin_sequence, 'random.mid')

random index is  5617


In [49]:
convert_to_midi(new_chopin_sequence, 'new.mid')