# Music Generation with LSTM

In [12]:
import glob
import numpy as np
import pickle
import mido
import random
from mido import MidiFile, Message, MidiTrack
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Activation, Bidirectional, LayerNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import backend as K
from keras_self_attention import SeqSelfAttention

In [2]:
# MIDI channels that are tracked, in the fixed order the parser cycles through.
channels = [0, 1, 2, 9]
# Lookup table: MIDI channel number -> row index of that channel's note-state
# list. Channel numbers that are not tracked map to row 0.
chanToArr = [channels.index(chan) if chan in channels else 0 for chan in range(10)]

def parse(input_midi):
    """Flatten a MIDI file into a list of "note.channel.duration" tokens.

    All tracks are merged and only ``note_on`` messages on the channels listed
    in the module-level ``channels`` table are considered. The channels are
    visited in the fixed cyclic order given by ``channels``; every time the
    cycle wraps around, a ``"0.0.0"`` separator token is emitted. A note-on
    with non-zero velocity emits a token with duration 1. Whenever a channel
    is skipped over while catching up to the channel of the current message,
    each note still held on the skipped channel gets the duration of its
    token incremented by one. A note-on with velocity 0 releases the note.

    Args:
        input_midi: MIDI file to read; passed straight to ``mido.MidiFile``.

    Returns:
        list[str]: Tokens of the form ``"<note>.<channel>.<duration>"``.
    """
    n_channels = len(channels)
    tokens = []         # [note, channel, duration] triples, stringified at the end
    next_id = 1         # 1-based position the next appended token will occupy
    prev_chan = 0       # channel of the previous note_on, to detect channel changes
    chan_it = 0         # current position in the forced channel order

    # One independent row of 128 note slots per tracked channel; a slot holds
    # the 1-based token id of the note currently held, or 0 when released.
    # (``[[0] * 128] * 4`` would alias a single row four times, so a note held
    # on one channel would show up as held on every channel.)
    held = [[0] * 128 for _ in range(n_channels)]

    def advance(position):
        """Step to the next channel slot, emitting a separator on wrap-around."""
        nonlocal next_id
        position += 1
        if position == n_channels:
            position = 0
            tokens.append([0, 0, 0])
            next_id += 1
        return position

    for msg in mido.merge_tracks(MidiFile(input_midi).tracks):
        if msg.type != 'note_on':
            continue
        if msg.channel not in channels:
            # The catch-up loop below can only stop on a tracked channel, so a
            # message on any other channel would make it spin forever.
            continue

        if msg.channel != prev_chan:
            chan_it = advance(chan_it)
            # Walk the forced order until we reach the message's channel,
            # extending every note still held on the channels we pass.
            while channels[chan_it] != msg.channel:
                for token_id in held[chanToArr[channels[chan_it]]]:
                    if token_id > 0:
                        tokens[token_id - 1][2] += 1
                chan_it = advance(chan_it)

        row = held[chanToArr[msg.channel]]
        if msg.velocity == 0:
            # note_on with zero velocity acts as note-off
            row[msg.note] = 0
        else:
            row[msg.note] = next_id
            tokens.append([msg.note, msg.channel, 1])
            next_id += 1

        prev_chan = msg.channel

    return [f"{note}.{chan}.{dur}" for note, chan, dur in tokens]

In [3]:
def get_notes():
    """Parse every MIDI file matching ``samples/large/*.mid`` into one sequence.

    The combined token sequence is also pickled to ``notes.p`` so later runs
    can reload it instead of re-parsing the files.

    Returns:
        The tokens of all files, in the order ``glob`` yields the files: the
        plain list returned by ``parse`` when exactly one file matched,
        otherwise a NumPy string array of the concatenated per-file tokens.

    Raises:
        FileNotFoundError: If no file matched. ``notes.p`` is left untouched
            in that case instead of being overwritten with ``None``.
    """
    per_file = []
    for file in glob.glob("samples/large/*.mid"):
        print("Parsing %s" % file)
        per_file.append(parse(file))

    if not per_file:
        raise FileNotFoundError("No MIDI files found matching samples/large/*.mid")

    if len(per_file) == 1:
        notes = per_file[0]
    else:
        # Concatenate once at the end rather than re-copying the growing
        # array for every file; dtype=str keeps an empty file from
        # introducing a non-string dtype.
        notes = np.concatenate([np.asarray(tokens, dtype=str) for tokens in per_file])

    with open('notes.p', 'wb') as handle:
        pickle.dump(notes, handle)
    return notes

def prepare_sequences(notes, n_vocab, sequence_length=16):
    """Build the training tensors for next-token prediction.

    Every distinct token in ``notes`` is mapped to its index in the sorted
    vocabulary. Each window of ``sequence_length`` consecutive tokens becomes
    one input sample and the token that follows it becomes the target.

    Args:
        notes: Sequence of hashable tokens (the strings produced by ``parse``).
        n_vocab: Vocabulary size; used to scale the inputs and as the width
            of the one-hot targets. Expected to equal the number of distinct
            tokens in ``notes``.
        sequence_length: Number of tokens per input window.

    Returns:
        tuple: ``(network_input, network_output)`` where ``network_input`` has
        shape ``(n_patterns, sequence_length, 1)`` with values divided by
        ``n_vocab``, and ``network_output`` is the one-hot encoding of the
        targets with ``n_vocab`` columns.

    Raises:
        ValueError: If ``notes`` is too short to form a single window.
    """
    pitchnames = sorted(set(notes))
    note_to_int = {note: number for number, note in enumerate(pitchnames)}
    encoded = [note_to_int[note] for note in notes]

    n_patterns = len(encoded) - sequence_length
    if n_patterns <= 0:
        raise ValueError(
            "need more than %d notes to build a training sequence, got %d"
            % (sequence_length, len(encoded))
        )

    windows = [encoded[start:start + sequence_length] for start in range(n_patterns)]
    targets = encoded[sequence_length:]

    # reshape the input into a format compatible with LSTM layers, then normalize
    network_input = np.reshape(windows, (n_patterns, sequence_length, 1))
    network_input = network_input / float(n_vocab)

    # Fix the one-hot width explicitly: without num_classes it is inferred as
    # max(targets) + 1, which is too narrow for the Dense(n_vocab) output
    # whenever the highest-indexed token never occurs as a target.
    network_output = to_categorical(targets, num_classes=n_vocab)

    return (network_input, network_output)

In [4]:
def create_network(network_input, n_vocab):
    """Assemble and compile the note-prediction model.

    The model stacks three bidirectional LSTM layers. The first two return
    full sequences and are each followed by self-attention and layer
    normalization; the third returns only its final output and is followed by
    layer normalization. The head is dropout, a dense layer, layer
    normalization, dropout, and a softmax over ``n_vocab`` classes.

    Args:
        network_input: Array whose ``shape[1]`` and ``shape[2]`` give the
            input shape declared on the first layer.
        n_vocab: Number of output classes.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer with ``clipnorm=1.0``).
    """
    units = 256
    drop_rate = 0.4
    window_shape = (network_input.shape[1], network_input.shape[2])

    def bi_lstm(return_sequences, **wrapper_kwargs):
        # One bidirectional LSTM layer with the shared width and dropout.
        return Bidirectional(
            LSTM(units, dropout=drop_rate, return_sequences=return_sequences),
            **wrapper_kwargs
        )

    layers = (
        # recurrent stack
        bi_lstm(True, input_shape=window_shape),
        SeqSelfAttention(),
        LayerNormalization(),
        bi_lstm(True),
        SeqSelfAttention(),
        LayerNormalization(),
        bi_lstm(False),
        LayerNormalization(),
        # classification head; no activation is applied to the first Dense
        Dropout(drop_rate),
        Dense(units),
        LayerNormalization(),
        Dropout(drop_rate),
        Dense(n_vocab),
        Activation('softmax'),
    )

    model = Sequential()
    for layer in layers:
        model.add(layer)

    model.compile(loss='categorical_crossentropy', optimizer=Adam(clipnorm=1.0))
    return model

In [5]:
def train_network(load_notes=False, to_load_model=False, learning_rate=None):
    """Train the note-prediction network, checkpointing the best weights.

    Args:
        load_notes: If true, reuse the token sequence pickled in ``notes.p``;
            otherwise re-parse the MIDI files with ``get_notes()``.
        to_load_model: If true, resume from the model saved in
            ``weights.hdf5``; otherwise build a new one with
            ``create_network``.
        learning_rate: Optional learning rate to set on the optimizer before
            training. ``None`` leaves the optimizer's current value unchanged.
    """
    if load_notes:
        # NOTE: unpickling can execute arbitrary code; only load a notes.p
        # that was produced locally by get_notes().
        with open('notes.p', 'rb') as handle:
            notes = pickle.load(handle)
    else:
        notes = get_notes()

    n_vocab = len(set(notes))
    network_input, network_output = prepare_sequences(notes, n_vocab)

    if to_load_model:
        model = load_model('weights.hdf5', custom_objects={'SeqSelfAttention': SeqSelfAttention})
    else:
        model = create_network(network_input, n_vocab)

    if learning_rate is not None:
        K.set_value(model.optimizer.learning_rate, learning_rate)

    # Overwrite weights.hdf5 only when the training loss improves.
    checkpoint = ModelCheckpoint(
        'weights.hdf5',
        monitor='loss',
        save_best_only=True,
        mode='min'
    )

    model.summary()

    model.fit(
        x=network_input,
        y=network_output,
        batch_size=32,
        epochs=3000,
        callbacks=[checkpoint]
    )

In [6]:
# Train with the default arguments: the MIDI files are re-parsed and a new
# model is built (nothing is loaded from notes.p or weights.hdf5).
train_network()

Parsing samples/large/091_DragonWarriorIV_40_41HornofBaron.mid
Parsing samples/large/090_DragonWarriorIII_06_07Victory.mid
Parsing samples/large/090_DragonWarriorIII_05_06BattleTheme.mid
Parsing samples/large/091_DragonWarriorIV_09_10MysteriousShrine.mid
Parsing samples/large/090_DragonWarriorIII_19_20Cursed.mid
Parsing samples/large/091_DragonWarriorIV_19_20Jackpot.mid
Parsing samples/large/090_DragonWarriorIII_30_31IntoTheLegend.mid
Parsing samples/large/091_DragonWarriorIV_03_04SolitaryWarrior.mid
Parsing samples/large/088_DragonWarrior_00_01Overture.mid
Parsing samples/large/091_DragonWarriorIV_27_28ExpandingtheSeaMap.mid
Parsing samples/large/091_DragonWarriorIV_39_40TheUnknownCastle.mid
Parsing samples/large/091_DragonWarriorIV_06_07ImportantItemDiscovery.mid
Parsing samples/large/091_DragonWarriorIV_05_06CaveofFear.mid
Parsing samples/large/091_DragonWarriorIV_37_38BalloonsFlight.mid
Parsing samples/large/090_DragonWarriorIII_03_04Rondo.mid
Parsing samples/large/090_DragonWarrio

Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
Epoch 5/3000
Epoch 6/3000
Epoch 7/3000
Epoch 8/3000
Epoch 9/3000
Epoch 10/3000
Epoch 11/3000
Epoch 12/3000
Epoch 13/3000
Epoch 14/3000
Epoch 15/3000
Epoch 16/3000
Epoch 17/3000
Epoch 18/3000
Epoch 19/3000
Epoch 20/3000
Epoch 21/3000
Epoch 22/3000
Epoch 23/3000
Epoch 24/3000
Epoch 25/3000
Epoch 26/3000
Epoch 27/3000
Epoch 28/3000
Epoch 29/3000
Epoch 30/3000
Epoch 31/3000
Epoch 32/3000
Epoch 33/3000
Epoch 34/3000
Epoch 35/3000
Epoch 36/3000
Epoch 37/3000
Epoch 38/3000
Epoch 39/3000
Epoch 40/3000
Epoch 41/3000
Epoch 42/3000
Epoch 43/3000
Epoch 44/3000
Epoch 45/3000
Epoch 46/3000
Epoch 47/3000
Epoch 48/3000
Epoch 49/3000
Epoch 50/3000
Epoch 51/3000
Epoch 52/3000
Epoch 53/3000
Epoch 54/3000
Epoch 55/3000
Epoch 56/3000
Epoch 57/3000
Epoch 58/3000
Epoch 59/3000
Epoch 60/3000
Epoch 61/3000
Epoch 62/3000
Epoch 63/3000
Epoch 64/3000
Epoch 65/3000
Epoch 66/3000
Epoch 67/3000
Epoch 68/3000
Epoch 69/3000
Epoch 70/3000
Epoch 71/3000
Epoch 72/3000
Epoch 73/3000


Epoch 95/3000
Epoch 96/3000
Epoch 97/3000
Epoch 98/3000
Epoch 99/3000
Epoch 100/3000
Epoch 101/3000
Epoch 102/3000
Epoch 103/3000
Epoch 104/3000
Epoch 105/3000
Epoch 106/3000
Epoch 107/3000
Epoch 108/3000
Epoch 109/3000
Epoch 110/3000
Epoch 111/3000
Epoch 112/3000
Epoch 113/3000
Epoch 114/3000
Epoch 115/3000
Epoch 116/3000
Epoch 117/3000
Epoch 118/3000
Epoch 119/3000
Epoch 120/3000
Epoch 121/3000
Epoch 122/3000
Epoch 123/3000
Epoch 124/3000
Epoch 125/3000
Epoch 126/3000
Epoch 127/3000
Epoch 128/3000
Epoch 129/3000
Epoch 130/3000
Epoch 131/3000
Epoch 132/3000
Epoch 133/3000
Epoch 134/3000
Epoch 135/3000
Epoch 136/3000
Epoch 137/3000
Epoch 138/3000
Epoch 139/3000
Epoch 140/3000
Epoch 141/3000
Epoch 142/3000
Epoch 143/3000
Epoch 144/3000
Epoch 145/3000
Epoch 146/3000
Epoch 147/3000
Epoch 148/3000
Epoch 149/3000
Epoch 150/3000
Epoch 151/3000
Epoch 152/3000
Epoch 153/3000
Epoch 154/3000
Epoch 155/3000
Epoch 156/3000
Epoch 157/3000
Epoch 158/3000
Epoch 159/3000
Epoch 160/3000
Epoch 161/3000


Epoch 187/3000
Epoch 188/3000
Epoch 189/3000
Epoch 190/3000
Epoch 191/3000
Epoch 192/3000
Epoch 193/3000
Epoch 194/3000
Epoch 195/3000
Epoch 196/3000
Epoch 197/3000
Epoch 198/3000
Epoch 199/3000
Epoch 200/3000
Epoch 201/3000
Epoch 202/3000
Epoch 203/3000
Epoch 204/3000
Epoch 205/3000
Epoch 206/3000
Epoch 207/3000
Epoch 208/3000
Epoch 209/3000
Epoch 210/3000
Epoch 211/3000
Epoch 212/3000
Epoch 213/3000
Epoch 214/3000
Epoch 215/3000
Epoch 216/3000
Epoch 217/3000
Epoch 218/3000
Epoch 219/3000
Epoch 220/3000
Epoch 221/3000
Epoch 222/3000
Epoch 223/3000
Epoch 224/3000
Epoch 225/3000
Epoch 226/3000
Epoch 227/3000
Epoch 228/3000
Epoch 229/3000
Epoch 230/3000
Epoch 231/3000
Epoch 232/3000
Epoch 233/3000
Epoch 234/3000
Epoch 235/3000
Epoch 236/3000
Epoch 237/3000
Epoch 238/3000
Epoch 239/3000
Epoch 240/3000
Epoch 241/3000
Epoch 242/3000
Epoch 243/3000
Epoch 244/3000
Epoch 245/3000
Epoch 246/3000
Epoch 247/3000
Epoch 248/3000
Epoch 249/3000
Epoch 250/3000
Epoch 251/3000
Epoch 252/3000
Epoch 253/

Epoch 279/3000
Epoch 280/3000
Epoch 281/3000
Epoch 282/3000
Epoch 283/3000
Epoch 284/3000
Epoch 285/3000
Epoch 286/3000
Epoch 287/3000
Epoch 288/3000
Epoch 289/3000
Epoch 290/3000
Epoch 291/3000
Epoch 292/3000
Epoch 293/3000
Epoch 294/3000
Epoch 295/3000
Epoch 296/3000
Epoch 297/3000
Epoch 298/3000
Epoch 299/3000
Epoch 300/3000
Epoch 301/3000
Epoch 302/3000
Epoch 303/3000
Epoch 304/3000
Epoch 305/3000
Epoch 306/3000
Epoch 307/3000
Epoch 308/3000
Epoch 309/3000
Epoch 310/3000
Epoch 311/3000
Epoch 312/3000
Epoch 313/3000
Epoch 314/3000
Epoch 315/3000
Epoch 316/3000
Epoch 317/3000
Epoch 318/3000
Epoch 319/3000
Epoch 320/3000
Epoch 321/3000
Epoch 322/3000
Epoch 323/3000
Epoch 324/3000
Epoch 325/3000
Epoch 326/3000
Epoch 327/3000
Epoch 328/3000
Epoch 329/3000
Epoch 330/3000
Epoch 331/3000
Epoch 332/3000
Epoch 333/3000
Epoch 334/3000
Epoch 335/3000
Epoch 336/3000
Epoch 337/3000
Epoch 338/3000
Epoch 339/3000
Epoch 340/3000
Epoch 341/3000
Epoch 342/3000
Epoch 343/3000
Epoch 344/3000
Epoch 345/

Epoch 371/3000
Epoch 372/3000
Epoch 373/3000
Epoch 374/3000
Epoch 375/3000
Epoch 376/3000
Epoch 377/3000
Epoch 378/3000
Epoch 379/3000
Epoch 380/3000
Epoch 381/3000
Epoch 382/3000
Epoch 383/3000
Epoch 384/3000
Epoch 385/3000
Epoch 386/3000
Epoch 387/3000
Epoch 388/3000
Epoch 389/3000
Epoch 390/3000
Epoch 391/3000
Epoch 392/3000
Epoch 393/3000
Epoch 394/3000
Epoch 395/3000
Epoch 396/3000
Epoch 397/3000
Epoch 398/3000
Epoch 399/3000
Epoch 400/3000
Epoch 401/3000
Epoch 402/3000
Epoch 403/3000
Epoch 404/3000
Epoch 405/3000
Epoch 406/3000
Epoch 407/3000
Epoch 408/3000
Epoch 409/3000
Epoch 410/3000
Epoch 411/3000
Epoch 412/3000
Epoch 413/3000
Epoch 414/3000
Epoch 415/3000
Epoch 416/3000
Epoch 417/3000
Epoch 418/3000
Epoch 419/3000
Epoch 420/3000
Epoch 421/3000
Epoch 422/3000
Epoch 423/3000
Epoch 424/3000
Epoch 425/3000
Epoch 426/3000
Epoch 427/3000
Epoch 428/3000
Epoch 429/3000
Epoch 430/3000
Epoch 431/3000
Epoch 432/3000
Epoch 433/3000
Epoch 434/3000
Epoch 435/3000
Epoch 436/3000
Epoch 437/

Epoch 463/3000
Epoch 464/3000
Epoch 465/3000
Epoch 466/3000
Epoch 467/3000
Epoch 468/3000
Epoch 469/3000
Epoch 470/3000
Epoch 471/3000
Epoch 472/3000
Epoch 473/3000
Epoch 474/3000
Epoch 475/3000
Epoch 476/3000
Epoch 477/3000
Epoch 478/3000
Epoch 479/3000
Epoch 480/3000
Epoch 481/3000
Epoch 482/3000
Epoch 483/3000
Epoch 484/3000
Epoch 485/3000
Epoch 486/3000
Epoch 487/3000
Epoch 488/3000
Epoch 489/3000
Epoch 490/3000
Epoch 491/3000
Epoch 492/3000
Epoch 493/3000
Epoch 494/3000
Epoch 495/3000
Epoch 496/3000
Epoch 497/3000
Epoch 498/3000
Epoch 499/3000
Epoch 500/3000
Epoch 501/3000
Epoch 502/3000
Epoch 503/3000
Epoch 504/3000
Epoch 505/3000
Epoch 506/3000
Epoch 507/3000
Epoch 508/3000
Epoch 509/3000
Epoch 510/3000
Epoch 511/3000
Epoch 512/3000
Epoch 513/3000
Epoch 514/3000
Epoch 515/3000
Epoch 516/3000
Epoch 517/3000
Epoch 518/3000
Epoch 519/3000
Epoch 520/3000
Epoch 521/3000
Epoch 522/3000
Epoch 523/3000
Epoch 524/3000
Epoch 525/3000
Epoch 526/3000
Epoch 527/3000
Epoch 528/3000
Epoch 529/

KeyboardInterrupt: 

In [7]:
def prepare_sequences_prediction(notes, pitchnames, n_vocab, sequence_length=16):
    """Encode ``notes`` into sliding windows used to seed generation.

    Args:
        notes: Sequence of tokens.
        pitchnames: Ordered vocabulary; a token's position in it is its
            integer code.
        n_vocab: Divisor used to normalize the integer codes.
        sequence_length: Number of tokens per window.

    Returns:
        tuple: ``(network_input, normalized_input)``. ``network_input`` is a
        list of windows, each a list of integer codes. ``normalized_input``
        is the same data as an array of shape
        ``(n_patterns, sequence_length, 1)`` divided by ``n_vocab``.
    """
    note_to_int = {note: number for number, note in enumerate(pitchnames)}

    # Same window count as range(0, len(notes) - sequence_length): the last
    # full window is left out because there is no token following it.
    n_patterns = max(len(notes) - sequence_length, 0)
    network_input = [
        [note_to_int[note] for note in notes[start:start + sequence_length]]
        for start in range(n_patterns)
    ]

    # reshape the input into a format compatible with LSTM layers, then normalize
    normalized_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    normalized_input = normalized_input / float(n_vocab)

    return (network_input, normalized_input)

In [8]:
def generate_notes(model, network_input, pitchnames, n_vocab, n_notes=1000):
    """Sample a sequence of tokens from the model, seeded by a random window.

    A random window from ``network_input`` is the starting context. At each
    step the model's output distribution is sampled, the sampled token is
    recorded, and the context slides forward by one token.

    Args:
        model: Object with a Keras-style ``predict(x, verbose=0)`` method
            returning one probability row per input sample.
        network_input: List of integer-coded windows (not modified).
        pitchnames: Ordered vocabulary; index ``i`` decodes to
            ``pitchnames[i]``.
        n_vocab: Divisor used to normalize the integer codes fed to the model.
        n_notes: Number of tokens to generate.

    Returns:
        list: The ``n_notes`` generated tokens.

    Raises:
        ValueError: If ``network_input`` is empty.
    """
    if len(network_input) == 0:
        raise ValueError("network_input is empty; there is no sequence to seed generation")

    int_to_note = dict(enumerate(pitchnames))

    # randint's upper bound is exclusive, so this can pick any window and
    # also works when there is only one.
    start = np.random.randint(0, len(network_input))
    # Work on a copy so the caller's seed window is never appended to.
    pattern = list(network_input[start])

    prediction_output = []
    for _ in range(n_notes):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1)) / float(n_vocab)
        # verbose=0: otherwise a progress line is printed on every step.
        probabilities = model.predict(prediction_input, verbose=0)[0]

        # Draw an index in proportion to its predicted probability.
        # random.choices never returns an index past the last class, unlike a
        # manual running subtraction, which can overshoot on float rounding.
        weights = [float(p) for p in probabilities]
        index = random.choices(range(len(weights)), weights=weights)[0]

        prediction_output.append(int_to_note[index])
        pattern = pattern[1:] + [index]

    return prediction_output

In [9]:
# Adapted from: https://stackoverflow.com/questions/1806278/convert-fraction-to-float
def convert_to_float(frac_str):
    """Parse a decimal, a fraction ("1/2") or a mixed number ("1 1/2") as a float.

    A negative whole part makes the fractional part count negatively, so
    "-1 1/2" yields -1.5.
    """
    # Fast path: anything float() understands directly.
    try:
        return float(frac_str)
    except ValueError:
        pass

    numerator, denominator = frac_str.split('/')

    # An optional whole part is separated from the numerator by one space.
    try:
        whole_part, numerator = numerator.split(' ')
        whole = float(whole_part)
    except ValueError:
        whole = 0

    fraction = float(numerator) / float(denominator)
    if whole < 0:
        return whole - fraction
    return whole + fraction

def create_midi(prediction_output):
    """Convert predicted patterns to note/chord/rest objects and write a MIDI file.

    Each item of ``prediction_output`` is split on whitespace into a pitch
    pattern and a duration string. The pattern becomes a chord, a rest or a
    single note placed at a running offset, and the duration (parsed with
    ``convert_to_float``) advances that offset. The result is written to
    ``test_output.mid``.

    NOTE(review): a second ``create_midi`` is defined later in this file and
    replaces this one once that definition has run.
    NOTE(review): ``note``, ``chord``, ``instrument`` and ``stream`` are not
    imported in the visible part of the file - presumably music21 modules;
    confirm before relying on this function.
    """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # expects "<pattern> <duration>"; an item without whitespace has no
        # second field and fails on pattern[1]
        pattern = pattern.split()
        temp = pattern[0]
        duration = pattern[1]
        pattern = temp
        # pattern is a chord: '.'-separated integer pitches (or one bare integer)
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a rest
        elif('rest' in pattern):
            new_rest = note.Rest(pattern)
            new_rest.offset = offset
            new_rest.storedInstrument = instrument.Piano() # NOTE(review): unclear whether a rest needs an instrument
            output_notes.append(new_rest)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)
        # increase offset each iteration so that notes do not stack
        offset += convert_to_float(duration)

    midi_stream = stream.Stream(output_notes)

    # output path is fixed; an existing test_output.mid is presumably overwritten
    midi_stream.write('midi', fp='test_output.mid')

In [10]:
def generate():
    """Generate a new piece with the trained model and write it out as MIDI.

    Loads the token sequence from ``notes.p``, rebuilds the network with
    ``create_network``, loads the weights from ``weights.hdf5``, samples a
    token sequence with ``generate_notes`` and passes it to ``create_midi``.
    """
    # NOTE: unpickling can execute arbitrary code; only load a notes.p that
    # was produced locally by get_notes().
    with open('notes.p', 'rb') as handle:
        notes = pickle.load(handle)

    # Build the vocabulary once; its size is the number of distinct tokens.
    pitchnames = sorted(set(notes))
    n_vocab = len(pitchnames)

    network_input, normalized_input = prepare_sequences_prediction(notes, pitchnames, n_vocab)

    # normalized_input is only used here for its shape.
    model = create_network(normalized_input, n_vocab)
    model.load_weights("weights.hdf5")

    prediction_output = generate_notes(model, network_input, pitchnames, n_vocab)
    create_midi(prediction_output)
    
def create_midi(prediction_output, output_path='output.mid'):
    """Write generated "note.channel.duration" tokens to a multi-track MIDI file.

    The file gets one leading empty track followed by one track per channel
    0, 1, 2 and 9, each starting with a ``program_change``. Within a track the
    notes are played one after another, each held for ``duration * 100``
    ticks. Tokens with duration 0 and tokens on any other channel are dropped.

    Args:
        prediction_output: Iterable of ``"<note>.<channel>.<duration>"``
            strings (not modified).
        output_path: Where to save the MIDI file.
    """
    # (channel, program number sent in program_change), in track order
    voices = ((0, 80), (1, 81), (2, 38), (9, 121))
    ticks_per_step = 100

    mid = MidiFile()
    mid.tracks.append(MidiTrack())  # leading empty track, kept first in the file

    tracks = {}
    for channel, program in voices:
        track = MidiTrack()
        track.append(Message('program_change', channel=channel, program=program, time=0))
        mid.tracks.append(track)
        tracks[channel] = track

    for token in prediction_output:
        # Parse into locals rather than overwriting the caller's list entries.
        fields = [int(field) for field in token.split('.')]
        pitch, channel, duration = fields[0], fields[1], fields[2]
        if duration == 0 or channel not in tracks:
            continue

        track = tracks[channel]
        # A message's time is the delta in ticks since the previous message.
        # The note starts immediately and the duration goes on the
        # zero-velocity note-on that ends it; putting the delay on the first
        # message would give every note zero length.
        track.append(Message('note_on', channel=channel, note=pitch, time=0))
        track.append(Message('note_on', channel=channel, note=pitch, velocity=0,
                             time=duration * ticks_per_step))

    mid.save(output_path)

In [13]:
# Sample a new token sequence from the saved weights and write it out as MIDI.
generate()