In [1]:
import sys
import re
import numpy as np
import pandas as pd
import music21
# to search for filenames with wildcard characters
from glob import glob
import IPython
# tqdm is used to predict the remaining time
from tqdm import tqdm
import pickle
from tensorflow.python.keras import utils
import play

In [2]:
from music21 import converter, instrument, note, chord, stream

In [3]:
# glob() returns matches in arbitrary, OS-dependent order; sorting makes the file list
# (and any subset sliced from it later) reproducible across runs and machines.
songs = sorted(glob('datasets/*.mid'))

In [4]:
# Only the first MAX_SONGS files are used (presumably to keep parsing and training
# time short); raise the value to use more of the dataset.
MAX_SONGS = 3
songs = songs[:MAX_SONGS]

In [5]:
def get_notes():
    """Extract the note and chord tokens of every MIDI file listed in ``songs``.

    Each file is parsed with music21. A single note is recorded as the string
    form of its pitch; a chord is recorded as its normal-order integers joined
    with dots (e.g. ``'0.4.7'``). Other element types are ignored. The combined
    list is also pickled to the file ``notes`` in the working directory, which
    is where ``generate()`` reads it back from.

    Returns:
        list[str]: the tokens of all files, in file order.
    """
    notes = []
    for file in songs:
        # convert the .mid file to a music21 stream object
        midi = converter.parse(file)
        # Reset for every file: without this a failed partition would either leave
        # `parts` unbound (first file) or silently reuse the previous file's parts.
        parts = None
        try:
            # partition the stream into one part per unique instrument
            parts = instrument.partitionByInstrument(midi)
        except Exception:
            # Best effort: fall back to the flattened stream below.
            parts = None
        if parts:
            # recurse() also walks any sub-streams nested inside the first part
            notes_to_parse = parts.parts[0].recurse()
        else:
            # .flat gives a new stream in which no element nests inside another
            notes_to_parse = midi.flat.notes
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                # a single note is stored as its pitch string
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                # a chord is stored as its normal form (integers) joined by dots
                notes.append('.'.join(str(n) for n in element.normalOrder))
    with open('notes', 'wb') as filepath:
        pickle.dump(notes, filepath)
    return notes

In [6]:
# The notes returned by get_notes() are strings, but a neural network needs numeric inputs,
# so each distinct note/chord string is mapped to an integer index before training.
def prep_seq(notes, n_vocab, seq_length=100):
    """Build normalised LSTM inputs and one-hot targets from the note tokens.

    Every distinct token is mapped to its index in the sorted vocabulary. A
    window of ``seq_length`` consecutive tokens forms one input sample and the
    token that follows the window is its target.

    Args:
        notes: list of note/chord strings.
        n_vocab: number of distinct tokens; used to scale the inputs and as the
            width of the one-hot targets.
        seq_length: number of tokens per input window (default 100).

    Returns:
        tuple: ``(net_in, net_out)`` where ``net_in`` has shape
        ``(patterns, seq_length, 1)`` with values divided by ``n_vocab`` and
        ``net_out`` has shape ``(patterns, n_vocab)``.

    Raises:
        ValueError: if ``notes`` has no more than ``seq_length`` tokens, so not
            a single (window, target) pair can be formed.
    """
    number_of_patterns = len(notes) - seq_length
    if number_of_patterns <= 0:
        raise ValueError(
            'need more than %d notes to build a training sequence, got %d'
            % (seq_length, len(notes))
        )

    # sorted, de-duplicated vocabulary -> integer index
    pitchnames = sorted(set(notes))
    notes_to_int = {name: idx for idx, name in enumerate(pitchnames)}
    encoded = [notes_to_int[token] for token in notes]

    # slide a window over the song: seq_length tokens in, the following token out
    net_in = [encoded[i:i + seq_length] for i in range(number_of_patterns)]
    net_out = encoded[seq_length:]

    # LSTM layers expect (samples, timesteps, features)
    net_in = np.reshape(net_in, (number_of_patterns, seq_length, 1))

    # Input normalization
    net_in = net_in / float(n_vocab)

    # Pin the one-hot width: inferring it from the targets alone would produce a
    # narrower array whenever the highest-index token never appears as a target,
    # which would not match the Dense(n_vocab) output layer.
    net_out = utils.to_categorical(net_out, num_classes=n_vocab)
    return (net_in, net_out)

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, LSTM, Dense, Dropout, Flatten
def net_arch(net_in, n_vocab):
    """Assemble and compile the two-layer LSTM classifier.

    Args:
        net_in: input array; only ``net_in.shape[1:]`` is used, as the shape of
            one sample fed to the first LSTM layer.
        n_vocab: size of the final Dense layer, i.e. number of output classes.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy, Adam).
    """
    # shape of a single sample: the leading (sample) axis is dropped
    sample_shape = net_in.shape[1:]
    stack = [
        # 128 units; return_sequences=True makes the layer emit an output for
        # every timestep instead of only the last one
        LSTM(128, input_shape=sample_shape, return_sequences=True),
        Dropout(0.2),
        LSTM(128, return_sequences=True),
        # collapse the per-timestep outputs into one vector per sample
        Flatten(),
        Dense(256),
        Dropout(0.3),
        Dense(n_vocab),
        Activation('softmax'),
    ]

    network = Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(loss='categorical_crossentropy', optimizer='adam')
    return network

In [8]:
from tensorflow.keras.callbacks import ModelCheckpoint
def train(model, net_in, net_out, epochs):
    """Fit ``model`` on the given data, checkpointing the best weights.

    A ``ModelCheckpoint`` callback monitors the training loss and, with
    ``save_best_only=True``, writes to ``weights.best.music3.hdf5``. Training
    uses a batch size of 32. Nothing is returned.

    Args:
        model: object exposing a Keras-style ``fit`` method.
        net_in: training inputs.
        net_out: training targets.
        epochs: number of epochs to train for.
    """
    best_weights_cb = ModelCheckpoint(
        "weights.best.music3.hdf5",
        monitor='loss',
        verbose=0,
        save_best_only=True,
    )
    model.fit(
        net_in,
        net_out,
        epochs=epochs,
        batch_size=32,
        callbacks=[best_weights_cb],
    )

In [11]:
def train_net():
    """Run the whole training pipeline end to end.

    Extracts the notes with ``get_notes``, turns them into training sequences
    with ``prep_seq``, builds the network with ``net_arch`` and trains it for
    100 epochs with ``train``. The vocabulary size is the number of distinct
    note tokens.
    """
    n_epochs = 100
    all_notes = get_notes()
    vocab_size = len(set(all_notes))
    inputs, targets = prep_seq(all_notes, vocab_size)
    network = net_arch(inputs, vocab_size)
    train(network, inputs, targets, n_epochs)

In [12]:
# Run the training pipeline defined above: get_notes -> prep_seq -> net_arch -> train.
train_net()

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [37]:
def get_inputseq(notes, pitchnames, n_vocab):
    """Encode ``notes`` as overlapping windows of 100 integer indices.

    Args:
        notes: list of note/chord strings.
        pitchnames: vocabulary; a token's position in it is its integer code.
        n_vocab: unused here; kept in the signature for the callers.

    Returns:
        list[list[int]]: one list per window start ``0 .. len(notes) - 101``;
        empty when ``notes`` has 100 tokens or fewer.
    """
    window = 100
    lookup = {name: code for code, name in enumerate(pitchnames)}

    # tokens are looked up window by window, so only tokens that actually fall
    # inside a window need to be present in `pitchnames`
    return [
        [lookup[token] for token in notes[begin:begin + window]]
        for begin in range(len(notes) - window)
    ]

In [50]:
def generate_notes(model, net_in, pitchnames, n_vocab, n_notes=500):
    """Generate note tokens by repeatedly predicting the next one.

    A random sequence from ``net_in`` seeds the generation. At every step the
    current window is scaled by ``n_vocab``, passed to ``model.predict``, and
    the arg-max class is taken as the next token; the window then slides one
    position forward by appending that token and dropping the oldest one.

    Args:
        model: object with a Keras-style ``predict(x, verbose=0)`` method.
        net_in: list of integer-encoded sequences to choose the seed from.
        pitchnames: vocabulary; index ``i`` decodes to ``pitchnames[i]``.
        n_vocab: vocabulary size used to normalise the model input.
        n_notes: number of tokens to generate (default 500).

    Returns:
        list[str]: the generated note/chord tokens.

    Raises:
        ValueError: if ``net_in`` is empty.
    """
    if len(net_in) == 0:
        raise ValueError('net_in is empty; there is no sequence to start from')

    int_to_note = dict(enumerate(pitchnames))

    # randint's upper bound is exclusive, so len(net_in) makes every sequence
    # (including the last one, and a lone single sequence) eligible as the seed
    start = np.random.randint(0, len(net_in))
    # work on a copy so the caller's sequence is not modified while the window slides
    pattern = list(net_in[start])

    prediction_output = []
    print('Generating notes')
    for _ in range(n_notes):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)
        prediction = model.predict(prediction_input, verbose=0)
        # plain int keeps the window a list of Python ints
        index = int(np.argmax(prediction))
        prediction_output.append(int_to_note[index])
        # slide the window: append the new token, drop the oldest one
        pattern.append(index)
        pattern = pattern[1:]
    print('Notes generated')
    return prediction_output

In [51]:
def create_midi(prediction_output, fp='test_output.mid', step=0.5):
    """Turn generated tokens into music21 notes/chords and write a MIDI file.

    A token that contains a dot or consists only of digits is treated as a
    chord: its dot-separated integers each become a note and the notes are
    combined into one chord. Any other token is passed to ``note.Note`` as is.
    Every created note gets a piano as its stored instrument. Consecutive
    elements are placed ``step`` apart so they do not stack on top of each other.

    Args:
        prediction_output: iterable of note/chord strings.
        fp: path of the MIDI file to write (default ``'test_output.mid'``).
        step: offset increment between consecutive elements (default 0.5).
    """
    offset = 0
    output_notes = []
    for pattern in prediction_output:
        if ('.' in pattern) or pattern.isdigit():
            # Chord: build one note per integer, then combine them
            chord_notes = []
            for current_note in pattern.split('.'):
                member = note.Note(int(current_note))
                member.storedInstrument = instrument.Piano()
                chord_notes.append(member)
            element = chord.Chord(chord_notes)
        else:
            # Single note
            element = note.Note(pattern)
            element.storedInstrument = instrument.Piano()
        element.offset = offset
        output_notes.append(element)
        # advance the offset so that elements do not pile up at the same position
        offset += step

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=fp)
    # report only once the file has actually been written
    print('MIDI file save')

In [64]:
def generate():
    """Generate a piece from the trained weights and save it as MIDI.

    Reads the pickled token list from the file ``notes``, rebuilds the
    vocabulary and the integer input sequences, constructs the network with
    ``net_arch``, loads ``weights.best.music3.hdf5`` into it, generates new
    tokens with ``generate_notes`` and hands them to ``create_midi``.
    """
    # NOTE: pickle.load must only be used on files from a trusted source
    with open('notes', 'rb') as notes_file:
        notes = pickle.load(notes_file)

    pitchnames = sorted(set(notes))
    n_vocab = len(pitchnames)
    print('Start music generation.')

    sequences = get_inputseq(notes, pitchnames, n_vocab)
    # the scaled (samples, 100, 1) array is only used to build the network;
    # generation itself works on the raw integer sequences
    scaled = np.reshape(sequences, (len(sequences), 100, 1)) / float(n_vocab)
    network = net_arch(scaled, n_vocab)

    network.load_weights('weights.best.music3.hdf5')
    generated = generate_notes(network, sequences, pitchnames, n_vocab)
    create_midi(generated)

In [65]:
# Generate new notes from the saved weights and write them to test_output.mid.
generate()

Start music generation.
Generating notes
Notes generated
MIDI file save


In [67]:
# `play` is a local helper module that is not shown in this notebook; presumably this
# plays back the file written by generate() -- confirm against play.py.
play.play_midi('test_output.mid')

Music file test_output.mid loaded!
