In [1]:
import sys
import re
import numpy as np
import pandas as pd
import music21
# to search for filenames with wildcard characters
from glob import glob
import IPython
# tqdm displays a progress bar with an estimate of the remaining time
from tqdm import tqdm
import pickle
from tensorflow.python.keras import utils
import play

In [2]:
from music21 import converter, instrument, note, chord, stream

In [3]:
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# that any later positional slice of `songs` selects the same files on every
# machine and every run.
songs = sorted(glob('datasets/*.mid'))

In [4]:
# Keep only the first three matched files. get_notes() iterates over this
# module-level list, so everything downstream sees just this subset.
songs = songs[:3]

In [5]:
def get_notes(files=None, output_path='notes'):
    """Extract note/chord tokens from MIDI files and pickle the resulting list.

    Each single note is encoded as its pitch string; each chord is encoded as
    its normal-order integers joined by '.'.

    Args:
        files: iterable of MIDI file paths. Defaults to the notebook-level
            ``songs`` list when omitted.
        output_path: file the token list is pickled to (default ``'notes'``).

    Returns:
        list[str]: the tokens of all files, concatenated in file order.
    """
    if files is None:
        # Backward-compatible default: use the list built by the glob cell.
        files = songs

    notes = []
    for file in files:
        # Convert the .mid file to a music21 stream object.
        midi = converter.parse(file)

        # Reset per file. Previously a failure in partitionByInstrument left
        # `parts` unbound (first file) or holding the previous file's value.
        parts = None
        try:
            # Partition the stream into one part per unique instrument.
            parts = instrument.partitionByInstrument(midi)
        except Exception:
            # Best-effort: if partitioning fails, fall back to the flattened
            # stream below instead of aborting the whole extraction.
            parts = None

        if parts:
            # Instrument parts are available: walk the first part, including
            # any nested sub-streams.
            notes_to_parse = parts.parts[0].recurse()
        else:
            # No usable partition: use the flattened view of the whole file,
            # where no element nests inside another.
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                # A single note is stored as its pitch string.
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                # A chord is stored as its normal-order integers joined by '.'.
                notes.append('.'.join(str(n) for n in element.normalOrder))

    with open(output_path, 'wb') as filepath:
        pickle.dump(notes, filepath)
    return notes

In [6]:
# get_notes() returns the notes as strings, but a neural network needs numeric inputs,
# so each distinct note/chord string is mapped to an integer index here.
def prep_seq(notes, n_vocab, seq_length=100):
    """Turn a list of note tokens into LSTM training inputs and one-hot targets.

    Every window of ``seq_length`` consecutive tokens is one input sample and
    the token that follows the window is its target.

    Args:
        notes: sequence of note/chord strings.
        n_vocab: vocabulary size. Used both to scale the inputs and as the
            width of the one-hot targets, so it must be at least the number
            of distinct tokens in ``notes``.
        seq_length: number of tokens per input window (default 100).

    Returns:
        tuple: ``(net_in, net_out)`` where ``net_in`` has shape
        ``(patterns, seq_length, 1)`` with integer indices divided by
        ``n_vocab``, and ``net_out`` is a float32 one-hot array of shape
        ``(patterns, n_vocab)``.

    Raises:
        ValueError: if ``notes`` has no more than ``seq_length`` items, if
            ``seq_length`` is not positive, or if ``n_vocab`` is smaller than
            the number of distinct tokens.
    """
    # Distinct tokens in sorted order; the position in this list is the
    # integer code of the token.
    pitchnames = sorted(set(notes))
    if n_vocab < len(pitchnames):
        raise ValueError(
            'n_vocab (%d) is smaller than the number of distinct notes (%d)'
            % (n_vocab, len(pitchnames))
        )

    number_of_patterns = len(notes) - seq_length
    if seq_length < 1 or number_of_patterns < 1:
        raise ValueError(
            'need more than seq_length=%d notes to build a sequence, got %d'
            % (seq_length, len(notes))
        )

    notes_to_int = {name: index for index, name in enumerate(pitchnames)}
    encoded = [notes_to_int[name] for name in notes]

    # Window i covers encoded[i:i+seq_length]; its target is the next token.
    net_in = [encoded[i:i + seq_length] for i in range(number_of_patterns)]
    net_out = encoded[seq_length:]

    # LSTM layout: (samples, timesteps, features) with one feature per step.
    net_in = np.reshape(net_in, (number_of_patterns, seq_length, 1))

    # Input normalization.
    net_in = net_in / float(n_vocab)

    # One-hot encode with a fixed width of n_vocab. Inferring the width from
    # the largest target label yields too few columns whenever the
    # highest-indexed token never occurs as a target (e.g. it only appears in
    # the first window), which would not match a Dense(n_vocab) output layer.
    one_hot = np.zeros((number_of_patterns, n_vocab), dtype='float32')
    one_hot[np.arange(number_of_patterns), net_out] = 1.0
    return (net_in, one_hot)

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, LSTM, Dense, Dropout, Flatten
def net_arch(net_in, n_vocab):
    """Build and compile the stacked-LSTM network used for next-note prediction.

    Args:
        net_in: input array; ``net_in.shape[1:]`` is used as the per-sample
            input shape of the first layer.
        n_vocab: number of units in the final softmax layer.

    Returns:
        A compiled ``Sequential`` model (categorical cross-entropy, Adam).
    """
    sample_shape = net_in.shape[1:]

    # Both LSTM layers use 128 units and return_sequences=True, i.e. they emit
    # an output for every timestep rather than only the last one; Flatten then
    # collapses that per-timestep output into one vector for the Dense layers.
    stack = [
        LSTM(128, input_shape=sample_shape, return_sequences=True),
        Dropout(0.2),
        LSTM(128, return_sequences=True),
        Flatten(),
        Dense(256),
        Dropout(0.3),
        Dense(n_vocab),
        Activation('softmax'),
    ]

    network = Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(optimizer='adam', loss='categorical_crossentropy')
    return network

In [8]:
from tensorflow.keras.callbacks import ModelCheckpoint
def train(model, net_in, net_out, epochs):
    """Fit ``model`` on the prepared sequences, checkpointing the best weights.

    Args:
        model: compiled Keras model to train.
        net_in: training inputs.
        net_out: training targets.
        epochs: number of passes over the training data.
    """
    # Monitors the training loss and, with save_best_only=True, only keeps
    # the checkpoint written when that loss improves.
    best_loss_saver = ModelCheckpoint(
        "weights.best.music3.hdf5",
        monitor='loss',
        verbose=0,
        save_best_only=True,
    )

    model.fit(
        net_in,
        net_out,
        epochs=epochs,
        batch_size=32,
        callbacks=[best_loss_saver],
    )

In [9]:
def train_net(epochs=200):
    """Run the full pipeline: extract notes, build sequences, build and train the model.

    Args:
        epochs: number of training epochs (default 200, the previously
            hard-coded value), exposed so short trial runs are possible.
    """
    notes = get_notes()
    # Vocabulary size is the number of distinct note/chord tokens.
    n_vocab = len(set(notes))
    net_in, net_out = prep_seq(notes, n_vocab)
    model = net_arch(net_in, n_vocab)
    train(model, net_in, net_out, epochs)

In [None]:
# Run the whole pipeline end to end: parse the MIDI files, build the training
# sequences, construct the model and fit it (200 epochs as set in train_net).
train_net()