## Imports

In [21]:
from midi_to_dataframe import NoteMapper, MidiReader, MidiWriter
import IPython
from IPython.display import Image, IFrame
from PIL import Image
import seaborn as sns
import pandas as pd
import numpy as np
import os
import json
import music21
import pickle
import nltk
nltk.download('punkt')

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Embedding
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

[nltk_data] Downloading package punkt to /home/maarij/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Load Mappings File

A **NoteMapper** object encapsulates how MIDI note information is converted to text to be displayed within a DataFrame. This object is initialized from a JSON file, containing three objects:

* **midi-to-text**: JSON mapping of MIDI program numbers to their textual representation. Used when converting MIDI files to DataFrames.
    * For example: *{"0": "piano"}*
* **text-to-midi**: JSON mapping of textual representations of MIDI instruments to MIDI program numbers. Used when writing DataFrames to MIDI.
    * For example: *{"piano": 0}*
* **durations**: JSON mapping of textual representations of MIDI instruments to predefined quantization values (in quarter notes). Used when converted MIDI files to DataFrames.
    * For example: *{"piano": [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.5, 3, 4, 6, 8, 12, 16]}*

In [22]:
note_mapping_config_path = "./config/map-to-group.json"
note_mapper = NoteMapper(note_mapping_config_path)

## Convert a MIDI file to a DataFrame

The **MidiReader** object is used to read a MIDI file from disk and convert it to a **DataFrame**. A **NoteMapper** object is passed to the MidiReader upon initialization to handle the MIDI to text conversion of note durations and program names.

In [23]:
reader = MidiReader(note_mapper)

# getting midi files
filepath = "./datasets/dataset_pop/"
MidiDataDF = pd.DataFrame()
count = 0

for filename in os.listdir(filepath):
    if filename.endswith(".midi"):
        
        # create file path
        count += 1
        #print(count, filename, end = " ")
        fullFilePath = filepath+filename

        # read file as dataframe
        tempDF = reader.convert_to_dataframe(fullFilePath)
        #print(tempDF.shape[0])
        MidiDataDF = MidiDataDF.append(tempDF)


In [24]:
#MidiDataDF

## MIDI DataFrame

The created DataFrame object contains the sequence of musical notes found in the input MIDI file, quantized by 16th notes and the following rows:

* **timestamp**: the MIDI timestamp (tick)
* **bpm**: the beats per minute at the timestamp
* **time_signature**: the time signature at the timestamp
* **measure**: the measure number at the timestamp
* **beat**: the downbeat within the current measure at the timestamp, in quarter notes
* **notes**: a textual representation of the notes played at the current timestamp

In [25]:
NotesDataDF = MidiDataDF[["notes"]]
#NotesDataDF

## Vocabulary Building

In [26]:
vocabulary = ["rest"] # add rest by default

### Instruments

In [27]:
instruments = ['bass', 'synthlead', 'synthfx', 'reed',
               'percussive', 'organ', 'guitar', 'pipe',
               'soundfx', 'chromatic', 'ethnic', 'piano',
               'brass', 'synthpad', 'ensemble', 'strings']

percussionInstruments = ['acousticbassdrum', 'bassdrum', 'rimshot', 'acousticsnare',
                         'clap', 'snare', 'lowfloortom', 'closedhat', 'highfloortom',
                         'pedalhat', 'lowtom', 'openhat', 'lowmidtom', 'highmidtom',
                         'crashcymbal', 'hightom', 'ridecymbal', 'chinesecymbal',
                         'ridebell', 'tambourine', 'splashcymbal', 'cowbell', 'vibraslap',
                         'highbongo', 'lowbongo', 'mutehighconga', 'openhighconga', 'lowconga',
                         'hightimbale', 'lowtimbale', 'highagogo', 'lowagogo', 'cabasa',
                         'maracas', 'shortwhistle', 'longwhistle', 'shortguiro', 'longguiro',
                         'claves', 'highwoodblock', 'lowwoodblock', 'mutecuica', 'opencuica',
                         'mutetriangle', 'opentriangle']

### Chords

In [28]:
notesTemp = list(NotesDataDF["notes"])
chordsList = []

for i in notesTemp:
    if i != "rest":
        indexSplit = i.split(",")
        for j in indexSplit:
            chord = j.split("_")
            if chord[0] != "percussion":
                chordsList.append(chord[1])
        
chordsList = set(chordsList)
print(chordsList)

{'f5', 'e8', 'f#2', 'f9', 'f3', 'a#4', 'g2', 'f#3', 'd8', 'e4', 'g#5', 'c4', 'c3', 'f8', 'g#6', 'd3', 'e5', 'b1', 'f#4', 'c5', 'f1', 'c9', 'f7', 'c#5', 'b6', 'd7', 'c0', 'c#8', 'a4', 'e7', 'a#2', 'a2', 'b5', 'b4', 'd#0', 'c2', 'g#8', 'a3', 'f4', 'g9', 'c7', 'g#1', 'g#4', 'f#6', 'a8', 'd#1', 'a#3', 'c#6', 'e1', 'f#7', 'f#1', 'd6', 'd4', 'e3', 'f2', 'e2', 'd#5', 'd9', 'c#1', 'g5', 'g1', 'g0', 'b8', 'c#4', 'd#6', 'b3', 'a#1', 'a1', 'd#2', 'd#7', 'a5', 'g#7', 'g3', 'e6', 'f#8', 'g#2', 'f6', 'c1', 'b0', 'g8', 'd5', 'c8', 'd#3', 'g6', 'a6', 'd#4', 'a#6', 'd1', 'd2', 'b7', 'a#7', 'c#7', 'f#5', 'a7', 'd#8', 'g4', 'c#9', 'd#9', 'c#3', 'a#5', 'e9', 'g7', 'g#3', 'b2', 'c#2', 'c6'}


### Durations

In [29]:
f = open(note_mapping_config_path)
jsonData = json.load(f)
f.close()

print(jsonData["durations"])

{'piano': [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.5, 3.0, 4.0, 6.0, 8.0, 12.0, 16.0], 'chromatic': [0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 4.0, 8.0, 12.0, 16.0], 'organ': [0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 4.0, 8.0, 12.0, 16.0], 'guitar': [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.5, 3.0, 4.0, 6.0, 8.0, 12.0, 16.0, 24.0, 28.0, 32.0], 'bass': [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.5, 3.0, 4.0, 6.0, 8.0, 12.0, 16.0], 'strings': [0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 4.0, 8.0, 12.0, 16.0], 'ensemble': [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.5, 3.0, 4.0, 6.0, 8.0, 12.0, 16.0], 'brass': [0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 4.0, 8.0, 12.0, 16.0], 'reed': [0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 4.0, 8.0, 12.0, 16.0], 'pipe': [0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 4.0, 8.0, 12.0, 16.0], 'synthlead': [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.5, 3.0, 4.0, 6.0, 8.0, 12.0, 16.0], 'synthpad': [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.5, 3.0, 4.0, 6.0, 8.0, 12.0, 16

### Build Vocabulary List

In [30]:
for i in instruments:
    for c in chordsList:
        for d in jsonData["durations"][i]:
            word = str(i) + "_" + str(c) + "_" + str(d)
            vocabulary.append(word)
            
for p in percussionInstruments:
    word = "percussion_" + str(p) + "_0.25"
    vocabulary.append(word)
            
print(len(vocabulary))
#print(vocabulary)

20292


### Create Dictionary to Map Word onto Integers

In [31]:
vocabMappings = dict(zip(vocabulary, range(0, len(vocabulary))))
#print(vocabMappings)

### Mapping Data To Integers (Forward Mapping)

In [32]:
notesTemp = list(NotesDataDF["notes"])
mappedNotes = []

for i in notesTemp:
    indexSplit = i.split(",")
    for j in indexSplit:
        if len(indexSplit) > 1:
            mapping = int(vocabMappings[j]) * (-1)
        else:
            mapping = int(vocabMappings[j])
        mappedNotes.append(mapping)

print(len(mappedNotes))
#mappedNotes

874450


In [33]:
vocabularyChars = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"] # 10 is minus (-) and 11 is comma (,)
mappedNotesChars = []

for note in mappedNotes:
    temp = str(note)
    tempArr = [*temp]
    if tempArr[0] == "-":
        tempArr[0] = "10"
    tempArr.append("11")
    mappedNotesChars.extend(tempArr)
    
# Convert the list to a string with each element separated by a space
mappedNotesString = " ".join(mappedNotesChars)

print(len(mappedNotesString))
print(mappedNotesString[0:100])

12152858
10 1 3 9 8 8 11 10 1 3 4 3 3 11 10 1 3 7 7 8 11 0 11 0 11 0 11 10 1 3 7 7 8 11 10 1 3 4 3 3 11 10 1 


In [34]:
words = nltk.word_tokenize(mappedNotesString)
print("The number of tokens is", len(words)) 

unique_tokens = set(words)
print("The number of unique tokens are", len(unique_tokens)) 
#prints the number of unique tokens

The number of tokens is 5278049
The number of unique tokens are 12


In [35]:
vocab_size = 12  #chosen based on statistics of the model
oov_tok = '<OOV>'
embedding_dim = 100
padding_type='post'
trunc_type='post'

# tokenizes sentences
tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts([mappedNotesString])
word_index = tokenizer.word_index


tokens = tokenizer.texts_to_sequences([mappedNotesString])[0]

# Pickle the tokenizer object and save it to a file
with open("./tokenizer.pkl", 'wb') as f:
    pickle.dump(tokenizer, f)

In [19]:
dataX = []
dataY = []
seq_length = 50

for i in range(0, len(tokens) - seq_length-1 , 1):
  seq_in = tokens[i:i + seq_length]
  seq_out = tokens[i + seq_length]

  if seq_out==1: #Skip samples where target word is OOV
    continue
    
  dataX.append(seq_in)
  dataY.append(seq_out)
 
N = len(dataX)
print ("Total training data size is -", N)

X = np.array(dataX)

# one hot encodes the output variable
y = np.array(dataY)
y = np_utils.to_categorical(dataY)

Total training data size is - 4900454


In [None]:
# Define checkpoint path and filename
checkpoint_path = "./model_checkpoint.h5"

# Create a ModelCheckpoint callback that saves the model weights only when validation accuracy improves
checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')


In [58]:
model = keras.Sequential([
    keras.layers.Embedding(vocab_size, embedding_dim, input_length=seq_length),
    keras.layers.Bidirectional(keras.layers.LSTM(64, return_sequences=True)),
    keras.layers.Bidirectional(keras.layers.LSTM(32)),
    keras.layers.Dense(vocab_size, activation='softmax')
])

# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Print the model summary
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 50, 100)           1200      
                                                                 
 bidirectional (Bidirectiona  (None, 128)              84480     
 l)                                                              
                                                                 
 dense_1 (Dense)             (None, 12)                1548      
                                                                 
Total params: 87,228
Trainable params: 87,228
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train the model with checkpoint
num_epochs = 10
history = model.fit(X, y, epochs=num_epochs, batch_size=128, verbose=1, validation_split=0.2, callbacks=[checkpoint])