This file takes the 92 MIDI files and converts them to a multi-hot encoded array for input into the NN models.
The music21 library is primarily used. Documentation: http://web.mit.edu/music21/doc/index.html
*NOTE* music21 could not process some of the MIDI files, so the MIDI files were batch converted to .mxl format using MuseScore 3.

In [5]:
#Import libraries
from music21 import stream,note,chord,converter,instrument
import numpy as np
import pandas as pd
import glob
import os
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt

In [6]:
# Limit how much of a DataFrame pandas prints: at most 100 rows and 10 columns
for option_name, option_limit in (
    ("display.max_rows", 100),
    ("display.max_columns", 10),
):
    pd.set_option(option_name, option_limit)

In [7]:
# Extract the notes and chords of the first two piano parts of a song
def getPiano(loaded_song):
    """Return the notes and chords of up to two piano parts, flattened.

    Args:
        loaded_song: a parsed music21 object exposing ``getElementsByClass``
            (in this file, the result of ``converter.parse``).

    Returns:
        The ``getElementsByClass(["Note", "Chord"])`` selection taken from a
        flattened ``stream.Score`` built out of the selected parts. When no
        part reports a piano instrument the Score is built from an empty list.
    """
    max_piano_parts = 2
    selected = []

    # Walk every "Part"; keep it only if its instrument is a Piano and
    # fewer than two piano parts have been kept so far.
    for part in loaded_song.getElementsByClass("Part"):
        is_piano = isinstance(part.getInstrument(), instrument.Piano)
        if is_piano and len(selected) < max_piano_parts:
            selected.append(part)

    # Merge the kept parts into one Score and flatten it into a single stream
    flattened = stream.Score(selected).flatten()

    # Keep only notes and chords (drops time signatures, measures, etc.)
    return flattened.getElementsByClass(["Note", "Chord"])

In [8]:
#Store notes/chords in numpy array with a uniform 0.0625 (1/16 quarter-length) time step
def splitToTimesteps(piano_parts):
    """Expand notes and chords into one row per (timestep, pitch).

    Args:
        piano_parts: iterable of elements exposing ``offset``,
            ``quarterLength``, ``isNote`` and ``isChord``; notes additionally
            expose ``pitch.midi`` and chords expose ``pitches`` (each with
            ``.midi``). In this file it is the output of ``getPiano``.

    Returns:
        Float array of shape (n_rows, 3) with columns
        ``[timestep, flag, midi_pitch]``. ``flag`` is 1 on the row whose
        timestep equals the element's offset (note-on) and 0 on the following
        rows (note held). Shape is (0, 3) when nothing is produced.

    Side effects:
        An element whose offset is not a multiple of the step has its
        ``offset`` attribute overwritten in place (same as the original code).
    """
    step_size = 0.0625
    steps_per_quarter = 16  # 1 / step_size

    # Collect one small block per pitch and concatenate once at the end;
    # growing the result with np.append per row copies the whole array each time.
    blocks = [np.empty((0, 3))]
    next_offset = 0
    for element in piano_parts:
        # Off-grid offsets are snapped to the position recorded after the last
        # element whose duration was itself off-grid (0 if there was none).
        if element.offset % step_size > 0:
            element.offset = next_offset

        # Duration rounded down to a whole number of timesteps
        held_length = (
            np.floor(element.quarterLength * steps_per_quarter) / steps_per_quarter
        )
        if element.quarterLength % step_size > 0:
            next_offset = element.offset + held_length

        # A chord is stored as its individual pitches, all at the same timesteps
        if element.isNote:
            midi_pitches = [element.pitch.midi]
        elif element.isChord:
            midi_pitches = [p.midi for p in element.pitches]
        else:
            continue

        timesteps = np.arange(
            element.offset, element.offset + held_length, step_size
        )
        # 1 where the timestep is the element's own offset (note-on), else 0 (held)
        onset_flags = (timesteps == element.offset).astype(float)
        for midi in midi_pitches:
            pitch_column = np.full(timesteps.shape, midi)
            blocks.append(np.column_stack((timesteps, onset_flags, pitch_column)))

    return np.concatenate(blocks, axis=0)

In [9]:
#Multi hot encode notes
def multiHotEncode(step_array):
    """Turn (timestep, flag, pitch) rows into one multi-hot row per timestep.

    Args:
        step_array: array of shape (n_rows, 3) with columns
            ``[timestep, flag, midi_pitch]`` where ``flag`` is 1 for a
            note-on row and 0 for a held-note row (the layout produced by
            ``splitToTimesteps`` in this file).

    Returns:
        Float array of shape (n_timesteps, 129), sorted by timestep. Column 0
        is the timestep; columns 1..128 correspond to MIDI pitches 0..127.
        Each input row contributes +1 (note-on) or -1 (held) to its pitch
        column, and rows sharing a timestep are summed. Pitches outside
        0..127 contribute nothing. An empty input yields shape (0, 129).

    NOTE(review): because rows are summed, duplicates at one timestep give
    magnitudes above 1, and a note-on plus a hold of the same pitch cancel
    to 0.
    """
    n_pitches = 128

    step_array = np.asarray(step_array, dtype=float)
    # A song with no usable notes produces no rows; return an empty result
    # instead of failing inside the encoding/grouping machinery.
    if step_array.shape[0] == 0:
        return np.empty((0, n_pitches + 1))

    timesteps = step_array[:, 0]
    is_held = step_array[:, 1] == 0
    pitches = step_array[:, 2].astype(int)

    # One-hot encode each row's pitch, writing -1 instead of 1 for held rows.
    # Rows whose pitch is outside 0..127 stay all-zero.
    encoded = np.zeros((step_array.shape[0], n_pitches))
    valid_rows = np.flatnonzero((pitches >= 0) & (pitches < n_pitches))
    encoded[valid_rows, pitches[valid_rows]] = np.where(
        is_held[valid_rows], -1.0, 1.0
    )

    #Store the encoded rows in a pandas dataframe keyed by timestep
    encoded_df = pd.DataFrame(encoded, columns=np.arange(n_pitches))
    encoded_df.insert(0, "step", timesteps)

    #Sum rows with duplicate timestep to get the multi-hot representation
    multihot_df = encoded_df.groupby("step").sum().reset_index()
    return multihot_df.to_numpy()


In [10]:
def inferRests(no_rests, step_size=0.0625):
    """Fill gaps between consecutive timesteps with all-zero (rest) rows.

    Args:
        no_rests: array of shape (n_rows, 129); column 0 holds the timestep
            and the rows are expected in increasing timestep order.
        step_size: spacing of the uniform time grid. Defaults to 0.0625,
            the value used throughout this file.

    Returns:
        Float array of shape (n_out, 129) containing every input row in
        order. Wherever two neighbouring rows are more than ``step_size``
        apart, rows whose only non-zero entry is the timestep in column 0 are
        inserted between them. Shape is (0, 129) for an empty input.
    """
    n_columns = 129
    n_rows = no_rests.shape[0]

    # Collect the pieces and concatenate once; appending to the result array
    # row by row would copy it on every iteration.
    pieces = [np.empty((0, n_columns))]
    for row in range(n_rows):
        pieces.append(no_rests[row].reshape(-1, n_columns))
        if row + 1 >= n_rows:
            continue

        gap = no_rests[row + 1, 0] - no_rests[row, 0]
        if gap > step_size:
            # Number of grid points strictly between the two timesteps
            n_missing = int(gap / step_size) - 1
            rest_rows = np.zeros((n_missing, n_columns))
            rest_rows[:, 0] = no_rests[row, 0] + np.arange(1, n_missing + 1) * step_size
            pieces.append(rest_rows)
    return np.concatenate(pieces, axis=0)

In [21]:
#File Navigation
#Put the directory containing the .mxl files here
directory = 'C:/Users/Amir/Documents/Graduate School/SEP788_789/Project/music_dataset-main/to_MusicXML/*.mxl'

#Loop settings
window_steps = 256  #Timesteps per training window: 256 * 0.0625 = 16 quarter-lengths
n_versions = 3      #Each song is encoded in its original key plus two transposed keys

#Collect windows and names in lists and build the arrays once at the end;
#np.append on the 3D array would copy the whole dataset for every window
song_windows = []
song_names = []

#Loop through the folder and process each .mxl file, converting to a multi-hot encoded array
#Additionally, each file is encoded three times to expand the dataset: as loaded,
#shifted up by a Major 3rd, and shifted up by a further Major 3rd
#NOTE* This may take a while depending how many files there are
for file in glob.glob(directory):
    #Load Song
    loaded_song = converter.parse(file) #Parse .mxl file and load into python
    song_name = os.path.splitext(os.path.basename(file))[0]
    for j in range(n_versions):
        piano_only = getPiano(loaded_song) #Get piano parts only
        step_arr = splitToTimesteps(piano_only) #Store in numpy array with uniform time step
        multiHot_arr = multiHotEncode(step_arr) #Turn pitches into multi-hot encoding
        full_song = inferRests(multiHot_arr) #Infer rests from missing timesteps and add them to create uniform array
        #Cut the song into consecutive full windows; a trailing partial window is dropped.
        #The range bound keeps a final window that is exactly window_steps long
        #(the previous strict "<" check discarded it).
        for i in range(0, full_song.shape[0] - window_steps + 1, window_steps):
            song_windows.append(full_song[i:i+window_steps, :])
            song_names.append(song_name)
        #Transpose only when another version will actually be encoded
        if j < n_versions - 1:
            loaded_song = loaded_song.transpose('M3')

#all_songs: (n_windows, 256, 129); song_index: source file name (no extension) per window
if song_windows:
    all_songs = np.stack(song_windows).reshape(-1, window_steps, 129)
    song_index = np.array(song_names)
else:
    all_songs = np.empty((0, window_steps, 129))
    song_index = np.empty(0)

In [22]:
#Flatten the (windows, 256, 129) array into 2D rows of 129 values so it can be written to a .txt file
flat_rows = all_songs.reshape(-1, 129)
all_songs_reshaped = flat_rows.astype('float64')

In [46]:
#Fixing issues with duplicate notes at the same timestep
#Drop the timestep column so only the 128 pitch columns remain
no_time = np.delete(all_songs_reshaped,0,axis=1)
wr = np.where(abs(no_time)>1) #Check where notes are not 1 or -1
#z holds one (row, column) index pair per offending cell
z = np.append(wr[0].reshape(-1,1),wr[1].reshape(-1,1),axis=1)

#Loop through notes that are duplicate and replace them
for row, col in z:
    value = no_time[row, col]
    # Stacked holds (< -1) with nothing in the previous timestep: mark the
    # previous timestep with -1 as well. Row 0 has no previous timestep, so
    # it is skipped; without the guard, index -1 would wrap to the last row.
    # NOTE(review): rows at the start of a 256-step window still look back
    # into the preceding window - confirm whether that is intended.
    if value < -1 and row > 0 and no_time[row-1, col] == 0:
        no_time[row-1, col] = -1
    # Every cell listed in z has magnitude > 1; collapse it to +1 or -1
    no_time[row, col] = value / abs(value)
In [40]:
#Save the songs to a comma-separated .txt file, one timestep per line, values written as integers
output_path = 'C:/Users/Amir/Documents/Graduate School/SEP788_789/Project/music_dataset-main/all_songs_m3.txt'
np.savetxt(output_path, no_time, fmt='%d', delimiter=',')