In [1]:
#importing packages
import pretty_midi
import numpy as np
import mir_eval.display
import librosa.display
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display
import os
from mido import MidiFile
import sklearn

In [2]:
# Contains information for each individual note
class Note:
    def __init__(self, velocity, beginTick, endTick):
        self.velocity = velocity
        self.beginTick = beginTick
        self.endTick = endTick
    
    def addEndTick(self, endTick):
        self.endTick = endTick

In [3]:
# Contians information for each file
class File:
    def __init__(self, ticksPerSixteenthNote, style, tempo, isBeat, timeSignature, instruments):
        self.ticksPerSixteenthNote = ticksPerSixteenthNote
        self.style = style
        self.tempo = tempo
        self.isBeat = isBeat
        self.timeSignature = timeSignature
        self.instruments = instruments

In [4]:
# Contains information for each drummer (TODO: add functions)
class Drummer:
    def __init__(self, name, files):
        self.name = name
        self.files = files

In [5]:
def extractMidiData(filename):
    # Extract information from filename format
    filenameInfo = filename.split("_")
    style = filenameInfo[1]
    tempo = filenameInfo[2]
    # Files are classified as either beats or fills
    isBeat = (filenameInfo[3] == 'beat')
    # The split function keeps the file extension, so we have to get rid of it for the time signature
    timeSignature = filenameInfo[4].replace('.mid', '')
    
    # Extract information from Mido
    mid = MidiFile(filename)   
    ticksPerBeat = mid.ticks_per_beat
    # We will be checking to see how far off every note is from 16th note quantization
    ticksPerSixteenthNote = ticksPerBeat / 4
    
    # There should only be one track per file
    for track in mid.tracks:
        # Keeps track of what time we're at in the MIDI file using the "time" attribute
        # in "note_on" and "note_off" messages
        currentTick = 0
        
        # Dictionary for all of our MIDI instruments (snare, hi-hat, etc.)
        instruments = {}
        
        for msg in track:
            if msg.type == "note_on":
                if msg.note not in instruments:
                    # Create new list of notes for new instrument
                    instruments[msg.note] = []
                    
                currentTick += msg.time
                # End tick will be added when a note_off message appears
                instruments[msg.note].append(Note(msg.velocity, currentTick, 0))
            elif msg.type == "note_off":
                currentTick += msg.time
                # Add end tick value to the last note in the list
                lastNoteIndex = len(instruments[msg.note]) - 1
                instruments[msg.note][lastNoteIndex].addEndTick(currentTick)
    
    
                
    return File(ticksPerSixteenthNote, style, tempo, isBeat, timeSignature, instruments)

drummers = os.listdir('./groove')
drummersList = []

numberOfFiles = 0

for drummer in drummers:
    if drummer.find('drummer') != -1:
        filesList = []
        sessions = os.listdir('./groove/' + drummer)
        for session in sessions:
            if session != '.DS_Store' and session != 'Icon\r' and session != "eval_session":
                files = os.listdir('./groove/' + drummer + '/' + session)
                for file in files:
                    if file.endswith('.mid'):
                        numberOfFiles += 1
                        filePath = './groove/' + drummer + "/" + session + "/"  + file
                        filesList.append(extractMidiData(filePath))

        drummersList.append(Drummer(drummer, filesList))


# Test 1: Velocity and Microtiming Histograms With Snare, Bass Drum, and Hi-Hat Instruments

We'll start off with the classification part of this project by getting histogram bin percentages for velocity and microtiming for three different instruments: bass drum, snare, and hi hat

In [6]:
def getPercentOffBeat(beginTick, ticksPerSixteenthNote):
    # Using percentage instead of ticks off beat to normalize in case files have differing ticks per beat
    beginModSixteenth = int(beginTick % ticksPerSixteenthNote)
    ticksOffBeat = min(beginModSixteenth, int(abs(beginModSixteenth - ticksPerSixteenthNote)))
    percentageOffBeat = ticksOffBeat / (ticksPerSixteenthNote / 2)
    return percentageOffBeat

In [7]:
def extractFeatures(file):
    bassDrumVelocity = []
    bassDrumMicrotiming = []
    
    snareVelocity = []
    snareMicrotiming = []
    
    hihatVelocity = []
    hihatMicrotiming = []
    
    ticksPerSixteenthNote = file.ticksPerSixteenthNote
    
    # Put velocities and microtiminging percentages into seperate arrays
    for instrumentNum in file.instruments:
        if instrumentNum == 35 or instrumentNum == 36:
            for note in file.instruments[instrumentNum]:
                bassDrumVelocity.append(note.velocity)
                bassDrumMicrotiming.append(getPercentOffBeat(note.beginTick, ticksPerSixteenthNote))
        elif instrumentNum == 38 or instrumentNum == 40:
            for note in file.instruments[instrumentNum]:
                snareVelocity.append(note.velocity)
                snareMicrotiming.append(getPercentOffBeat(note.beginTick, ticksPerSixteenthNote))
        elif instrumentNum == 42 or instrumentNum == 44 or instrumentNum == 46 or instrumentNum == 49 or instrumentNum == 57:
            for note in file.instruments[instrumentNum]:
                hihatVelocity.append(note.velocity)
                hihatMicrotiming.append(getPercentOffBeat(note.beginTick, ticksPerSixteenthNote))
    
    # 15 velocity bins
    velocityBins = np.arange(0, 127, 8)
    # 9 histogram bins
    microtimingBins = np.arange(0, 1, 0.1)
    
    bassDrumVelocityHist, bins = np.histogram(bassDrumVelocity, bins=velocityBins)
    bassDrumMicrotimingHist, bins = np.histogram(bassDrumMicrotiming, bins=microtimingBins)
    
    snareVelocityHist, bins = np.histogram(snareVelocity, bins=velocityBins)
    snareMicrotimingHist, bins = np.histogram(snareMicrotiming, bins=microtimingBins)

    hihatVelocityHist, bins = np.histogram(hihatVelocity, bins=velocityBins)
    hihatMicrotimingHist, bins = np.histogram(hihatMicrotiming, bins=microtimingBins)
    
    if(sum(bassDrumVelocityHist) == 0 or sum(bassDrumMicrotimingHist) == 0):
        print("bass")
        return 0
    elif(sum(snareVelocityHist) == 0 or sum(snareMicrotimingHist) == 0):
        print("snare")
        return 0
    elif(sum(hihatVelocityHist) == 0 or sum(hihatMicrotimingHist) == 0):
        print("hihat")
        return 0
    else:
        bassDrumVelocityHist = bassDrumVelocityHist / sum(bassDrumVelocityHist)
        bassDrumMicrotimingHist = bassDrumMicrotimingHist / sum(bassDrumMicrotimingHist)
        snareVelocityHist = snareVelocityHist / sum(snareVelocityHist)
        snareMicrotimingHist = snareMicrotimingHist / sum(snareMicrotimingHist)
        hihatVelocityHist = hihatVelocityHist / sum(hihatVelocityHist)
        hihatMicrotimingHist = hihatMicrotimingHist / sum(hihatMicrotimingHist)
    
    output = np.concatenate((bassDrumVelocityHist, bassDrumMicrotimingHist, 
                            snareVelocityHist, snareMicrotimingHist,
                            hihatVelocityHist, hihatMicrotimingHist))
    
    return output

In [8]:
features = np.zeros((numberOfFiles,72)) 
labels = np.zeros((numberOfFiles))
validFiles = 0
# FOR EACH FILE NOT EACH DRUMMER!!!!
for i, drummer in enumerate(drummersList):
    print(drummer.name)
    for file in drummer.files:
        output = extractFeatures(file)
        if isinstance(output, int):
            i = 0
        else:
            features[i, :] = output
            labels[i] = i
            validFiles += 1
            
print(validFiles)

drummer8
drummer6
hihat
drummer1
bass
hihat
bass
snare
bass
hihat
hihat
hihat
snare
bass
hihat
hihat
bass
bass
snare
hihat
bass
bass
snare
bass
snare
bass
hihat
bass
bass
bass
hihat
hihat
bass
bass
bass
hihat
snare
hihat
bass
hihat
bass
snare
hihat
bass
hihat
snare
hihat
bass
snare
hihat
hihat
snare
hihat
hihat
hihat
hihat
bass
hihat
bass
bass
hihat
snare
hihat
snare
bass
hihat
hihat
bass
bass
hihat
hihat
bass
hihat
hihat
hihat
bass
bass
bass
bass
hihat
hihat
hihat
hihat
hihat
bass
bass
hihat
snare
bass
hihat
hihat
hihat
hihat
hihat
bass
bass
snare
hihat
bass
hihat
hihat
hihat
snare
hihat
snare
hihat
hihat
hihat
hihat
hihat
bass
bass
hihat
hihat
bass
hihat
bass
bass
hihat
hihat
hihat
bass
hihat
hihat
hihat
bass
hihat
bass
bass
hihat
hihat
snare
hihat
hihat
bass
snare
hihat
hihat
bass
hihat
bass
hihat
snare
hihat
hihat
hihat
bass
hihat
bass
bass
hihat
hihat
hihat
bass
bass
snare
hihat
bass
hihat
bass
hihat
bass
hihat
bass
snare
snare
hihat
hihat
hihat
hihat
hihat
snare
bass
hihat
bass
h

In [9]:
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('dim_red', PCA(n_components = 10)),
        ('classifier', MLPClassifier(hidden_layer_sizes=(10,5), max_iter=2000, activation='relu'))
        ])

In [10]:
scores = sklearn.model_selection.cross_validate(pipe, features, labels, cv=5,scoring=('f1_macro', 'accuracy'),return_train_score=True)

print(scores,'\n')
print('Accuracy mean and variance', np.mean(scores['test_accuracy']),np.var(scores['test_accuracy']),'\n')
print('F1 macro mean and variance', np.mean(scores['test_f1_macro']),np.var(scores['test_f1_macro']),'\n')



{'fit_time': array([0.64873886, 0.65916181, 0.76650333, 0.59727693, 3.30703211]), 'score_time': array([0.00215816, 0.00190806, 0.00204897, 0.00178409, 0.00187397]), 'test_f1_macro': array([0.33182504, 0.33333333, 0.49887133, 0.24943311, 0.25      ]), 'train_f1_macro': array([0.80952381, 0.30944287, 0.80952381, 0.27749513, 1.        ]), 'test_accuracy': array([0.99099099, 0.9954955 , 0.9954955 , 0.99099099, 0.99099099]), 'train_accuracy': array([0.99887387, 0.9954955 , 0.99887387, 0.9954955 , 1.        ])} 

Accuracy mean and variance 0.9927927927927929 4.86973459946447e-06 

F1 macro mean and variance 0.332692561889026 0.008277348691125055 



# Test 2: Velocity and Microtiming Histograms With Only Snare And Bass Drum

Many files were lost due to hi-hats not appearing in files, so I got rid of hi-hats and only kept snare and bass drum histogramas to see what happens

In [11]:
def extractFeatures(file):
    bassDrumVelocity = []
    bassDrumMicrotiming = []
    
    snareVelocity = []
    snareMicrotiming = []
    
    ticksPerSixteenthNote = file.ticksPerSixteenthNote
    
    # Put velocities and microtiminging percentages into seperate arrays
    for instrumentNum in file.instruments:
        if instrumentNum == 35 or instrumentNum == 36:
            for note in file.instruments[instrumentNum]:
                bassDrumVelocity.append(note.velocity)
                bassDrumMicrotiming.append(getPercentOffBeat(note.beginTick, ticksPerSixteenthNote))
        elif instrumentNum == 38 or instrumentNum == 40:
            for note in file.instruments[instrumentNum]:
                snareVelocity.append(note.velocity)
                snareMicrotiming.append(getPercentOffBeat(note.beginTick, ticksPerSixteenthNote))
    
    # 15 velocity bins
    velocityBins = np.arange(0, 127, 8)
    # 9 histogram bins
    microtimingBins = np.arange(0, 1, 0.1)
    
    bassDrumVelocityHist, bins = np.histogram(bassDrumVelocity, bins=velocityBins)
    bassDrumMicrotimingHist, bins = np.histogram(bassDrumMicrotiming, bins=microtimingBins)
    
    snareVelocityHist, bins = np.histogram(snareVelocity, bins=velocityBins)
    snareMicrotimingHist, bins = np.histogram(snareMicrotiming, bins=microtimingBins)
    
    if(sum(bassDrumVelocityHist) == 0 or sum(bassDrumMicrotimingHist) == 0):
        print("bass")
        return 0
    elif(sum(snareVelocityHist) == 0 or sum(snareMicrotimingHist) == 0):
        print("snare")
        return 0
    else:
        bassDrumVelocityHist = bassDrumVelocityHist / sum(bassDrumVelocityHist)
        bassDrumMicrotimingHist = bassDrumMicrotimingHist / sum(bassDrumMicrotimingHist)
        snareVelocityHist = snareVelocityHist / sum(snareVelocityHist)
        snareMicrotimingHist = snareMicrotimingHist / sum(snareMicrotimingHist)
    
    output = np.concatenate((bassDrumVelocityHist, bassDrumMicrotimingHist, 
                            snareVelocityHist, snareMicrotimingHist))
    
    return output

In [12]:
features = np.zeros((numberOfFiles,48)) 
labels = np.zeros((numberOfFiles))
validFiles = 0
# FOR EACH FILE NOT EACH DRUMMER!!!!
for i, drummer in enumerate(drummersList):
    print(drummer.name)
    for file in drummer.files:
        output = extractFeatures(file)
        if isinstance(output, int):
            i = 0
        else:
            features[i, :] = output
            labels[i] = i
            validFiles += 1
            
print(validFiles)

drummer8
drummer6
drummer1
bass
bass
snare
bass
snare
bass
bass
bass
snare
bass
bass
snare
bass
snare
bass
bass
bass
bass
bass
bass
bass
snare
bass
bass
snare
bass
snare
bass
snare
snare
bass
bass
bass
snare
snare
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
snare
bass
bass
bass
snare
bass
snare
snare
bass
bass
bass
bass
bass
bass
bass
bass
bass
snare
bass
snare
bass
bass
snare
bass
bass
bass
bass
bass
snare
bass
bass
bass
bass
snare
snare
snare
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
snare
bass
bass
bass
bass
bass
bass
bass
snare
bass
bass
bass
bass
bass
bass
bass
bass
bass
drummer7
snare
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
bass
snare
bass
bass
bass
bass
bass
bass
bass
snare
bass
bass
bass
bass
bass
bass
bass
bass
bass
snare


In [13]:
scores = sklearn.model_selection.cross_validate(pipe, features, labels, cv=5,scoring=('f1_macro', 'accuracy'),return_train_score=True)

print(scores,'\n')
print('Accuracy mean and variance', np.mean(scores['test_accuracy']),np.var(scores['test_accuracy']),'\n')
print('F1 macro mean and variance', np.mean(scores['test_f1_macro']),np.var(scores['test_f1_macro']),'\n')



{'fit_time': array([0.73528099, 0.78604269, 0.98572111, 0.61190033, 0.694103  ]), 'score_time': array([0.00188994, 0.00185823, 0.00186801, 0.00182176, 0.00231886]), 'test_f1_macro': array([0.33257748, 0.24943311, 0.33182504, 0.33182504, 0.24943311]), 'train_f1_macro': array([0.25913314, 0.8749291 , 0.37464629, 0.2499291 , 0.62478754]), 'test_accuracy': array([0.99099099, 0.99099099, 0.99099099, 0.99099099, 0.99099099]), 'train_accuracy': array([0.99324324, 0.99887387, 0.99436937, 0.99436937, 0.99662162])} 

Accuracy mean and variance 0.990990990990991 0.0 

F1 macro mean and variance 0.29901875280026535 0.001639233029325517 



# Test 3: Only Snare

Getting rid of hi-hats gave us more files and the same results, now we'll try getting rid of bass drums.

In [14]:
def extractFeatures(file):
    snareVelocity = []
    snareMicrotiming = []
    
    ticksPerSixteenthNote = file.ticksPerSixteenthNote
    
    # Put velocities and microtiminging percentages into seperate arrays
    for instrumentNum in file.instruments:
        if instrumentNum == 38 or instrumentNum == 40:
            for note in file.instruments[instrumentNum]:
                snareVelocity.append(note.velocity)
                snareMicrotiming.append(getPercentOffBeat(note.beginTick, ticksPerSixteenthNote))
    
    # 15 velocity bins
    velocityBins = np.arange(0, 127, 8)
    # 9 histogram bins
    microtimingBins = np.arange(0, 1, 0.1)
    
    snareVelocityHist, bins = np.histogram(snareVelocity, bins=velocityBins)
    snareMicrotimingHist, bins = np.histogram(snareMicrotiming, bins=microtimingBins)

    if(sum(snareVelocityHist) == 0 or sum(snareMicrotimingHist) == 0):
        return 0
    else:
        snareVelocityHist = snareVelocityHist / sum(snareVelocityHist)
        snareMicrotimingHist = snareMicrotimingHist / sum(snareMicrotimingHist)
    
    output = np.concatenate((snareVelocityHist, snareMicrotimingHist))
    
    return output

In [15]:
features = np.zeros((numberOfFiles,24)) 
labels = np.zeros((numberOfFiles))
validFiles = 0
# FOR EACH FILE NOT EACH DRUMMER!!!!
for i, drummer in enumerate(drummersList):
    print(drummer.name)
    for file in drummer.files:
        output = extractFeatures(file)
        if isinstance(output, int):
            i = 0
        else:
            features[i, :] = output
            labels[i] = i
            validFiles += 1
            
print(validFiles)

drummer8
drummer6
drummer1
drummer7
drummer10
drummer9
drummer2
drummer5
drummer4
drummer3
1027


In [16]:
scores = sklearn.model_selection.cross_validate(pipe, features, labels, cv=5,scoring=('f1_macro', 'accuracy'),return_train_score=True)

print(scores,'\n')
print('Accuracy mean and variance', np.mean(scores['test_accuracy']),np.var(scores['test_accuracy']),'\n')
print('F1 macro mean and variance', np.mean(scores['test_f1_macro']),np.var(scores['test_f1_macro']),'\n')



{'fit_time': array([3.37168789, 0.76535773, 0.56230497, 0.70172215, 0.65731096]), 'score_time': array([0.00237679, 0.00173521, 0.00172615, 0.00182295, 0.00170517]), 'test_f1_macro': array([0.24943311, 0.33182504, 0.24943311, 0.25      , 0.25      ]), 'train_f1_macro': array([1.        , 0.8749291 , 0.54159576, 0.66666667, 0.16064338]), 'test_accuracy': array([0.99099099, 0.99099099, 0.99099099, 0.99099099, 0.99099099]), 'train_accuracy': array([1.        , 0.99887387, 0.99662162, 0.99774775, 0.99324324])} 

Accuracy mean and variance 0.990990990990991 0.0 

F1 macro mean and variance 0.2661382501718636 0.0010787527878152105 



# Test 4: Regressor

Ok just the snare works pretty well! Let's see what happens when we try to predict values using a regressor. The features will be the drummer and the tempo, while the target values will be the velocity and microtimining bin percentages.

In [42]:
sr = 22050

#Instead of writing the code to extract the features and target value we define a function,
#which is more elegant, it's reusable (shorter code) and makes the following code more readable.
#For practicality, the vector features are flattenedm so that that can be stored on a row of a N-dim array

#Mind that the file name is not important here because there is no label to consider (as in the previous examples)

def extractFeaturesTargets(drummerNum, file):
    features = np.array([drummerNum, file.ticksPerSixteenthNote, file.tempo])
    
    targets = np.zeros((1,24))
    snareVelocity = []
    snareMicrotiming = []
    
    ticksPerSixteenthNote = file.ticksPerSixteenthNote
    
    # Put velocities and microtiminging percentages into seperate arrays
    for instrumentNum in file.instruments:
        if instrumentNum == 38 or instrumentNum == 40:
            for note in file.instruments[instrumentNum]:
                snareVelocity.append(note.velocity)
                snareMicrotiming.append(getPercentOffBeat(note.beginTick, ticksPerSixteenthNote))
    
    # 15 velocity bins
    velocityBins = np.arange(0, 127, 8)
    # 9 histogram bins
    microtimingBins = np.arange(0, 1, 0.1)
    
    snareVelocityHist, bins = np.histogram(snareVelocity, bins=velocityBins)
    snareMicrotimingHist, bins = np.histogram(snareMicrotiming, bins=microtimingBins)

    if(sum(snareVelocityHist) == 0 or sum(snareMicrotimingHist) == 0):
        return 0, 0
    else:
        snareVelocityHist = snareVelocityHist / sum(snareVelocityHist)
        snareMicrotimingHist = snareMicrotimingHist / sum(snareMicrotimingHist)
    
    targets = np.concatenate((snareVelocityHist, snareMicrotimingHist))
    
    return features, targets



In [43]:
features = np.zeros((numberOfFiles,3)) 
targets = np.zeros((numberOfFiles,24))

for i, drummer in enumerate(drummersList):
    for file in drummer.files:
        feature, target = extractFeaturesTargets(i, file)
        if isinstance(feature, int):
            i = 0
        else:
            features[i, :] = feature
            targets[i, :] = target
            
print(len(features))

1110


In [44]:
from sklearn.model_selection import train_test_split

#splitting the dataset in training and testing parts
feat_train, feat_test, tar_train, tar_test = train_test_split(features, labels, test_size=0.2)

In [45]:
from sklearn.neural_network import MLPRegressor

pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('regressor', MLPRegressor(hidden_layer_sizes=(10,5), max_iter=2000, tol=0.0001, activation='tanh'))
        ])

In [54]:
#training the pipeline
pipe.fit(feat_train, tar_train)

#applying the trained pipeline
tar_pred = pipe.predict(feat_test)

In [55]:
#computing a set of performance metrics

#mean squared error (lower the better)
print('Mean squared error: %.4f'% sklearn.metrics.mean_squared_error(tar_test, tar_pred))

#mean absolute error (lower the better)
print('Mean absolute error: %.4f'% sklearn.metrics.mean_absolute_error(tar_test, tar_pred))

#median absolute error (lower the better)
print('Median absolute error: %.4f'% sklearn.metrics.median_absolute_error(tar_test, tar_pred))

#coefficient of determination (r2 score): 1 is perfect prediction (it can get arbitrary negative)
print('Coefficient of determination (R2 score): %.4f'% sklearn.metrics.r2_score(tar_test, tar_pred))

#explained variance score: 1 is perfect prediction (it can get arbitrary worse)
print('Explained variance score: %.4f'% sklearn.metrics.explained_variance_score(tar_test, tar_pred))

#this is is a better represenation of the r2 score when working with multiple outputs
#as it provides a value for each target value
print('R2 score on individual targets',sklearn.metrics.r2_score(tar_test, tar_pred, multioutput='raw_values') )

Mean squared error: 0.0698
Mean absolute error: 0.0247
Median absolute error: 0.0001
Coefficient of determination (R2 score): 0.8562
Explained variance score: 0.8574
R2 score on individual targets [0.85618546]
