In [5]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle
import music21
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [6]:
# Load the dataset metadata. The `data` folder must sit in the same directory as
# this notebook; otherwise adjust the path below.

df = pd.read_csv('data/maestro-v2.0.0.csv')

# Re-key every row by its MIDI file name: drop the first five characters and the
# last three characters of `audio_filename`, then append 'midi'.
# NOTE(review): presumably this strips a '<year>/' prefix and a 'wav' extension --
# confirm against the CSV contents.
df['audio_filename'] = df['audio_filename'].str[5:-3] + 'midi'
df = df.set_index('audio_filename')

# Label names; a sample's integer target is its position in this list.
classes = ['Baroque', 'Classical', 'Romantic', 'Modern']

# Name of the metadata column that holds the label to predict.
classifier = 'period'

# Number of rows per label value. The counted column follows `classifier`
# (instead of a hard-coded `.period`), so changing the target column above keeps
# this summary consistent.
class_dist = df.groupby(classifier)[classifier].agg('count').to_frame('countt')

In [8]:
# Extract jSymbolic features for every MIDI file listed in `df` and sort the
# resulting vectors into the train / validation / test splits.
#
# This cell takes a very long time to run (the recorded run needed about 20 hours
# for 1282 files). To make your work easier, the results have been saved in the
# corresponding pickle files.

# The feature extractor classes are identical for every file, so the list is
# built once here rather than on every loop iteration. The order of this list
# determines the column order of the feature matrices -- do not reorder it.
feature_extractors = [
    music21.features.jSymbolic.AmountOfArpeggiationFeature,
    music21.features.jSymbolic.AverageMelodicIntervalFeature,
    music21.features.jSymbolic.AverageNumberOfIndependentVoicesFeature,
    music21.features.jSymbolic.AverageNoteDurationFeature,
    music21.features.jSymbolic.AverageRangeOfGlissandosFeature,
    music21.features.jSymbolic.AverageTimeBetweenAttacksFeature,
    music21.features.jSymbolic.AverageTimeBetweenAttacksForEachVoiceFeature,
    music21.features.jSymbolic.BasicPitchHistogramFeature,
    music21.features.jSymbolic.ChangesOfMeterFeature,
    music21.features.jSymbolic.ChromaticMotionFeature,
    music21.features.jSymbolic.CombinedStrengthOfTwoStrongestRhythmicPulsesFeature,
    music21.features.jSymbolic.DirectionOfMotionFeature,
    music21.features.jSymbolic.DistanceBetweenMostCommonMelodicIntervalsFeature,
    music21.features.jSymbolic.DurationOfMelodicArcsFeature,
    music21.features.jSymbolic.FifthsPitchHistogramFeature,
    music21.features.jSymbolic.HarmonicityOfTwoStrongestRhythmicPulsesFeature,
    music21.features.jSymbolic.ImportanceOfBassRegisterFeature,
    music21.features.jSymbolic.ImportanceOfHighRegisterFeature,
    music21.features.jSymbolic.ImportanceOfMiddleRegisterFeature,
    music21.features.jSymbolic.IntervalBetweenStrongestPitchClassesFeature,
    music21.features.jSymbolic.InitialTempoFeature,
    music21.features.jSymbolic.MaximumNumberOfIndependentVoicesFeature,
    music21.features.jSymbolic.MelodicFifthsFeature,
    music21.features.jSymbolic.MelodicIntervalHistogramFeature,
    music21.features.jSymbolic.MelodicThirdsFeature,
    music21.features.jSymbolic.MelodicTritonesFeature,
    music21.features.jSymbolic.MostCommonMelodicIntervalFeature,
    music21.features.jSymbolic.MostCommonPitchClassFeature,
    music21.features.jSymbolic.PitchClassDistributionFeature,
    music21.features.jSymbolic.PitchClassVarietyFeature,
    music21.features.jSymbolic.PrimaryRegisterFeature,
    music21.features.jSymbolic.QualityFeature,
    music21.features.jSymbolic.RepeatedNotesFeature,
    music21.features.jSymbolic.SizeOfMelodicArcsFeature,
    music21.features.jSymbolic.StaccatoIncidenceFeature,
    music21.features.jSymbolic.StepwiseMotionFeature,
]

X_train = []
X_validation = []
X_test = []
y_train = []
y_validation = []
y_test = []

# Maps a value of the 'split' column to the (features, labels) lists it feeds.
# Rows whose split is none of these three are skipped, as before.
split_buckets = {
    'train': (X_train, y_train),
    'validation': (X_validation, y_validation),
    'test': (X_test, y_test),
}

for filename in tqdm(df.index):
    file = music21.converter.parse('data/maestro-v2.0.0-midi/' + filename)
    s = music21.stream.Stream(file)

    # A fresh DataSet per file: one data instance in, one feature row out.
    ds = music21.features.DataSet(classLabel=classifier)
    ds.addFeatureExtractors(feature_extractors)
    ds.addData(s)
    ds.process()

    # np.append flattens the processed output into a single 1-D array; the first
    # and last entries are then discarded before converting to float.
    # NOTE(review): presumably those two entries are the instance id and the
    # class value that DataSet adds around the feature values -- confirm.
    features = np.append(np.array([]), ds.getFeaturesAsList(concatenateLists=True))
    features = [float(value) for value in features[1:-1]]

    bucket = split_buckets.get(df.at[filename, 'split'])
    if bucket is not None:
        X_split, y_split = bucket
        X_split.append(features)
        # The integer label is the position of the row's class name in `classes`.
        y_split.append(classes.index(df.at[filename, classifier]))

# Convert the accumulated Python lists into arrays for scikit-learn.
X_train = np.array(X_train)
X_validation = np.array(X_validation)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_validation = np.array(y_validation)
y_test = np.array(y_test)

100%|██████████| 1282/1282 [20:10:51<00:00, 56.67s/it]    


In [12]:
# Persist every split to its own pickle file so the slow feature-extraction cell
# does not have to be re-run each time.

# Output file name -> array to store; files are written in this order.
pickle_targets = {
    'X_train.pickle': X_train,
    'X_test.pickle': X_test,
    'X_validation.pickle': X_validation,
    'y_train.pickle': y_train,
    'y_test.pickle': y_test,
    'y_validation.pickle': y_validation,
}

for pickle_path, array in pickle_targets.items():
    with open(pickle_path, 'wb') as output:
        pickle.dump(array, output)

In [166]:
# Print the shape of each feature matrix followed by the shape of its label
# vector, in train / validation / test order.
# NOTE(review): the recorded output totals 1419 rows while the extraction run
# processed 1282 files, so it likely reflects a later kernel state -- confirm by
# re-running from a fresh kernel.
for array in (X_train, y_train, X_validation, y_validation, X_test, y_test):
    print(array.shape)

(1104, 313)
(1104,)
(137, 313)
(137,)
(178, 313)
(178,)
