In [1]:
import music21
import numpy as np

# ML stuff
import sklearn
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [2]:
chorales = music21.corpus.search('bach', fileExtensions='xml')

In [3]:
def get_data():
    pieces = []
    modes = []
    for i, chorale in enumerate(chorales):
        score = chorale.parse()
        top_voice = score.parts[0].pitches
        pitches = [p.ps for p in top_voice]
        if len(pitches) >= 40:
            keep = pitches[:40]
            pieces.append(keep)
            m = score.analyze('key').mode
            if m == 'minor':
                modes.append(0)
            else:
                modes.append(1)
                
    # num_chorales rows by 40 columns
    numpy_pieces = [np.array(p) for p in pieces]
    numpy_pieces = np.vstack(numpy_pieces)
    numpy_modes = np.array(modes)
        
    X=numpy_pieces
    y=numpy_modes

    SPLIT_IDX = int(X.shape[0] * 0.75)
    X_train = X[:SPLIT_IDX]
    y_train = y[:SPLIT_IDX]
    X_test = X[SPLIT_IDX:]
    y_test = y[SPLIT_IDX:]
    
    return X_train,y_train,X_test,y_test

In [4]:
def classify(X_train,y_train,X_test,y_test):    
    names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
             "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
             "Naive Bayes", "QDA"]

    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        MLPClassifier(alpha=1, max_iter=1000),
        AdaBoostClassifier(),
        GaussianNB(),
        QuadraticDiscriminantAnalysis()]

    for c in classifiers:
        model = c
        model.fit(X_train,y_train)
        score_train = round(model.score(X_train,y_train),3)
        score_test = round(model.score(X_test,y_test),3)
        print("train:",score_train, 'test:',score_test)

In [5]:
X_train,y_train,X_test,y_test = get_data()

In [6]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
classify(X_train,y_train,X_test,y_test)

(246, 40)
(246,)
(83, 40)
(83,)
train: 0.801 test: 0.614
train: 0.776 test: 0.614
train: 0.996 test: 0.518
train: 0.829 test: 0.602
train: 0.809 test: 0.53
train: 0.801 test: 0.542
train: 0.524 test: 0.518
train: 0.939 test: 0.59
train: 0.569 test: 0.554
train: 0.972 test: 0.566


In [7]:
def get_data():
    pieces = []
    modes = []
    
    LEN=20
    for i, chorale in enumerate(chorales):

        score = chorale.parse()
        
        if len(score.parts) < 2:
            continue
            
        voice1 = score.parts[0].pitches
        voice2 = score.parts[1].pitches
        
        voice1 = [p.ps for p in voice1]
        voice2 = [p.ps for p in voice2]
        
        if not (len(voice1) >= 20 and len(voice2) >= 20):
            continue

        voice1 = voice1[:LEN]
        voice2 = voice2[:LEN]
        piece = voice1 + voice2
        pieces.append(piece)
    
        m = score.analyze('key').mode
        if m == 'minor':
            modes.append(0)
        else:
            modes.append(1)
            
    numpy_pieces = [np.array(p) for p in pieces]
    numpy_pieces = np.vstack(numpy_pieces)
    numpy_modes = np.array(modes)
    
    X=numpy_pieces
    y=numpy_modes
    
    SPLIT_IDX = int(X.shape[0] * 0.75)
    X_train = X[:SPLIT_IDX]
    y_train = y[:SPLIT_IDX]
    X_test = X[SPLIT_IDX:]
    y_test = y[SPLIT_IDX:]
    return X_train,y_train,X_test,y_test

In [8]:
X_train,y_train,X_test,y_test = get_data()

In [9]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
classify(X_train,y_train,X_test,y_test)

(306, 40)
(306,)
(103, 40)
(103,)
train: 0.81 test: 0.612
train: 0.699 test: 0.612
train: 1.0 test: 0.495
train: 0.922 test: 0.612
train: 0.807 test: 0.621
train: 0.801 test: 0.515
train: 0.5 test: 0.476
train: 0.886 test: 0.573
train: 0.565 test: 0.583
train: 0.954 test: 0.563


In [None]:
# 1 voice 40 notes
train: 0.801 test: 0.614
train: 0.776 test: 0.614
train: 0.996 test: 0.518
train: 0.829 test: 0.602
train: 0.809 test: 0.53
train: 0.801 test: 0.542
train: 0.524 test: 0.518
train: 0.939 test: 0.59
train: 0.569 test: 0.554
train: 0.972 test: 0.566
# 2 voices, 20 notes each
train: 0.81 test: 0.612
train: 0.699 test: 0.612
train: 1.0 test: 0.495
train: 0.922 test: 0.612
train: 0.807 test: 0.621
train: 0.801 test: 0.515
train: 0.5 test: 0.476
train: 0.886 test: 0.573
train: 0.565 test: 0.583
train: 0.954 test: 0.563