In [2]:
cd ../../

/Users/tom/Documents/REPO/guitar-set


In [3]:
import jams
import librosa
import numpy as np
import glob
import os


In [4]:
def is_note(st, et, ref_ann):
    # checks the midpoint of the note for annotation for now.
    time_check = (float(st) + float(et))/2
    label_output = ref_ann.to_samples([time_check])
    label = bool(len(label_output[0]))
    return label

In [5]:
def do_one(jam, man):
    features = []
    labels = []
    ann_list = jam.annotations['pitch_midi']
    man_list = man.annotations['pitch_midi']
    for ann, ref_ann in zip(ann_list, man_list):
        feature_len = len(ann.sandbox.features)
        feature = np.array(ann.sandbox.features)
        if len(features) == 0:
            features = feature
        elif len(feature) == 0:
            pass
        else:
            features = np.vstack((features, feature))
            
        for note in ann:
            st = float(note.time)
            et = float(st + note.duration)
            label = is_note(st, et, ref_ann)
            labels.append(label)
    labels = np.array(labels)
    return features, labels

In [6]:
# First try on one single jams file
jams_path = '/Users/tom/Music/DataSet/test-set_processed/eh_BN1-129-Eb_c_hex_cln.jams'
man_path = '/Users/tom/Music/DataSet/test-set_man/eh_BN1-129-Eb_c_man.jams'
wav_path = '/Users/tom/Music/DataSet/test-set_processed/eh_BN1-129-Eb_c_hex_cln.wav'

jam = jams.load(jams_path)
man = jams.load(man_path)
features, labels = do_one(jam, man)

In [7]:
def do_many(jam_dir, man_dir):
    big_features = np.empty((0,7), float)
    big_labels = np.empty((0,0), int)
    jams_list = glob.glob(os.path.join(jam_dir, '*.jams'))
    man_list = glob.glob(os.path.join(man_dir, '*.jams'))
    
    for j_path, m_path in zip(jams_list, man_list):
        print('im here~')
        j = jams.load(j_path)
        m = jams.load(m_path)
        features, labels = do_one(j, m)
        big_features = np.append(big_features, features, axis=0)
        big_labels = np.append(big_labels, labels)
    return big_features, big_labels

In [8]:
jams_dir = '/Users/tom/Music/DataSet/test-set_processed/'
man_dir = '/Users/tom/Music/DataSet/test-set_man/'
big_features, big_labels = do_many(jams_dir, man_dir)

im here~
im here~
im here~
im here~
im here~
im here~
im here~
im here~


In [9]:
big_features.shape

(1339, 7)

In [10]:
# Normalize features
from sklearn import preprocessing
features_normed = preprocessing.scale(big_features)
labels = big_labels.astype(int)

In [11]:
# sklearn SVM
from sklearn import svm
clf = svm.SVC()
clf.fit(features_normed, labels)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [12]:
# Hyperparameter search and validation

from __future__ import print_function

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC

In [13]:
# To apply an classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = big_features.shape[0]
X = big_features
y = big_labels

# Split the dataset in two equal parts
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0)

# Set the parameters by cross-validation
tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(SVC(), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision



  'precision', 'predicted', average, warn_for)


Best parameters set found on development set:

{'kernel': 'linear', 'C': 100}

Grid scores on development set:

0.620 (+/-0.452) for {'kernel': 'rbf', 'C': 1, 'gamma': 0.001}
0.445 (+/-0.093) for {'kernel': 'rbf', 'C': 1, 'gamma': 0.0001}
0.578 (+/-0.238) for {'kernel': 'rbf', 'C': 10, 'gamma': 0.001}
0.514 (+/-0.094) for {'kernel': 'rbf', 'C': 10, 'gamma': 0.0001}
0.578 (+/-0.238) for {'kernel': 'rbf', 'C': 100, 'gamma': 0.001}
0.526 (+/-0.091) for {'kernel': 'rbf', 'C': 100, 'gamma': 0.0001}
0.578 (+/-0.238) for {'kernel': 'rbf', 'C': 1000, 'gamma': 0.001}
0.519 (+/-0.085) for {'kernel': 'rbf', 'C': 1000, 'gamma': 0.0001}
0.612 (+/-0.324) for {'kernel': 'linear', 'C': 1}
0.622 (+/-0.319) for {'kernel': 'linear', 'C': 10}
0.706 (+/-0.281) for {'kernel': 'linear', 'C': 100}
0.587 (+/-0.291) for {'kernel': 'linear', 'C': 1000}

Detailed classification report:

The model is trained on the full development set.
The scores are computed on the full evaluation set.

             precision   