In [None]:
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import classification_report
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.feature_selection import RFECV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import numpy as np
import joblib

In [None]:
# Directory holding the pre-extracted feature arrays. Kept in a single constant
# so the notebook can be pointed at another location by editing one line.
DATA_DIR = "/Users/jw/Documents/extensive_features"

# Each array is later sliced as [:, :-1] (features) and [:, -1] (class label).
notes_data = np.load(f"{DATA_DIR}/hits_data_notes.npy")
trans_data = np.load(f"{DATA_DIR}/hits_data_trans.npy")
# a concatenation of notes and trans
hits_data = np.load(f"{DATA_DIR}/hits_data.npy")

In [None]:
# Per-class sample counts for notes_data; the class id is read from the last column.
label = notes_data[:, -1]
# One boolean mask per class id 0..5, unpacked in id order.
normal, bend, vibrato, hammer, pull, slide = (label == class_id for class_id in range(6))
print(f"normal: {np.sum(normal)}, bend: {np.sum(bend)}, vibrato: {np.sum(vibrato)}, hammer: {np.sum(hammer)}, pull: {np.sum(pull)}, slide: {np.sum(slide)}")

In [None]:
# Per-class sample counts for trans_data; the class id is read from the last column.
label = trans_data[:, -1]
# One boolean mask per class id 0..5, unpacked in id order.
normal, bend, vibrato, hammer, pull, slide = (label == class_id for class_id in range(6))
print(f"normal: {np.sum(normal)}, bend: {np.sum(bend)}, vibrato: {np.sum(vibrato)}, hammer: {np.sum(hammer)}, pull: {np.sum(pull)}, slide: {np.sum(slide)}")

In [None]:
# Per-class sample counts for hits_data; the class id is read from the last column.
label = hits_data[:, -1]
# One boolean mask per class id 0..5, unpacked in id order.
normal, bend, vibrato, hammer, pull, slide = (label == class_id for class_id in range(6))
print(f"normal: {np.sum(normal)}, bend: {np.sum(bend)}, vibrato: {np.sum(vibrato)}, hammer: {np.sum(hammer)}, pull: {np.sum(pull)}, slide: {np.sum(slide)}")

In [None]:
def separate_features(start_idx, feature_len, n_groups=6, n_blocks=3,
                      group_stride=81, block_stride=27, verbose=True):
    """Compute the column windows of one feature family across a blocked layout.

    A window starts at ``start_idx + group * group_stride + block * block_stride``
    for every ``group`` in ``range(n_groups)`` and every ``block`` in
    ``range(n_blocks)`` (groups vary slowest), and spans ``feature_len`` columns.

    Parameters
    ----------
    start_idx : int
        Offset of the feature family inside a single block.
    feature_len : int
        Number of consecutive columns the family occupies in each block.
    n_groups : int, default 6
        Number of outer groups.
    n_blocks : int, default 3
        Number of blocks inside each group.
    group_stride : int, default 81
        Column distance between the starts of two consecutive groups.
    block_stride : int, default 27
        Column distance between the starts of two consecutive blocks.
    verbose : bool, default True
        When true, print every ``start``/``end`` pair as it is produced.

    Returns
    -------
    zip
        One-shot iterator of ``(start, end)`` pairs, with ``end`` exclusive so
        a pair can be used directly as ``X[:, start:end]``. Wrap it in
        ``list(...)`` if it has to be traversed more than once.
    """
    start_points = [
        start_idx + group * group_stride + block * block_stride
        for group in range(n_groups)
        for block in range(n_blocks)
    ]
    end_points = [start_point + feature_len for start_point in start_points]
    if verbose:
        for start_point, end_point in zip(start_points, end_points):
            print(f"start {start_point}, end {end_point}")
    return zip(start_points, end_points)

In [None]:
# Feature matrix = every column but the last; label vector = the last column.
X, y = hits_data[:, :-1], hits_data[:, -1]

In [None]:
# Collect the 20-column window at offset 0 of every block and stack the
# windows side by side into a single matrix.
mfcc_idx = separate_features(start_idx=0, feature_len=20)
mfcc_components = [X[:, lo:hi] for lo, hi in mfcc_idx]
X_mfcc = np.concatenate(mfcc_components, axis=1)
print(X_mfcc.shape)


In [None]:
# Collect the single column at offset 20 of every block and stack the
# columns side by side into a single matrix.
pitch_idx = separate_features(start_idx=20, feature_len=1)
pitch_components = [X[:, lo:hi] for lo, hi in pitch_idx]
X_pitch = np.concatenate(pitch_components, axis=1)
print(X_pitch.shape)


In [None]:
# Collect the 6-column window at offset 21 of every block and stack the
# windows side by side into a single matrix.
timbre_idx = separate_features(start_idx=21, feature_len=6)
timbre_components = [X[:, lo:hi] for lo, hi in timbre_idx]
X_timbre = np.concatenate(timbre_components, axis=1)
print(X_timbre.shape)


In [None]:
# 5x2 nested CV with grid search, evaluated on the MFCC columns only.
p_grid = [
    {"svc__C": [1, 10, 100, 1000], "svc__gamma": ["scale", "auto"]},
]
svm = Pipeline([('scaler', StandardScaler()), ('svc', SVC(class_weight="balanced"))])

# Splitters for the inner (hyper-parameter selection) and outer (evaluation)
# loops. Fixed seeds make the shuffled folds repeatable across kernel restarts.
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

for train_index, test_index in outer_cv.split(X_mfcc, y):
    # Slice the folds out of X_mfcc (not the full X) so that only the MFCC
    # columns ever reach the classifier.
    X_train, X_test = X_mfcc[train_index], X_mfcc[test_index]
    y_train, y_test = y[train_index], y[test_index]
    search = GridSearchCV(estimator=svm, param_grid=p_grid, scoring="f1_macro", cv=inner_cv, n_jobs=-1)
    search.fit(X_train, y_train)
    print("------------BEST_PARAMS--------")
    print(search.best_params_)
    print("------------BEST_SCORE---------")
    print(search.best_score_)
    print("------------EVAL_DATA----------")
    # GridSearchCV refits the best candidate on the whole outer-train fold,
    # so predict() here scores that refitted pipeline on held-out rows.
    y_pred = search.predict(X_test)
    print(classification_report(y_test, y_pred, digits=3))


In [None]:
# 5x2 nested CV with grid search, evaluated on the pitch columns only.
p_grid = [
    {"svc__C": [1, 10, 100, 1000], "svc__gamma": ["scale", "auto"]},
]
svm = Pipeline([('scaler', StandardScaler()), ('svc', SVC(class_weight="balanced"))])

# Splitters for the inner (hyper-parameter selection) and outer (evaluation)
# loops. Fixed seeds make the shuffled folds repeatable across kernel restarts.
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

for train_index, test_index in outer_cv.split(X_pitch, y):
    # Slice the folds out of X_pitch (not the full X) so that only the pitch
    # columns ever reach the classifier.
    X_train, X_test = X_pitch[train_index], X_pitch[test_index]
    y_train, y_test = y[train_index], y[test_index]
    search = GridSearchCV(estimator=svm, param_grid=p_grid, scoring="f1_macro", cv=inner_cv, n_jobs=-1)
    search.fit(X_train, y_train)
    print("------------BEST_PARAMS--------")
    print(search.best_params_)
    print("------------BEST_SCORE---------")
    print(search.best_score_)
    print("------------EVAL_DATA----------")
    # GridSearchCV refits the best candidate on the whole outer-train fold,
    # so predict() here scores that refitted pipeline on held-out rows.
    y_pred = search.predict(X_test)
    print(classification_report(y_test, y_pred, digits=3))


In [None]:
# 5x2 nested CV with grid search, evaluated on the timbre columns only.
p_grid = [
    {"svc__C": [1, 10, 100, 1000], "svc__gamma": ["scale", "auto"]},
]
svm = Pipeline([('scaler', StandardScaler()), ('svc', SVC(class_weight="balanced"))])

# Splitters for the inner (hyper-parameter selection) and outer (evaluation)
# loops. Fixed seeds make the shuffled folds repeatable across kernel restarts.
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

for train_index, test_index in outer_cv.split(X_timbre, y):
    # Slice the folds out of X_timbre (not the full X) so that only the timbre
    # columns ever reach the classifier.
    X_train, X_test = X_timbre[train_index], X_timbre[test_index]
    y_train, y_test = y[train_index], y[test_index]
    search = GridSearchCV(estimator=svm, param_grid=p_grid, scoring="f1_macro", cv=inner_cv, n_jobs=-1)
    search.fit(X_train, y_train)
    print("------------BEST_PARAMS--------")
    print(search.best_params_)
    print("------------BEST_SCORE---------")
    print(search.best_score_)
    print("------------EVAL_DATA----------")
    # GridSearchCV refits the best candidate on the whole outer-train fold,
    # so predict() here scores that refitted pipeline on held-out rows.
    y_pred = search.predict(X_test)
    print(classification_report(y_test, y_pred, digits=3))


In [None]:
# 5x2 nested CV with grid search, evaluated on the timbre + pitch columns.
p_grid = [
    {"svc__C": [1, 10, 100, 1000], "svc__gamma": ["scale", "auto"]},
]
svm = Pipeline([('scaler', StandardScaler()), ('svc', SVC(class_weight="balanced"))])

# Splitters for the inner (hyper-parameter selection) and outer (evaluation)
# loops. Fixed seeds make the shuffled folds repeatable across kernel restarts.
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Build the combined matrix once and keep a handle on it, so the folds can be
# sliced from the same columns the splitter was given.
X_timbre_pitch = np.concatenate((X_timbre, X_pitch), axis=1)

for train_index, test_index in outer_cv.split(X_timbre_pitch, y):
    # Slice from the feature subset (not the full X) so that only timbre and
    # pitch columns ever reach the classifier.
    X_train, X_test = X_timbre_pitch[train_index], X_timbre_pitch[test_index]
    y_train, y_test = y[train_index], y[test_index]
    search = GridSearchCV(estimator=svm, param_grid=p_grid, scoring="f1_macro", cv=inner_cv, n_jobs=-1)
    search.fit(X_train, y_train)
    print("------------BEST_PARAMS--------")
    print(search.best_params_)
    print("------------BEST_SCORE---------")
    print(search.best_score_)
    print("------------EVAL_DATA----------")
    # GridSearchCV refits the best candidate on the whole outer-train fold,
    # so predict() here scores that refitted pipeline on held-out rows.
    y_pred = search.predict(X_test)
    print(classification_report(y_test, y_pred, digits=3))


In [None]:
# 5x2 nested CV with grid search, evaluated on the timbre + MFCC columns.
p_grid = [
    {"svc__C": [1, 10, 100, 1000], "svc__gamma": ["scale", "auto"]},
]
svm = Pipeline([('scaler', StandardScaler()), ('svc', SVC(class_weight="balanced"))])

# Splitters for the inner (hyper-parameter selection) and outer (evaluation)
# loops. Fixed seeds make the shuffled folds repeatable across kernel restarts.
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Build the combined matrix once and keep a handle on it, so the folds can be
# sliced from the same columns the splitter was given.
X_timbre_mfcc = np.concatenate((X_timbre, X_mfcc), axis=1)

for train_index, test_index in outer_cv.split(X_timbre_mfcc, y):
    # Slice from the feature subset (not the full X) so that only timbre and
    # MFCC columns ever reach the classifier.
    X_train, X_test = X_timbre_mfcc[train_index], X_timbre_mfcc[test_index]
    y_train, y_test = y[train_index], y[test_index]
    search = GridSearchCV(estimator=svm, param_grid=p_grid, scoring="f1_macro", cv=inner_cv, n_jobs=-1)
    search.fit(X_train, y_train)
    print("------------BEST_PARAMS--------")
    print(search.best_params_)
    print("------------BEST_SCORE---------")
    print(search.best_score_)
    print("------------EVAL_DATA----------")
    # GridSearchCV refits the best candidate on the whole outer-train fold,
    # so predict() here scores that refitted pipeline on held-out rows.
    y_pred = search.predict(X_test)
    print(classification_report(y_test, y_pred, digits=3))


In [None]:
# 5x2 nested CV with grid search, evaluated on the pitch + MFCC columns.
p_grid = [
    {"svc__C": [1, 10, 100, 1000], "svc__gamma": ["scale", "auto"]},
]
svm = Pipeline([('scaler', StandardScaler()), ('svc', SVC(class_weight="balanced"))])

# Splitters for the inner (hyper-parameter selection) and outer (evaluation)
# loops. Fixed seeds make the shuffled folds repeatable across kernel restarts.
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Build the combined matrix once and keep a handle on it, so the folds can be
# sliced from the same columns the splitter was given.
X_pitch_mfcc = np.concatenate((X_pitch, X_mfcc), axis=1)

for train_index, test_index in outer_cv.split(X_pitch_mfcc, y):
    # Slice from the feature subset (not the full X) so that only pitch and
    # MFCC columns ever reach the classifier.
    X_train, X_test = X_pitch_mfcc[train_index], X_pitch_mfcc[test_index]
    y_train, y_test = y[train_index], y[test_index]
    search = GridSearchCV(estimator=svm, param_grid=p_grid, scoring="f1_macro", cv=inner_cv, n_jobs=-1)
    search.fit(X_train, y_train)
    print("------------BEST_PARAMS--------")
    print(search.best_params_)
    print("------------BEST_SCORE---------")
    print(search.best_score_)
    print("------------EVAL_DATA----------")
    # GridSearchCV refits the best candidate on the whole outer-train fold,
    # so predict() here scores that refitted pipeline on held-out rows.
    y_pred = search.predict(X_test)
    print(classification_report(y_test, y_pred, digits=3))


In [None]:
# 5x2 nested CV with grid search, evaluated on the full feature matrix X.
p_grid = [
    {"svc__C": [1, 10, 100, 1000], "svc__gamma": ["scale", "auto"]},
]
svm = Pipeline([('scaler', StandardScaler()), ('svc', SVC(class_weight="balanced"))])

# Splitters for the inner (hyper-parameter selection) and outer (evaluation)
# loops. Without a seed the shuffled folds change on every run, which makes the
# printed reports impossible to reproduce; pin them with random_state.
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

for train_index, test_index in outer_cv.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    search = GridSearchCV(estimator=svm, param_grid=p_grid, scoring="f1_macro", cv=inner_cv, n_jobs=-1)
    search.fit(X_train, y_train)
    print("------------BEST_PARAMS--------")
    print(search.best_params_)
    print("------------BEST_SCORE---------")
    print(search.best_score_)
    print("------------EVAL_DATA----------")
    # GridSearchCV refits the best candidate on the whole outer-train fold,
    # so predict() here scores that refitted pipeline on held-out rows.
    y_pred = search.predict(X_test)
    print(classification_report(y_test, y_pred, digits=3))


In [None]:
# Final model: standardise the features, then fit a class-balanced SVC with
# fixed C=10 / gamma="scale" on every row of hits_data, and persist it.
final_steps = [
    ('scaler', StandardScaler()),
    ('svc', SVC(class_weight="balanced", C=10, gamma="scale")),
]
X_all, y_all = hits_data[:, :-1], hits_data[:, -1]  # last column holds the label
svc_full = Pipeline(final_steps).fit(X_all, y_all)  # fit() returns the pipeline itself
joblib.dump(svc_full, "/Users/jw/Documents/unified_clf.joblib")