In [88]:
import sys
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from feature_engineering import *
from config import *
import sklearn

In [89]:
from config import CreateDataset

sr = CreateDataset.sr
fs = CreateDataset.fs
hs = CreateDataset.hs
mfcc_dim = CreateDataset.mfcc_dim
cs = CreateDataset.cs
ms = CreateDataset.ms
ts = CreateDataset.ts
data_path = CreateDataset.data_path

def extract_feature(samples):
    result = []
    features = []

    # Timbre features
    spectral_centroid = librosa.feature.spectral_centroid(y=samples, sr=sr, n_fft=fs, hop_length=hs)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=samples, sr=sr, n_fft=fs, hop_length=hs)
    spectral_contrast = librosa.feature.spectral_contrast(y=samples, sr=sr, n_fft=fs, hop_length=hs)
    spectral_rollof = librosa.feature.spectral_rolloff(y=samples, sr=sr, n_fft=fs, hop_length=hs)
    spectral_flux = librosa.onset.onset_strength(y=samples, sr=sr, center=True)
    zero_crossing = librosa.feature.zero_crossing_rate(y=samples, frame_length=fs, hop_length=hs)
    mfcc = librosa.feature.mfcc(y=samples, sr=sr, n_fft=fs, hop_length=hs)
    mel_scale = librosa.feature.melspectrogram(y=samples, n_fft=fs, hop_length=hs, sr=sr)
    mel_scale = librosa.power_to_db(mel_scale)
    
    
    # 리듬 feature
    tempo = librosa.beat.tempo(y=samples, sr=sr, hop_length=hs)
    
    # pitch feature
    chroma = librosa.feature.chroma_stft(y=samples, sr=sr, hop_length=hs, n_fft=fs, n_chroma=cs)
#     tonal_centroid = librosa.feature.tonnetz(y=samples, sr=sr)

    features.append(spectral_contrast)
    features.append(spectral_bandwidth)
    features.append(spectral_centroid)
    features.append(spectral_rollof)
    features.append(zero_crossing)
    features.append(spectral_flux)
    
    features.append(tempo)

    for feature in features:
        result.append(np.mean(feature))
        result.append(np.std(feature))

    for i in range(0, mfcc_dim):
        result.append(np.mean(mfcc[i,:]))
        result.append(np.std(mfcc[i, :]))

    for i in range(0, ms):
        result.append(np.mean(mel_scale[i, :]))
        result.append(np.std(mel_scale[i, :]))
        
    for i in range(0, cs):
        result.append(np.mean(chroma[i, :]))
        result.append(np.std(chroma[i, :]))
        
#     for i in range(0, ts):
#         result.append(np.mean(tonal_centroid[i, :]))
#         result.append(np.std(tonal_centroid[i, :]))

    return result

In [90]:
def get_sampels(data_set='train'):
    audios = []
    labels = []
    path_of_audios = librosa.util.find_files(data_path + data_set)
    for audio in path_of_audios:
        labels.append(audio.split('train/')[1].split('_')[0])
        y, sr = librosa.load(audio, sr=22050, duration=4.0)
        audios.append(y)
    audios_numpy = np.array(audios)
    return audios_numpy, labels

is_created = False
audios_numpy, labels = get_sampels(data_set='train')
for samples in audios_numpy:
    row = extract_feature(samples)
    if not is_created:
        dataset_numpy = np.array(row)
        is_created = True
    elif is_created:
        dataset_numpy = np.vstack((dataset_numpy, row))

scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(-1,1))
dataset_numpy = scaler.fit_transform(dataset_numpy)

data_set = pd.DataFrame(dataset_numpy)
data_set["instruments"] = labels

In [91]:
data_set = np.array(data_set)
# Cacluate Shape
row, col = data_set.shape
print(row,col)

1200 85


In [92]:
x = data_set[:, :col-1]
y = data_set[:, col-1]

In [93]:
PATH = librosa.util.find_files("./dataset/test")

test_Y = []
samples = []
for p in PATH:
    test_Y.append(p.split('test/')[1].split('_')[0])
    sample, sr = librosa.load(p, sr=22050, duration=4.0)
    samples.append(sample)

data = np.array([extract_feature(sample) for sample in samples])

scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(-1,1))
test_X = scaler.fit_transform(data)

In [94]:
test_X.shape

(200, 84)

In [95]:
from sklearn.model_selection import cross_validate, ShuffleSplit, GridSearchCV
from sklearn.linear_model import LogisticRegressionCV, SGDClassifier, LogisticRegression
from sklearn import ensemble, naive_bayes, svm, tree, discriminant_analysis, neighbors, feature_selection

In [96]:
MLA = [    
        # Generalized Linear Models
        LogisticRegressionCV(),
        # SVM
        svm.SVC(probability = True),
        svm.LinearSVC(),
        # KNN
        neighbors.KNeighborsClassifier(weights='distance'),
        # Naive Bayes
        naive_bayes.GaussianNB(),
        #Trees    
        tree.DecisionTreeClassifier(),
        ensemble.RandomForestClassifier()
    ]

cv_split = ShuffleSplit(n_splits = 5, test_size = .8, train_size = .2, random_state = 0)
MLA_columns = ['MLA Name','MLA Train Accuracy Mean', 'MLA Test Accuracy Mean','MLA Time']
MLA_compare = pd.DataFrame(columns = MLA_columns)

row_index = 0
for alg in MLA:
    MLA_name = alg.__class__.__name__
    MLA_compare.loc[row_index, 'MLA Name'] = MLA_name
    cv_results = cross_validate(alg, x, y, cv=cv_split, return_train_score=True)
    
    MLA_compare.loc[row_index, 'MLA Time'] = cv_results['fit_time'].mean()
    MLA_compare.loc[row_index, 'MLA Train Accuracy Mean'] = cv_results['train_score'].mean()
    MLA_compare.loc[row_index, 'MLA Val Accuracy Mean'] = cv_results['test_score'].mean()    
    alg.fit(x, y)
    test_Y_hat = alg.predict(test_X)
    accuracy = np.sum((test_Y_hat == test_Y))/200.0*100.0
    MLA_compare.loc[row_index, 'MLA Test Accuracy Mean'] = accuracy
    row_index+=1
    
MLA_compare.sort_values(by = ['MLA Test Accuracy Mean'], ascending = False, inplace = True)

In [97]:
MLA_compare

Unnamed: 0,MLA Name,MLA Train Accuracy Mean,MLA Test Accuracy Mean,MLA Time,MLA Val Accuracy Mean
0,LogisticRegressionCV,0.999167,86.5,2.59744,0.877083
1,SVC,0.798333,84.5,0.0720632,0.630417
2,LinearSVC,1.0,83.0,0.132678,0.889583
6,RandomForestClassifier,0.995833,77.5,0.0472243,0.857083
3,KNeighborsClassifier,1.0,75.0,0.0151406,0.673333
4,GaussianNB,0.95,72.0,0.0139742,0.824583
5,DecisionTreeClassifier,1.0,67.0,0.0345453,0.779375


In [98]:
param_grid = {'C':[0.5,1.0,2.0, 3.0],  # penalty parameter C of the error term
              'kernel':['linear', 'rbf'], # specifies the kernel type to be used in the algorithm  
              'gamma':[0.02, 0.08,0.2,1.0] # kernel coefficient for 'rbf'
             }

# Grid Search
tune_model = GridSearchCV(svm.SVC(), param_grid=param_grid, scoring = 'accuracy', cv = cv_split, return_train_score=True)
tune_model.fit(x, y)

for i in range(5):
    print("epoch:",i,"train_score:",tune_model.cv_results_['split'+str(i)+'_train_score'][tune_model.best_index_],
    "test_score:",tune_model.cv_results_['split'+str(i)+'_test_score'][tune_model.best_index_])

print('-'*5)    

print('AFTER Tuning Parameters: ', tune_model.best_params_)
print("AFTER Tuning Training w/bin score mean: {:.2f}". format(tune_model.cv_results_['mean_train_score'][tune_model.best_index_]*100))
print("AFTER Tuning Test w/bin score mean: {:.2f}". format(tune_model.cv_results_['mean_test_score'][tune_model.best_index_]*100))
print('-'*5)

best_log = tune_model.best_estimator_
print(best_log)
#accuracy on test
test_Y_hat = best_log.predict(test_X)
accuracy = np.sum((test_Y_hat == test_Y))/200.0*100.0
print('test accuracy = ' + str(accuracy) + ' %')

epoch: 0 train_score: 1.0 test_score: 0.9104166666666667
epoch: 1 train_score: 1.0 test_score: 0.9177083333333333
epoch: 2 train_score: 1.0 test_score: 0.8979166666666667
epoch: 3 train_score: 1.0 test_score: 0.9
epoch: 4 train_score: 1.0 test_score: 0.8895833333333333
-----
AFTER Tuning Parameters:  {'C': 2.0, 'gamma': 0.02, 'kernel': 'linear'}
AFTER Tuning Training w/bin score mean: 100.00
AFTER Tuning Test w/bin score mean: 90.31
-----
SVC(C=2.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.02, kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
test accuracy = 91.0 %


In [99]:
#Hyper Parameters Set
param_grid = {'n_neighbors':[5,6,7,8,9,10],
          'leaf_size':[1,2,3,5],
          'weights':['uniform', 'distance'],
          'n_jobs':[-1]}

tune_model = GridSearchCV(neighbors.KNeighborsClassifier(), param_grid=param_grid, scoring = 'accuracy', cv = cv_split, return_train_score=True)
tune_model.fit(x, y)

for i in range(5):
    print("epoch:",i,"train_score:",tune_model.cv_results_['split'+str(i)+'_train_score'][tune_model.best_index_],
    "test_score:",tune_model.cv_results_['split'+str(i)+'_test_score'][tune_model.best_index_])

print('-'*10)    

print('AFTER Tuning Parameters: ', tune_model.best_params_)
print("AFTER Tuning Training w/bin score mean: {:.2f}". format(tune_model.cv_results_['mean_train_score'][tune_model.best_index_]*100))
print("AFTER Tuning Test w/bin score mean: {:.2f}". format(tune_model.cv_results_['mean_test_score'][tune_model.best_index_]*100))
print('-'*10)

best_log = tune_model.best_estimator_
print(best_log)
#accuracy on test
test_Y_hat = best_log.predict(test_X)
accuracy = np.sum((test_Y_hat == test_Y))/200.0*100.0
print('test accuracy = ' + str(accuracy) + ' %')

epoch: 0 train_score: 1.0 test_score: 0.7166666666666667
epoch: 1 train_score: 1.0 test_score: 0.6791666666666667
epoch: 2 train_score: 1.0 test_score: 0.703125
epoch: 3 train_score: 1.0 test_score: 0.709375
epoch: 4 train_score: 1.0 test_score: 0.6385416666666667
----------
AFTER Tuning Parameters:  {'leaf_size': 1, 'n_jobs': -1, 'n_neighbors': 9, 'weights': 'distance'}
AFTER Tuning Training w/bin score mean: 100.00
AFTER Tuning Test w/bin score mean: 68.94
----------
KNeighborsClassifier(algorithm='auto', leaf_size=1, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=9, p=2,
           weights='distance')
test accuracy = 74.0 %


In [100]:
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000],
             'tol': [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
             'random_state' : [42]}

tune_model = GridSearchCV(LogisticRegression(), param_grid, cv = cv_split, scoring= 'accuracy')
tune_model.fit(x, y)

for i in range(5):
    print("epoch:",i,"train_score:",tune_model.cv_results_['split'+str(i)+'_train_score'][tune_model.best_index_],
    "test_score:",tune_model.cv_results_['split'+str(i)+'_test_score'][tune_model.best_index_])

print('-'*5)    

print('AFTER Tuning Parameters: ', tune_model.best_params_)
print("AFTER Tuning Training w/bin score mean: {:.2f}". format(tune_model.cv_results_['mean_train_score'][tune_model.best_index_]*100))
print("AFTER Tuning Test w/bin score mean: {:.2f}". format(tune_model.cv_results_['mean_test_score'][tune_model.best_index_]*100))
print('-'*5)


#let's see the best estimator
best_log = tune_model.best_estimator_
print(best_log)
#accuracy on test
test_Y_hat = best_log.predict(test_X)
accuracy = np.sum((test_Y_hat == test_Y))/200.0*100.0
print('test accuracy = ' + str(accuracy) + ' %')

epoch: 0 train_score: 1.0 test_score: 0.8979166666666667
epoch: 1 train_score: 1.0 test_score: 0.8895833333333333
epoch: 2 train_score: 1.0 test_score: 0.878125
epoch: 3 train_score: 1.0 test_score: 0.8875
epoch: 4 train_score: 1.0 test_score: 0.88125
-----
AFTER Tuning Parameters:  {'C': 100, 'random_state': 42, 'tol': 1e-06}
AFTER Tuning Training w/bin score mean: 100.00
AFTER Tuning Test w/bin score mean: 88.69
-----
LogisticRegression(C=100, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=42, solver='liblinear', tol=1e-06,
          verbose=0, warm_start=False)
test accuracy = 81.5 %




In [101]:
param_grid = {'criterion': ['gini','entropy'], 
              'splitter': ['best', 'random'], 
              'max_depth': [2,4,6,8,10,None], 
              #'min_samples_split': [2,5,7,10,12], 
              #'min_samples_leaf': [1,3,5,7, 10], 
              'random_state': [0] 
             }
tune_model = GridSearchCV(tree.DecisionTreeClassifier(), param_grid=param_grid, scoring = 'accuracy', cv = cv_split, return_train_score=True)
tune_model.fit(x, y)

for i in range(5):
    print("epoch:",i,"train_score:",tune_model.cv_results_['split'+str(i)+'_train_score'][tune_model.best_index_],
    "test_score:",tune_model.cv_results_['split'+str(i)+'_test_score'][tune_model.best_index_])

print('-'*5)    

print('AFTER Tuning Parameters: ', tune_model.best_params_)
print("AFTER Tuning Training w/bin score mean: {:.2f}". format(tune_model.cv_results_['mean_train_score'][tune_model.best_index_]*100))
print("AFTER Tuning Test w/bin score mean: {:.2f}". format(tune_model.cv_results_['mean_test_score'][tune_model.best_index_]*100))
print('-'*10)

best_log = tune_model.best_estimator_
print(best_log)
#accuracy on test
test_Y_hat = best_log.predict(test_X)
accuracy = np.sum((test_Y_hat == test_Y))/200.0*100.0
print('test accuracy = ' + str(accuracy) + ' %')

epoch: 0 train_score: 1.0 test_score: 0.7875
epoch: 1 train_score: 1.0 test_score: 0.8197916666666667
epoch: 2 train_score: 1.0 test_score: 0.8333333333333334
epoch: 3 train_score: 1.0 test_score: 0.7864583333333334
epoch: 4 train_score: 1.0 test_score: 0.7833333333333333
-----
AFTER Tuning Parameters:  {'criterion': 'entropy', 'max_depth': 8, 'random_state': 0, 'splitter': 'best'}
AFTER Tuning Training w/bin score mean: 100.00
AFTER Tuning Test w/bin score mean: 80.21
----------
DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=8,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=0,
            splitter='best')
test accuracy = 62.5 %


In [102]:
param_grid = {'n_estimators': [15,25,30,35],
              'criterion': ['gini','entropy'],  #scoring methodology; two supported formulas for calculating information gain - default is gini
              'max_depth': [2,4,6,None], #max depth tree can grow; default is none
              'min_samples_split': [2,5,7,10,12], #minimum subset size BEFORE new split (fraction is % of total); default is 2
              #'min_samples_leaf': [1,3,5], #minimum subset size AFTER new split split (fraction is % of total); default is 1
              'max_features': [2,3,'auto'], #max features to consider when performing split; default none or all
              'random_state': [0] #seed or control random number generator: https://www.quora.com/What-is-seed-in-random-number-generation
             }


tune_model = GridSearchCV(ensemble.RandomForestClassifier(), param_grid=param_grid, scoring = 'accuracy', cv = cv_split, return_train_score=True)
tune_model.fit(x, y)

for i in range(5):
    print("epoch:",i,"train_score:",tune_model.cv_results_['split'+str(i)+'_train_score'][tune_model.best_index_],
    "test_score:",tune_model.cv_results_['split'+str(i)+'_test_score'][tune_model.best_index_])

print('-'*10)    

print('AFTER Tuning Parameters: ', tune_model.best_params_)
print("AFTER Tuning Training w/bin score mean: {:.2f}". format(tune_model.cv_results_['mean_train_score'][tune_model.best_index_]*100))
print("AFTER Tuning Test w/bin score mean: {:.2f}". format(tune_model.cv_results_['mean_test_score'][tune_model.best_index_]*100))
print('-'*10)

best_log = tune_model.best_estimator_
print(best_log)
#accuracy on test
test_Y_hat = best_log.predict(test_X)
accuracy = np.sum((test_Y_hat == test_Y))/200.0*100.0
print('test accuracy = ' + str(accuracy) + ' %')

epoch: 0 train_score: 1.0 test_score: 0.9104166666666667
epoch: 1 train_score: 1.0 test_score: 0.9
epoch: 2 train_score: 1.0 test_score: 0.9020833333333333
epoch: 3 train_score: 1.0 test_score: 0.8989583333333333
epoch: 4 train_score: 1.0 test_score: 0.903125
----------
AFTER Tuning Parameters:  {'criterion': 'gini', 'max_depth': None, 'max_features': 'auto', 'min_samples_split': 2, 'n_estimators': 35, 'random_state': 0}
AFTER Tuning Training w/bin score mean: 100.00
AFTER Tuning Test w/bin score mean: 90.29
----------
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=35, n_jobs=1,
            oob_score=False, random_state=0, verbose=0, warm_start=False)
test accuracy = 90.5 %
