# Guitar Type Classification

In [1]:
import numpy as np
import os
import essentia.standard as es
from scipy.io import wavfile
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

## Functions

In [2]:
#Function to select the data of the database depending on the speed and the genres to be used
def select_dataset(path_to_dataset,velocity_list,genre_list):
    files=[]
    labels=[]
    for velocity in velocity_list:
        for genre in genre_list:
            
            file_route=path_to_dataset+'acoustic_mic/'+velocity+'/'+genre+'/audio/'
            for filename in os.listdir(file_route):
                files.append(file_route+filename)
                labels.append('acoustic')
                
            file_route=path_to_dataset+'acoustic_pickup/'+velocity+'/'+genre+'/audio/'
            for filename in os.listdir(file_route):
                files.append(file_route+filename)
                labels.append('acoustic')
                
            file_route=path_to_dataset+'Career SG/'+velocity+'/'+genre+'/audio/'
            for filename in os.listdir(file_route):
                files.append(file_route+filename)
                labels.append('electric')
                
            file_route=path_to_dataset+'Ibanez 2820/'+velocity+'/'+genre+'/audio/'
            for filename in os.listdir(file_route):
                files.append(file_route+filename)
                labels.append('electric')
      
    # Return an array with the path of all the files to use and another array with the corresponding labels
    return files,labels

#Function to extract sound features frame by frame of the dataset selected
def extract_features(files,labels):
    fs=44100
    M=44100 #frames of 1 second length
    N=1024
    H=int(M/2)

    #Setting up Essentia functions to be used
    spectrum = es.Spectrum(size=N)
    window = es.Windowing(size=M, type='hann')
    mfcc_algo = es.MFCC()
    barkbands_algo = es.BarkBands()
    LLSEE=es.LowLevelSpectralEqloudExtractor()
    
    frames_labels=[]
    frames_features=[]
    
    # For loop to run all the dataset files
    for file in files:
        
        x=es.MonoLoader(filename = file)() 
        x=x[15*fs:len(x)] # Remove the first 15 seconds of the audio, where is a rhythm to define the tempo
        
        label=labels[files.index(file)] # Set the label of the current file
        
        # Move along the file frame by frame
        for frame in es.FrameGenerator(x, frameSize=M, hopSize=H, startFromZero=True):              
            mX = spectrum(window(frame))
            
            # Extract features of the spectrum of the frame
            _, mfcc_coeffs = mfcc_algo(mX)
            barkbands=barkbands_algo(mX)
            scvalleys=LLSEE(mX)[2]
            frames_features.append(list(mfcc_coeffs)+list(barkbands)+list(scvalleys[0])) #Set all the features of a frame as a list
            frames_labels.append(label) #Set a label for each frame

    frames_features = np.array(frames_features,dtype=object)
    
    #return a group of arrays with all the features of each frame and the corresponding labels
    return frames_features, frames_labels

## Training model

In [3]:
# Path of the dataset directory
path_to_dataset='IDMT-SMT-GUITAR_V2/dataset4/'

training_files=[]
training_labels=[]

# Select the velocity and the genres of the files to use
velocity_list=['slow','fast']
genre_list=['classical','jazz','metal','pop']

# Select the files to train the model and extract their features
[training_files,training_labels]=select_dataset(path_to_dataset,velocity_list,genre_list)
[training_features,frames_labels]=extract_features(training_files,training_labels)

# Train a Support Vector Machine algorithm with the features and the labels obtained
classification_algo=SVC()
classification_algo.fit(training_features,frames_labels)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

## Testing model

In [4]:
# Select the velocity and the genres of the files to use
velocity_list=['slow','fast']
genre_list=['reggae_ska','country_folk','rock_blues','latin']

# Select the files to test the model and extract their features
[test_files,test_labels]=select_dataset(path_to_dataset,velocity_list,genre_list)
[test_features,test_labels]=extract_features(test_files,test_labels)

# Predict the labels of each frame of the files selected with the trained algorithm
test_predictions=classification_algo.predict(test_features)

# Compute the accuracy and the confusion matrix obtained from the prediciton
accuracy=accuracy_score(test_labels, test_predictions)
print('Accuracy of the model: ',accuracy,'%\n')
confusion_matrix=confusion_matrix(test_labels, test_predictions)
print('Confusion matrix:')
print('Acoustic Electric')
print(' ',confusion_matrix[0][0],'   ',confusion_matrix[0][1],'  Acoustic')
print(' ',confusion_matrix[1][0],'    ',confusion_matrix[1][1],'  Electric')

Accuracy of the model:  0.689167517131 %

Confusion matrix:
Acoustic Electric
  1534     2075   Acoustic
  57      3193   Electric


## Using the model

In [5]:
# Detect guitar type from an audio file

x='IDMT-SMT-GUITAR_V2/dataset4/acoustic_mic/slow/reggae_ska/audio/reggae_2_100BPM.wav',
[x_features,_]=extract_features(x,'_')

frames_predictions=classification_algo.predict(x_features)
unique_elements, counts_elements = np.unique(frames_predictions, return_counts=True)
maxim=max(counts_elements)
class_predicted=unique_elements[np.where(counts_elements==max(counts_elements))]

print('The type of guitar detected is ',class_predicted)

The type of guitar detected is  ['electric']
