In [1]:
import glob
import os
import librosa
import matplotlib.pyplot as plt
from matplotlib import cm
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np
%matplotlib inline
plt.style.use('ggplot')

RuntimeError: module compiled against API version 0xb but this version of numpy is 0xa

In [None]:
def windows(data, window_size):
    """Yield ``(start, end)`` index pairs for half-overlapping windows.

    Windows begin at offset 0 and advance by ``window_size / 2``; both bounds
    are truncated with ``int()``. A window is emitted for every start offset
    below ``len(data)``, so the last windows may extend past the end of
    ``data`` and callers have to clip or discard them.

    Args:
        data: Any sized object; only ``len(data)`` is consulted.
        window_size: Positive window length, in elements of ``data``.

    Yields:
        ``(start, end)`` tuples of ints.

    Raises:
        ValueError: If ``window_size`` is not positive. Because this is a
            generator, the error surfaces on the first iteration.
    """
    # A zero or negative size would never move `start` forward and the loop
    # below would spin forever.
    if window_size <= 0:
        raise ValueError("window_size must be positive, got %r" % (window_size,))

    hop = window_size / 2  # 50% overlap between consecutive windows
    start = 0
    while start < len(data):
        yield int(start), int(start + window_size)
        start += hop


def get_features(audio_file):
    """Compute time-averaged spectral descriptors for one audio file.

    The file is decoded with ``librosa.load`` using its default settings.
    Every descriptor is a librosa feature matrix averaged over its frame
    axis, so each returned array is 1-D.

    Args:
        audio_file: Path of the audio file to analyse.

    Returns:
        Tuple ``(mfccs, chroma, mel, contrast, tonnetz)`` where
        ``mfccs`` has 40 coefficients (``n_mfcc=40``) and the other lengths
        follow librosa's defaults for the respective feature.
    """
    X, sample_rate = librosa.load(audio_file)
    # Magnitude spectrogram shared by the chroma and spectral-contrast features.
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # The signal must be passed as `y=`: librosa >= 0.10 made these arguments
    # keyword-only, and the keyword form also works on older releases.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # Tonnetz is computed on the harmonic component only.
    harmonic = librosa.effects.harmonic(X)
    tonnetz = np.mean(librosa.feature.tonnetz(y=harmonic, sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz
    
        
def extract_features(data_dir, file_ext="*.wav", bands = 60, frames = 41):
    """Build a feature matrix and label vector from a folder-per-class dataset.

    Every sub-directory below ``data_dir`` is treated as one class whose label
    is the directory's own name. Files located directly in ``data_dir`` are
    skipped because they have no class directory.

    Args:
        data_dir: Root directory of the dataset.
        file_ext: Glob pattern selecting the audio files inside each class
            directory.
        bands: Unused; kept so existing callers keep working.
        frames: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(features, labels)``: a float64 array with one row per audio
        file (the concatenation of everything ``get_features`` returns) and a
        string array with the matching class names. When no file matches,
        ``features`` has shape ``(0, 193)`` and ``labels`` is empty.
    """
    rows, labels = [], []

    for depth, (dirpath, dirnames, _filenames) in enumerate(os.walk(data_dir)):
        # Sorting in place makes os.walk visit classes in a stable order, so
        # the row order (and any seeded split of it) is reproducible.
        dirnames.sort()
        if depth == 0:
            continue  # the root itself is not a class directory

        label = os.path.basename(dirpath)
        # Escape the directory part so only `file_ext` acts as a pattern.
        pattern = os.path.join(glob.escape(dirpath), file_ext)
        for audio_file in sorted(glob.glob(pattern)):
            if not os.path.isfile(audio_file):
                continue
            rows.append(np.hstack(get_features(audio_file)))
            labels.append(label)

    # Stack once at the end instead of re-allocating the matrix per file.
    if rows:
        features = np.vstack(rows).astype(np.float64)
    else:
        features = np.empty((0, 193))

    # `np.str` was removed from NumPy; the builtin `str` is the same dtype.
    return features, np.array(labels, dtype=str)


def one_hot_encode(labels):
    """One-hot encode a 1-D collection of class labels.

    Labels may be of any sortable type (strings, arbitrary integers, ...).
    Columns follow the sorted order of the distinct labels, which for integer
    labels ``0..k-1`` is the same layout as indexing by the label itself.

    Args:
        labels: 1-D sequence or array of class labels.

    Returns:
        Float array of shape ``(len(labels), n_distinct_labels)`` holding a
        single 1 per row.
    """
    labels = np.asarray(labels)
    # `return_inverse` gives, for every label, the index of its class in the
    # sorted unique list -- a valid column index regardless of label type.
    classes, class_idx = np.unique(labels, return_inverse=True)
    n_labels = len(labels)
    encoded = np.zeros((n_labels, len(classes)))
    encoded[np.arange(n_labels), class_idx.reshape(-1)] = 1
    return encoded

In [None]:
# Dataset root, resolved relative to the notebook's working directory.
# Later cells read `parent_dir` as a global (see get_letter_features).
parent_dir = os.path.abspath('../letras')
# One feature row per audio file; labels are the names of the class folders.
features, labels = extract_features(parent_dir)

In [None]:
# Hold out part of the data for evaluation. No test_size is given, so
# scikit-learn's default proportions apply; random_state=0 fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(features, labels, random_state=0)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier that votes among the 5 closest training samples.
knn = KNeighborsClassifier(n_neighbors = 5)

In [None]:
# Train the classifier on the training split only.
knn.fit(X_train, y_train)

In [None]:
# Mean accuracy on the held-out test split.
knn.score(X_test, y_test)

In [None]:
# Smoke test: predict the class of the first extracted sample. It is wrapped
# in a list because predict expects a batch of rows, not a single vector.
knn.predict([features[0]])

In [None]:
def get_letter_features(letter, n):
    """Return the feature row of one numbered recording of a character.

    The recording is read from ``<parent_dir>/<letter>/<NNN>.wav`` where
    ``NNN`` is ``n`` zero-padded to three digits and ``parent_dir`` is the
    notebook-level dataset root.

    Args:
        letter: Name of the class directory.
        n: Recording number.

    Returns:
        float64 array of shape ``(1, 193)`` that can be passed directly to a
        classifier's ``predict``.
    """
    wav_name = str(n).zfill(3) + '.wav'
    wav_path = os.path.join(parent_dir, letter, wav_name)

    mfccs, chroma, mel, contrast, tonnetz = get_features(wav_path)
    row = np.hstack([mfccs, chroma, mel, contrast, tonnetz])

    # Stacking onto an empty (0, 193) float64 block promotes the row to a
    # single-row 2-D float64 matrix and rejects rows of any other width.
    empty_block = np.empty((0, 193))
    return np.array(np.vstack([empty_block, row]))

## Performance dos caracteres alfabéticos

In [None]:
# Per-character accuracy for the letters a-z.
# NOTE(review): the recordings are read from the same `parent_dir` that the
# training data came from, so these figures include samples the model was
# fitted on -- confirm that this is intended.
letters = range(ord('a'), ord('z') + 1)
max_samples = 1000  # upper bound on the recordings probed per character
for code in letters:
    char = chr(code)
    valid = 0
    total = 0
    for sample_no in range(1, max_samples + 1):
        try:
            letter = get_letter_features(char, sample_no)
        except Exception:
            # The loop probes recordings 001, 002, ... in order; the first one
            # that cannot be loaded is taken as the end of this character.
            break
        # Prediction runs outside the try block so that real model errors
        # (e.g. an unfitted classifier) are raised instead of counting as 0.
        total += 1
        prediction = knn.predict(letter)
        if prediction[0] == char:
            valid += 1
    # Divide by the number of samples actually evaluated, and always report,
    # even when every probed recording exists.
    if total:
        print(char, ':', round(float(valid) / total, 2))
    else:
        print(char, ': no samples found')

## Performance dos caracteres numéricos

In [None]:
# Per-character accuracy for the digit classes.
# NOTE(review): range(1, 9) covers the digits 1-8 only; confirm whether 0 and
# 9 are deliberately left out or simply missing from the dataset.
# NOTE(review): the recordings are read from the same `parent_dir` that the
# training data came from, so these figures include samples the model was
# fitted on -- confirm that this is intended.
nums = range(1, 9)
max_samples = 1000  # upper bound on the recordings probed per character
for digit in nums:
    char = str(digit)
    valid = 0
    total = 0
    for sample_no in range(1, max_samples + 1):
        try:
            letter = get_letter_features(char, sample_no)
        except Exception:
            # The loop probes recordings 001, 002, ... in order; the first one
            # that cannot be loaded is taken as the end of this character.
            break
        # Prediction runs outside the try block so that real model errors
        # (e.g. an unfitted classifier) are raised instead of counting as 0.
        total += 1
        prediction = knn.predict(letter)
        if prediction[0] == char:
            valid += 1
    # Divide by the number of samples actually evaluated, and always report,
    # even when every probed recording exists.
    if total:
        print(char, ':', round(float(valid) / total, 2))
    else:
        print(char, ': no samples found')