In [4]:
import glob
import os

import librosa
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'ggplot' style sheet to all subsequent plots.
plt.style.use('ggplot')

def windows(data, window_size):
    """Yield ``(start, end)`` offsets of half-overlapping windows over ``data``.

    Consecutive windows advance by half of ``window_size``. Windows near the
    end may extend past ``len(data)``; callers are expected to discard slices
    that come out shorter than ``window_size``.

    Args:
        data: Any sized sequence; only ``len(data)`` is used.
        window_size: Window length in samples. Must be positive.

    Yields:
        ``(start, start + window_size)`` tuples. ``start`` is always an ``int``,
        so the offsets can be used directly as slice indices.

    Raises:
        ValueError: If ``window_size`` is not positive (the loop could never
            advance otherwise).
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")
    # Floor division keeps the offsets integral: true division (``/``) produces
    # floats on Python 3, which are not valid slice indices. The max(..., 1)
    # guarantees forward progress (and no duplicate windows) for window_size 1.
    hop = max(int(window_size // 2), 1)
    for start in range(0, len(data), hop):
        yield start, start + window_size

def extract_features(parent_dir, sub_dirs, file_ext="*.wav", bands=60, frames=41):
    """Build two-channel log-mel spectrogram patches for every matching audio file.

    Each file under ``parent_dir/<sub_dir>/`` matching ``file_ext`` is loaded with
    ``librosa.load`` and cut into the windows produced by ``windows``. Every
    full-length window becomes one sample: channel 0 holds its dB-scaled mel
    spectrogram, channel 1 the ``librosa.feature.delta`` of that spectrogram.

    Args:
        parent_dir: Root directory of the dataset.
        sub_dirs: Sub-directory names. Each name is converted with ``int()`` and
            used as the class label of all files inside it.
        file_ext: Glob pattern selecting the audio files in each sub-directory.
        bands: Number of mel bands (rows of every patch).
        frames: Number of spectrogram frames (columns of every patch).

    Returns:
        A tuple ``(features, labels)`` where ``features`` has shape
        ``(n_samples, bands, frames, 2)`` and ``labels`` is an integer array of
        length ``n_samples``.

    Raises:
        ValueError: If a sub-directory name is not an integer literal.
    """
    # 512 is librosa's default hop length, so a window of 512 * (frames - 1)
    # samples yields exactly `frames` spectrogram columns.
    window_size = 512 * (frames - 1)
    log_specgrams = []
    labels = []
    for sub_dir in sub_dirs:
        print(sub_dir)
        label = int(sub_dir)
        # glob order is filesystem-dependent; sorting makes the sample order (and
        # therefore any seeded split done later) reproducible across machines.
        for fn in sorted(glob.glob(os.path.join(parent_dir, sub_dir, file_ext))):
            sound_clip, sample_rate = librosa.load(fn)
            for start, end in windows(sound_clip, window_size):
                signal = sound_clip[int(start):int(end)]
                # Trailing windows run past the end of the clip; skip them.
                if len(signal) != window_size:
                    continue
                # `y` must be passed by keyword: recent librosa releases reject
                # positional audio arguments.
                melspec = librosa.feature.melspectrogram(
                    y=signal, sr=sample_rate, n_mels=bands
                )
                # NOTE(review): melspectrogram returns a power spectrogram by
                # default, for which librosa.power_to_db is the matching
                # conversion. amplitude_to_db is kept so the value scale of the
                # features does not change -- confirm which one is intended.
                logspec = librosa.amplitude_to_db(melspec)
                # Keep the native (bands, frames) layout. Flattening the
                # transposed matrix and reshaping it back to (bands, frames)
                # would interleave bands and frames and scramble the patch.
                log_specgrams.append(logspec)
                labels.append(label)

    log_specgrams = np.asarray(log_specgrams).reshape(
        len(log_specgrams), bands, frames, 1
    )
    # Second channel starts as zeros and is filled with the per-sample deltas.
    features = np.concatenate(
        (log_specgrams, np.zeros(np.shape(log_specgrams))), axis=3
    )
    for i in range(len(features)):
        features[i, :, :, 1] = librosa.feature.delta(features[i, :, :, 0])

    # `np.int` was removed in NumPy 1.24; the builtin `int` is its exact equivalent.
    return features, np.array(labels, dtype=int)

def one_hot_encode(labels):
    """Convert integer class ids into a one-hot matrix.

    Args:
        labels: 1-D sequence or array of integer class ids.

    Returns:
        A float array of shape ``(len(labels), n_classes)`` with a single ``1``
        per row, placed in the column given by that row's label. ``n_classes``
        is large enough to hold the highest label, so ids do not have to be
        contiguous. An empty input yields an array of shape ``(0, 0)``.
    """
    # Coerce to an integer array so plain lists (including empty ones) can be
    # used as an index below.
    labels = np.asarray(labels, dtype=int)
    n_labels = len(labels)
    if n_labels == 0:
        return np.zeros((0, 0))
    # Counting unique labels alone under-sizes the matrix when ids have gaps
    # (e.g. [0, 3]) and makes the assignment below raise IndexError; the column
    # count must also cover the largest id.
    n_classes = max(len(np.unique(labels)), int(labels.max()) + 1)
    encoded = np.zeros((n_labels, n_classes))
    encoded[np.arange(n_labels), labels] = 1
    return encoded

In [None]:
# Extract features for the class folders '0' through '8' of the dataset, then
# replace the integer labels with their one-hot encoding.
parent_dir = 'voiceDataset'
tr_sub_dirs = [str(class_id) for class_id in range(9)]
tr_features, tr_labels = extract_features(parent_dir, tr_sub_dirs)
tr_labels = one_hot_encode(tr_labels)

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible.
# NOTE: this cell overwrites tr_features / tr_labels with the training portion,
# so it is not idempotent -- re-run the feature-extraction cell before running
# it a second time, otherwise the training set is split again.
# NOTE(review): samples are half-overlapping windows, so windows cut from the
# same recording can end up on both sides of the split and inflate the test
# score; consider splitting by file instead.
tr_features, ts_features, tr_labels, ts_labels = train_test_split(
    tr_features, tr_labels, test_size=0.2, random_state=42
)

In [None]:
# Persist the train/test split as .npy files in the current working directory.
# NOTE: the two label files are named inconsistently ('..._label.npy' vs
# '..._labels.npy'); the names are left unchanged because code that loads
# these files may depend on them -- verify before renaming.
np.save('train_voice_input.npy', tr_features)
np.save('train_voice_label.npy', tr_labels)
np.save('test_voice_input.npy', ts_features)
np.save('test_voice_labels.npy', ts_labels)