In [1]:
# from IPython.display import Audio
from tqdm import tqdm
# from tqdm.notebook import tqdm
import os
import subprocess
import shutil
import random
from glob import glob

import librosa
import librosa.display

import numpy as np
import matplotlib.pyplot as plt


import time

In [2]:
class FeatureExtraction:
    """Compute mel-spectrogram, MFCC (+ deltas) and RMS features for one clip.

    Typical call order: ``loadFile`` (or ``load_y_sr``) -> ``melspectrogram``
    -> ``extractmfcc`` -> ``extractrmse``. Every result is stored on the
    instance (``S``, ``log_S``, ``mfcc``, ``delta_mfcc``, ``delta2_mfcc``,
    ``M``, ``rmse``).
    """

    def __init__(self, n_mels=128):
        """Create an empty extractor.

        Args:
            n_mels: number of mel bands used by ``melspectrogram``.
        """
        self.n_mels = n_mels
        self.y = None
        self.sr = 11025
        self.S = None
        self.log_S = None
        self.mfcc = None
        self.delta_mfcc = None
        self.delta2_mfcc = None
        self.M = None
        self.rmse = None
        self.foldername = None
        self.filename = None

    def loadFile(self, foldernname):
        """Load an audio file and remember its path and base name.

        ``librosa.load`` is called without ``sr``, so librosa's default
        resampling applies and ``self.sr`` is overwritten with the rate it
        returns.

        Args:
            foldernname: path of the audio file to load.
        """
        self.foldername = foldernname
        # Earlier versions stored the path only under this misspelled name;
        # it is kept so existing code reading it keeps working.
        self.foldernname = foldernname
        # Base name without extension; used for plot titles and image names.
        self.filename = os.path.splitext(os.path.basename(foldernname))[0]
        self.y, self.sr = librosa.load(foldernname)

    def load_y_sr(self, y, sr):
        """Use an already-loaded signal ``y`` with sample rate ``sr``."""
        self.y = y
        self.sr = sr

    def melspectrogram(self):
        """Compute the mel spectrogram (``self.S``) and its dB version (``self.log_S``)."""
        # `y` must be passed by keyword: recent librosa releases reject it positionally.
        self.S = librosa.feature.melspectrogram(y=self.y, sr=self.sr, n_mels=self.n_mels)
        # NOTE(review): melspectrogram returns a power spectrogram by default, for
        # which librosa.power_to_db is the matching conversion. amplitude_to_db is
        # kept so feature values stay identical to earlier runs -- confirm intent.
        self.log_S = librosa.amplitude_to_db(self.S)

    def plotmelspectrogram(self, save=True):
        """Plot ``self.log_S``; when ``save`` is true write it to ./mel/<filename>-mel.png."""
        fig = plt.figure(figsize=(12, 4))
        librosa.display.specshow(self.log_S, sr=self.sr, x_axis='time', y_axis='mel')
        plt.title(f'mel Power Spectrogram ({self.filename})')
        plt.colorbar(format='%+02.0f dB')
        plt.tight_layout()
        if save:
            os.makedirs('mel', exist_ok=True)
            fig.savefig(f'./mel/{self.filename}-mel.png', dpi=fig.dpi)
            print(f'Saved to ./mel/{self.filename}-mel.png')
            # Close only this figure so unrelated open figures are left alone.
            plt.close(fig)

    def extractmfcc(self, n_mfcc=13):
        """Compute MFCCs from ``self.log_S`` plus first/second-order deltas.

        ``self.M`` stacks the three matrices row-wise (MFCC, delta, delta-delta).

        Args:
            n_mfcc: number of MFCC coefficients to keep.
        """
        self.mfcc = librosa.feature.mfcc(S=self.log_S, n_mfcc=n_mfcc)
        self.delta_mfcc = librosa.feature.delta(self.mfcc, mode='nearest')
        self.delta2_mfcc = librosa.feature.delta(self.mfcc, order=2, mode='nearest')
        self.M = np.vstack([self.mfcc, self.delta_mfcc, self.delta2_mfcc])

    def plotmfcc(self, save=False):
        """Plot MFCC, delta and delta-delta; when ``save`` is true write ./mfcc/<filename>-mfcc.png."""
        fig = plt.figure(figsize=(12, 6))
        plt.subplot(3, 1, 1)
        librosa.display.specshow(self.mfcc)
        plt.title(f'MFCC ({self.filename})')
        plt.ylabel('MFCC')
        plt.colorbar()

        plt.subplot(3, 1, 2)
        librosa.display.specshow(self.delta_mfcc)
        plt.title(f'MFCC delta ({self.filename})')
        # Raw strings: '\D' is not a valid escape sequence in a normal string.
        plt.ylabel(r'MFCC-$\Delta$')
        plt.colorbar()

        plt.subplot(3, 1, 3)
        librosa.display.specshow(self.delta2_mfcc, sr=self.sr, x_axis='time')
        plt.title(f'MFCC delta-delta ({self.filename})')
        plt.ylabel(r'MFCC-$\Delta^2$')
        plt.colorbar()

        plt.tight_layout()
        if save:
            os.makedirs('mfcc', exist_ok=True)
            fig.savefig(f'./mfcc/{self.filename}-mfcc.png', dpi=fig.dpi)
            print(f'Saved to ./mfcc/{self.filename}-mfcc.png')
            plt.close(fig)

    def extractrmse(self):
        """Compute frame-wise RMS energy of ``self.y`` into ``self.rmse``."""
        self.rmse = librosa.feature.rms(y=self.y)

In [3]:
class Feature2:
    """Build a 23-column feature matrix and binary labels from a folder-per-label dataset.

    Every audio file contributes 20 mean MFCCs, mean RMS energy, mean onset
    strength and mean zero-crossing rate. Sub-folder ``success`` maps to
    label 0 and ``stuttered`` to label 1; files in any other sub-folder are
    recorded in ``unexpected_label`` and skipped.
    """

    # Folder name -> numeric class label.
    LABELS = {'success': 0, 'stuttered': 1}

    def __init__(self):
        self.X = np.empty((0, 23))
        self.Y = np.empty(0)
        self.DATASET = None
        self.PATH_ARRAY = []
        self.failed_file = []
        self.unexpected_label = []
        self.processed_counter = 0

    def create_dataset(self, dataset_path, output_path):
        """Extract features for every file under ``dataset_path`` and save them.

        Args:
            dataset_path: root folder containing one sub-folder per label.
            output_path: destination of the comma-separated feature+label table.
        """
        self.DATASET_PATH = dataset_path
        self.OUTPUT_PATH = output_path
        self.__process_dataset()
        self.__write_to_file()

    def update_mypbar(self):
        """Advance the current progress bar by one step."""
        # No sleep()/refresh() here: tqdm throttles its own redraws, and a
        # 10 ms sleep per file adds over a minute on a ~7000-file dataset.
        self.pbar.update(1)

    def __process_dataset(self):
        """Walk the dataset, extract one feature row per file, fill X, Y and DATASET."""
        starttime = time.time()
        root = os.fspath(self.DATASET_PATH)
        # Rows are collected in lists and stacked once at the end; stacking
        # inside the loop copies the whole matrix for every file.
        rows, labels = [], []
        for dirpath, _dirnames, filenames in os.walk(root):
            if dirpath == root:
                continue  # files directly in the root carry no label
            # basename works with both '/' and '\\' separators on Windows.
            folder = os.path.basename(os.path.normpath(dirpath))
            print("Processing:", folder)
            if folder not in self.LABELS:
                self.unexpected_label.extend(
                    os.path.join(dirpath, name) for name in filenames)
                continue
            label = self.LABELS[folder]
            self.pbar = tqdm(total=len(filenames))
            try:
                for file in filenames:
                    file_path = os.path.join(dirpath, file)
                    try:
                        parts = self.feature_extraction(file_path)
                    except Exception as err:
                        # One unreadable file must not abort a long batch run.
                        print("[Error] there was an error in feature extraction. %s" % (err))
                        self.failed_file.append(file_path)
                    else:
                        rows.append(np.hstack(parts))
                        labels.append(label)
                        self.processed_counter += 1
                    self.update_mypbar()
            finally:
                self.pbar.close()

        for fail in self.unexpected_label:
            print("unexpected_label ", fail, " !")

        for fail in self.failed_file:
            print("fail ", fail, " !")

        print('Time taken = {} seconds'.format(time.time() - starttime))
        if rows:
            self.X = np.vstack((self.X, np.asarray(rows)))
            self.Y = np.append(self.Y, labels)
        # Use this instance's labels, not a notebook-level variable.
        lab = np.expand_dims(self.Y, axis=1)
        self.DATASET = np.hstack((self.X, lab))

    def __write_to_file(self):
        """Write DATASET as CSV to OUTPUT_PATH and X / Y as .npy files in DATASET_PATH."""
        # np.savetxt overwrites an existing file, so no explicit removal is needed.
        np.savetxt(self.OUTPUT_PATH, self.DATASET, delimiter=',')

        np.save(f'{self.DATASET_PATH}/features.npy', self.X)
        print(f'Saved to {self.DATASET_PATH}/features.npy')

        np.save(f'{self.DATASET_PATH}/label.npy', self.Y)
        print(f'Saved to {self.DATASET_PATH}/label.npy')

        print('Saved to', self.OUTPUT_PATH)

    def feature_extraction(self, file_name):
        """Return time-averaged features for one audio file.

        The file is loaded as mono at its native sample rate (``sr=None``).

        Args:
            file_name: path of the audio file.

        Returns:
            Tuple ``(mfccs, rmse, spectral_flux, zcr)``: 20 mean MFCCs, mean
            RMS energy, mean onset strength and mean zero-crossing rate.
        """
        X, sample_rate = librosa.load(file_name, sr=None, mono=True)

        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=20).T, axis=0)
        rmse = np.mean(librosa.feature.rms(y=X).T, axis=0)
        # Onset strength is used here as the spectral-flux measure.
        spectral_flux = np.mean(librosa.onset.onset_strength(y=X, sr=sample_rate).T, axis=0)
        zcr = np.mean(librosa.feature.zero_crossing_rate(y=X).T, axis=0)

        return mfccs, rmse, spectral_flux, zcr



In [4]:
# Smoke test: run the Feature2 extractor on a single known clip so the
# shapes of the returned features can be inspected in the next cell.
sample_clip = 'Data/train2_augmented2/11025hz/success/F_0101_10y4m_1-000012-had.wav'
a = Feature2()
mfccs, rmse, spectral_flux, zcr = a.feature_extraction(sample_clip)



In [None]:
# Show the shape of each feature returned by the smoke-test cell, one per line.
print(mfccs.shape, rmse.shape, spectral_flux.shape, zcr.shape, sep='\n')

In [None]:
# Run the full FeatureExtraction pipeline on one stuttered clip.
# The path is built with os.path.join: the previous literal mixed '/' with
# unescaped backslashes ('\s', '\F'), which are invalid escape sequences.
try:
    features = FeatureExtraction()
    features.loadFile(os.path.join('Data', 'train2_augmented2', '11025hz',
                                   'stuttered', 'F_0101_10y4m_1-000006-um.wav'))
    features.melspectrogram()
    features.extractmfcc()
    features.extractrmse()
except Exception as err:
    # Report what went wrong instead of swallowing every error (including
    # KeyboardInterrupt) behind a bare "no".
    print(f"no: {err!r}")

In [None]:
# List the sub-directories found at each level of the dataset tree.
for dirpath, dirnames, filenames in os.walk('Data/train2_augmented2/11025hz'):
    # os.path.basename handles the platform's separator(s); splitting on
    # "\\" only yields the folder name on Windows.
    label = os.path.basename(dirpath)
    print(dirnames)

In [36]:
# Turn the 1-D label vector into a column so it can be hstack-ed with fea.X.
# NOTE: `fea` is created in a cell that appears further down (In [9]).
lab = fea.Y[:, np.newaxis]

In [39]:
# Shape of the feature matrix: (number of files, 23 features).
print(fea.X.shape)

(57, 23)


In [42]:
# Shape of the label vector: one entry per processed file.
print(fea.Y.shape)

(57,)


In [47]:
# Preview the first row of features + label.
# NOTE: the recorded output below has dtype '<U32' and ends in 'stuttered':
# a string label makes np.hstack coerce every value in the row to a string.
np.hstack((fea.X,lab))[0]

array(['-264.741455078125', '167.0384521484375', '-12.749808311462402',
       '5.457185745239258', '-13.268021583557129', '8.230816841125488',
       '-18.819334030151367', '-5.077695369720459', '-5.38155460357666',
       '-17.635957717895508', '2.3250343799591064', '7.328535556793213',
       '4.429831027984619', '14.879932403564453', '-16.86817169189453',
       '-0.16950522363185883', '-2.0041067600250244', '-5.45823860168457',
       '-1.3517531156539917', '-6.393125534057617', '0.10898221284151077',
       '2.1354281902313232', '0.03546463815789474', 'stuttered'],
      dtype='<U32')

In [9]:
# Build the Feature2 dataset for the augmented corpus and write it to CSV.
dirs = '20201126-all-clean-augmented'
fea = Feature2()
fea.create_dataset(
    dataset_path=f'Data/{dirs}/ori',
    output_path=f'Data/{dirs}/train_feature2.csv',
)

  0%|▏                                                                                | 6/3420 [00:00<01:35, 35.81it/s]

Processing: stuttered


100%|██████████████████████████████████████████████████████████████████████████████| 3420/3420 [01:46<00:00, 32.02it/s]
  0%|▏                                                                                | 7/3492 [00:00<01:24, 41.09it/s]

Processing: success


100%|██████████████████████████████████████████████████████████████████████████████| 3492/3492 [01:50<00:00, 31.58it/s]


Time taken = 217.4068009853363 seconds
Saved to Data/20201126-all-clean-augmented/ori/features.npy
Saved to Data/20201126-all-clean-augmented/ori/label.npy
Saved to Data/20201126-all-clean-augmented/train_feature2.csv


In [None]:
import json
import os
import math
import librosa
import time
import multiprocessing

class Dataset:
    """Build, save and reload an 80-feature dataset from a folder-per-label corpus.

    Each file yields mean and variance of every MFCC, delta and delta-delta
    coefficient (13 coefficients x 3 matrices x 2 statistics = 78 values)
    followed by mean and variance of the RMS energy. Labels are one-hot:
    ``success`` -> [0, 1], ``stuttered`` -> [1, 0].
    """

    # Folder name -> one-hot label row.
    LABELS = {'success': [0, 1], 'stuttered': [1, 0]}
    # Number of feature columns; the remaining columns of a saved table are labels.
    N_FEATURES = 80

    def __init__(self,):
        self.X = np.empty(shape=(0, 80))
        self.Y = np.empty(shape=(0, 2))
        self.DATASET = None
        self.PATH_ARRAY = []
        self.failed_file = []
        self.unexpected_label = []
        self.processed_counter = 0
        print("Object created!")

    def create_dataset(self, dataset_path, output_path):
        """Extract features for every file under ``dataset_path`` and save them.

        Args:
            dataset_path: root folder containing one sub-folder per label.
            output_path: file written with ``np.savetxt`` (whitespace-delimited).
        """
        self.DATASET_PATH = dataset_path
        self.OUTPUT_PATH = output_path
        self.__process_dataset()
        self.__write_to_file()

    def update_mypbar(self):
        """Advance the current progress bar by one step."""
        # tqdm throttles its own redraws; a per-file sleep only slows the run.
        self.pbar.update(1)

    @staticmethod
    def _feature_vector(features):
        """Flatten one extractor's results into the 80-value feature row."""
        vector = []
        for matrix in (features.mfcc, features.delta_mfcc, features.delta2_mfcc):
            for coefficient in matrix:
                vector.append(np.mean(coefficient))
                vector.append(np.var(coefficient))
        vector.append(np.mean(features.rmse))
        vector.append(np.var(features.rmse))
        return vector

    def __process_dataset(self):
        """Walk the dataset, extract one feature row per file, fill X, Y and DATASET."""
        starttime = time.time()
        root = os.fspath(self.DATASET_PATH)
        # Collected in lists and stacked once; stacking per file is quadratic.
        rows, targets = [], []
        for dirpath, _dirnames, filenames in os.walk(root):
            if dirpath == root:
                continue  # files directly in the root carry no label
            # basename works with both '/' and '\\' separators on Windows.
            label = os.path.basename(os.path.normpath(dirpath))
            print("Processing:", label)
            if label not in self.LABELS:
                # Skipped before extraction so X and Y cannot get out of step.
                for name in filenames:
                    file_path = os.path.join(dirpath, name)
                    self.unexpected_label.append(file_path)
                    print("Fail ", self.processed_counter, file_path, ' label=', label)
                continue
            self.pbar = tqdm(total=len(filenames))
            try:
                for file in filenames:
                    file_path = os.path.join(dirpath, file)
                    try:
                        features = FeatureExtraction()
                        features.loadFile(file_path)
                        features.melspectrogram()
                        features.extractmfcc()
                        features.extractrmse()
                    except Exception as err:
                        # Record the failure and move on; without this the row
                        # would be built from a partially filled extractor.
                        print("[Error] there was an error in feature extraction. %s" % (err))
                        self.failed_file.append(file_path)
                    else:
                        rows.append(self._feature_vector(features))
                        targets.append(self.LABELS[label])
                        self.processed_counter += 1
                    self.update_mypbar()
            finally:
                self.pbar.close()

        for fail in self.unexpected_label:
            print("unexpected_label ", fail, " !")

        for fail in self.failed_file:
            print("fail ", fail, " !")

        print('Time taken = {} seconds'.format(time.time() - starttime))
        if rows:
            self.X = np.vstack((self.X, np.asarray(rows)))
            self.Y = np.vstack((self.Y, np.asarray(targets)))
        self.DATASET = np.hstack((self.X, self.Y))

    def __write_to_file(self):
        """Write DATASET to OUTPUT_PATH with numpy's default whitespace delimiter."""
        # NOTE(review): load_dataset reads '.csv' files with delimiter=','; a
        # '.csv' OUTPUT_PATH written here would not round-trip -- use
        # convert_to_csv for CSV output.
        np.savetxt(self.OUTPUT_PATH, self.DATASET)
        print('Saved to', self.OUTPUT_PATH)

    def load_dataset(self, dataset_path):
        """Load a saved table and split it into ``X`` (first 80 columns) and ``Y``.

        Args:
            dataset_path: path to a ``.csv`` (comma-delimited) or ``.gz``
                (whitespace-delimited) file.

        Returns:
            True when the data was loaded, False when the file is missing or
            its extension is not supported.
        """
        self.DATASET_PATH = dataset_path

        if not os.path.exists(self.DATASET_PATH):
            print('Not found ', self.DATASET_PATH)
            return False
        print("Dataset exist!")

        self.FILE_NAME, self.FILE_TYPE = os.path.splitext(self.DATASET_PATH)

        print("Loading ", self.DATASET_PATH)
        if self.FILE_TYPE == '.csv':
            print('Detect as .csv file')
            data = np.genfromtxt(self.DATASET_PATH, delimiter=',')
        elif self.FILE_TYPE == '.gz':
            print('Detect as .gz file')
            data = np.loadtxt(self.DATASET_PATH)
        else:
            print("Only support .gz and .csv file")
            return False

        # A single-row file loads as 1-D; keep it 2-D so column slicing works.
        self.DATA = np.atleast_2d(data)
        self.X = self.DATA[:, 0:self.N_FEATURES]
        self.Y = self.DATA[:, self.N_FEATURES:]
        return True

    def convert_to_csv(self, output_file):
        """Write the table loaded by ``load_dataset`` as a comma-separated file."""
        np.savetxt(output_file, self.DATA, delimiter=',')
        print('Converted to', output_file)

    def get_feature(self):
        """Return the feature matrix ``X``."""
        return self.X

    def get_label(self):
        """Return the one-hot label matrix ``Y``."""
        return self.Y

In [None]:
# Show the working directory; the relative 'Data/...' paths used in this
# notebook are resolved against it.
os.getcwd()

In [None]:
# Build the 11025 Hz test dataset, reload the saved table, then export it as CSV.
gz_table = 'Data/test/11025hz/train_11025.gz'
ds = Dataset()
ds.create_dataset('Data/test/11025hz', gz_table)
ds.load_dataset(gz_table)
ds.convert_to_csv('Data/test/11025hz/train_11025.csv')