データセット処理系の最適化検討

In [20]:
import re
import os
import wave
import copy
import random
import numpy as np
import numba
import scipy
import sklearn

from scipy.signal import spectrogram
from sklearn.preprocessing import minmax_scale

In [132]:
class Core_DS:
    def __init__(self):
        self.data_dir = "C:\\Users\\amane\\OneDrive\\python\\proto2020\\bin\\proto2020_093\\data"
        self.save_dir = "C:\\Users\\amane\\OneDrive\\python\\proto2020\\bin\\proto2020_093\\data\\waves"
        self.br = 8
        self.sr = 22050
        self.aug_amt = 512
        self.chunk = 1024


    """対象フォルダ内のWaveファイルの一覧を取得"""
    def elem_wav_search(self,dir):
        x = []

        for i in os.listdir(dir):
            search_index = re.search(".wav",i)
            if search_index:
                x.append(i)
        print ("Files to process:{0}".format(x))

        return x

    """""オーディオファイルの読み込み wave_readオブジェクトを返す"""
    def elem_load_wav(self,dir,file):
        x = np.arange(0)

        #バイナリを読み出す
        x = wave.open(
            os.path.join(dir,file),"rb").readframes(-1)

        return x
    
    """バイナリデータからnumpy配列に変換しノーマライズ/スケーリング"""
    @numba.jit
    def elem_BtoNP_w_norm(self,wr_obj):
        x = np.arange(0)
        x = minmax_scale(
            np.frombuffer(wr_obj,dtype = "int" + str(self.br *2 )),
            feature_range=(-1,1)
        )

        return x

    """ノイズの付与"""
    @numba.jit
    def elem_add_noize(self,w_array):
        x = np.arange(0)
        x = w_array + np.random.randn(len(w_array))*random.uniform(0,0.001)
        return x

    """スペクトログラムの取得"""
    @numba.jit
    def elem_get_spg(self,noized_array):
        noized_array = noized_array.astype(np.float32)    #float32に変換
        spg = np.arange(0)
        freq,tme,spg = spectrogram(
            noized_array,
            fs = self.sr,
            window = np.hamming(self.chunk),
            nfft = self.chunk,
            scaling = "spectrum",
            mode = "magnitude"
        )
        return freq,tme,spg

    """単一ファイルに対するAugmentation処理"""
    @numba.jit
    def elem_aug(self,target_ary):
        for i in range(self.aug_amt):
            #ノイズの付与
            wf = self.elem_add_noize(target_ary)

            #スペクトログラムの取得
            freq,tme,spg = self.elem_get_spg(wf)
            spg = spg.reshape(1,len(freq),len(tme))    #3次元配列に変換

            #水増しされたデータを積み上げる
            if i == 0:
                x= copy.deepcopy(spg)
            else:
                x= np.vstack((x,spg))
        
        return freq,tme,x,i

    """データセットの作成"""
    @numba.jit
    def elem_make_ds(self,dir):
        wave_list = self.elem_wav_search(dir) #ウェーブリストを読み込む
        counter = 0     #カウンター初期化

        for i in wave_list:
            w_file = self.elem_BtoNP_w_norm(
                self.elem_load_wav(dir,i)
                )       #波形を読み込み、ノーマライズ/スケーリング

            freq,tme,auged_spg,count = self.elem_aug(w_file)    #Augmentation処理

            if i == wave_list[0]:
                x= copy.deepcopy(auged_spg)
            else:
                x= np.vstack((x,auged_spg))

            counter = counter + (count+1 )  #カウンター
            print("\rAugmentation count = {}".format(counter),end= "")

        print("\rFiles in",dir,"Augmentation done. \
            \namounts =",counter, "\ndtype =",x.dtype)

        np.random.shuffle(x)    #混ぜる

        return freq,tme,x
    
 

In [134]:
%%time
ds = Core_DS()
freq,time,x = ds.elem_make_ds("C:\\Users\\amane\\OneDrive\\python\\proto2020\\bin\\proto2020_093\\data\\waves")

Files to process:['OK-1.wav', 'OK-2.wav', 'OK-3.wav']
Files in C:\Users\amane\OneDrive\python\proto2020\bin\proto2020_093\data\waves Augmentation done.             
amounts = 1536 
dtype = float32
Wall time: 55.1 s


---
前の処理系の速度(事前に計測)

In [25]:
%%time
ds = Core_DS()
freq,time,x = ds.elem_make_ds("C:\\Users\\amane\\OneDrive\\python\\proto2020\\bin\\proto2020_093\\data\\waves")

Files to process:['OK-1.wav', 'OK-2.wav', 'OK-3.wav']
Files in C:\Users\amane\OneDrive\python\proto2020\bin\proto2020_093\data\waves Augmentation done.             
amounts = 1536 
dtype = float32
Wall time: 57.1 s


## わかったこと
‐ float64→float32への変換はスペクトログラム取得時点でやるのがパフォーマンス上有利