In [11]:
import os
import shutil
from glob import glob

import IPython
import numpy as np
import pandas as pd
import scipy
import scipy.signal
from IPython.display import clear_output
from scipy.io import wavfile

# 1. 전체 파일 리스트업

In [12]:
# Unique, sorted path stems (path without extension) of every file in the dataset folder.
# os.path.splitext strips only the final extension, so dots elsewhere in the path are kept.
ori_flist = np.unique([os.path.splitext(f)[0] for f in glob('input/respiratory-sound-database/audio_and_txt_files/*')])

# 2. 전처리 함수

In [13]:
# [Required] Resampling helper used to bring every recording to one common sampling rate
def ensure_sample_rate(original_sample_rate, waveform,
                       desired_sample_rate=16000):
    """Resample ``waveform`` to ``desired_sample_rate`` when its rate differs.

    Args:
        original_sample_rate: Sampling rate of ``waveform``.
        waveform: Sequence of samples; ``len(waveform)`` is used as the
            number of samples.
        desired_sample_rate: Target sampling rate (default 16000).

    Returns:
        Tuple ``(desired_sample_rate, waveform)``. When the rates already
        match the waveform is returned untouched; otherwise it is the result
        of ``scipy.signal.resample`` with
        ``round(len(waveform) / original_sample_rate * desired_sample_rate)``
        samples.

    Raises:
        ValueError: If the rates differ and either one is not positive.
    """
    if original_sample_rate == desired_sample_rate:
        return desired_sample_rate, waveform

    if original_sample_rate <= 0 or desired_sample_rate <= 0:
        raise ValueError(
            "sample rates must be positive, got {} and {}".format(
                original_sample_rate, desired_sample_rate))

    desired_length = int(round(float(len(waveform)) /
                               original_sample_rate * desired_sample_rate))
    if desired_length == 0:
        # Empty input (or input too short to yield a single output sample):
        # return an empty waveform instead of asking the resampler for zero samples.
        return desired_sample_rate, np.asarray(waveform)[:0]

    return desired_sample_rate, scipy.signal.resample(waveform, desired_length)

In [18]:
# [Optional] Additional preprocessing

# Sampling rate (Hz) the filter below is designed for
gSampleRate = 16000
# Upper edge of the pass band (Hz)
upperCutoffFreq = 3000
# Pass-band edges (Hz): 80 up to upperCutoffFreq
cutoffFrequencies = [80, upperCutoffFreq]

# 401-tap FIR band-pass design passing 80-3000 Hz
# (the name says "high pass", but the design is a band-pass).
highPassCoeffs = scipy.signal.firwin(
    numtaps=401,
    cutoff=cutoffFrequencies,
    pass_zero="bandpass",
    fs=gSampleRate,
)

def applyHighpass(npArr, axis=0):
    """Filter ``npArr`` with the module-level FIR coefficients ``highPassCoeffs``.

    Despite the name, ``highPassCoeffs`` is designed as a band-pass filter,
    so this removes content outside that band.

    Args:
        npArr: Samples to filter.
        axis: Axis that runs along time. Defaults to 0, the axis the rest of
            this notebook treats as time (``len()`` and ``[:n]`` slicing), so
            a ``(samples, channels)`` array is filtered per channel instead of
            across channels. For 1-D input this equals the previous behaviour.

    Returns:
        The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    # Denominator [1.0] means a pure FIR filter (no feedback terms).
    return scipy.signal.lfilter(highPassCoeffs, [1.0], npArr, axis=axis)

# A larger gamma compresses the signal more aggressively
def applyLogCompressor(signal, gamma):
    """Apply sign-preserving logarithmic compression to ``signal``.

    Each sample ``x`` becomes ``sign(x) * log(1 + |x| * gamma) / log(1 + gamma)``,
    so a sample of magnitude 1 maps to magnitude 1.

    Args:
        signal: Array of samples.
        gamma: Compression strength.

    Returns:
        Array of compressed samples.
    """
    # Reciprocal of the compressed value of a unit-magnitude sample
    norm = 1 / np.log(1.0 + gamma)
    magnitude = np.log(1 + np.abs(signal) * gamma) * norm
    return np.sign(signal) * magnitude

#Scales all samples to ensure the peak signal is 1/-1
def normalizeVolume(npArr):
    """Scale ``npArr`` so that its largest absolute sample becomes 1.

    Floating-point arrays are scaled in place and the same object is
    returned. Other inputs (integer arrays, lists) cannot hold the scaled
    values in place, so a new float64 array is returned and the input is
    left untouched. Empty and all-zero inputs are returned unscaled, since
    there is no peak to normalize to.

    Args:
        npArr: Real-valued samples.

    Returns:
        The normalized samples as a numpy array.
    """
    npArr = np.asarray(npArr)
    if npArr.size == 0:
        return npArr

    # Convert the extremes to Python floats before abs() so the most negative
    # value of a fixed-width integer dtype cannot overflow.
    minAmp, maxAmp = float(np.amin(npArr)), float(np.amax(npArr))
    maxEnv = max(abs(minAmp), abs(maxAmp))
    if maxEnv == 0:
        # Silent signal: scaling would divide by zero.
        return npArr

    if not np.issubdtype(npArr.dtype, np.floating):
        npArr = npArr.astype(np.float64)

    scale = 1.0 / maxEnv
    #in place multiply
    npArr *= scale
    return npArr

# 3. 데이터 파일 생성

In [19]:
def extract_data(f, outpath, preprocess=False):
    """Write a 20-second, 16-bit clip and cycle statistics for one recording.

    Reads ``f + '.txt'`` (tab-separated, no header) and ``f + '.wav'``. The
    audio is passed through ``ensure_sample_rate``, optionally filtered and
    normalized, scaled so its peak is 32767, cut to the first 20 seconds and
    written to ``outpath + <basename of f> + '.wav'``. A 2-element array with
    the mean and variance of ``column 1 - column 0`` over the kept annotation
    rows is saved next to it as ``.npy``.

    The recording is skipped (nothing is written) when fewer than two
    annotation rows have ``column 1 <= 20`` or when the resampled audio is
    shorter than 20 seconds.

    Args:
        f: Path of the recording without file extension.
        outpath: Prefix prepended to the output file name (e.g. ``'data/'``).
        preprocess: When truthy, apply ``applyHighpass`` followed by
            ``normalizeVolume`` before the final scaling.

    Returns:
        None.
    """
    clip_seconds = 20

    # Check the annotations first: they are cheap to read, so recordings that
    # will be rejected never pay for audio loading and resampling.
    # Columns 0 and 1 are presumably cycle start/end times in seconds -- confirm.
    df = pd.read_csv(f + '.txt', delimiter='\t', header=None)

    # Drop recordings without at least 2 breathing cycles in the first 20 seconds
    df = df[df[1] <= clip_seconds]
    if len(df) < 2:
        return

    sr, audio_wave_data = wavfile.read(f + '.wav', False)
    sr, audio_wave_data = ensure_sample_rate(sr, audio_wave_data)

    # Drop recordings shorter than 20 seconds
    clip_samples = sr * clip_seconds
    if len(audio_wave_data) < clip_samples:
        return

    # Work in float64: integer PCM that did not need resampling would
    # otherwise overflow when multiplied by 32767 below.
    audio_wave_data = np.asarray(audio_wave_data, dtype=np.float64)

    if preprocess:
        #Removing the low-freq noise, then re-normalizing volume
        audio_wave_data = normalizeVolume(applyHighpass(audio_wave_data))
        #audio_wave_data = applyLogCompressor(audio_wave_data, 30)

    # Scale to the full int16 range; a silent recording is left as zeros
    # instead of dividing by zero.
    peak = np.max(np.abs(audio_wave_data))
    if peak > 0:
        audio_wave_data = audio_wave_data * (2 ** 15 - 1) / peak

    out_stem = outpath + os.path.basename(f)
    wavfile.write(out_stem + '.wav', sr, audio_wave_data.astype(np.int16)[:clip_samples])

    cycle_lengths = df[1] - df[0]
    np.save(out_stem + '.npy', np.array([np.mean(cycle_lengths), np.var(cycle_lengths)]))

In [20]:
# Delete output left over from a previous run
for out_dir in ('data', 'data_pr'):
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)

for out_dir in ('data', 'data_pr'):
    os.makedirs(out_dir)

# Regenerate both datasets: data/ without preprocessing, data_pr/ with it
for i, f in enumerate(ori_flist):
    print("{}/{} : {}".format(i, len(ori_flist), f))

    for target, use_preprocess in (('data/', False), ('data_pr/', True)):
        extract_data(f, target, use_preprocess)

    # Keep only the latest progress line in the cell output
    clear_output(wait=True)

919/920 : input/respiratory-sound-database/audio_and_txt_files/226_1b1_Pl_sc_LittC2SE


### 샘플 들어보기

In [21]:
# Pass the path as `filename` so a missing file fails with a clear file error
# instead of being interpreted as raw audio data.
IPython.display.Audio(filename='data/226_1b1_Pl_sc_LittC2SE.wav')

In [22]:
# Pass the path as `filename` so a missing file fails with a clear file error
# instead of being interpreted as raw audio data.
IPython.display.Audio(filename='data_pr/226_1b1_Pl_sc_LittC2SE.wav')