# Slicing
하나의 오디오 파일을 일정 구간별로 자릅니다.
realtime으로 오디오를 입력받을 때 오디오를 일정 구간의 buffer로 나누어 받습니다.
이를 학습 과정에서도 동일하게 적용합니다.

In [1]:
import librosa
import os
import numpy as np
import noisereduce as nr
import soundfile as sf
import random

In [2]:
# 1. nr, trimming
# 2. slicing into fixed-length chunks (originally 64 ms or 128 ms; the slicing cells below use MS = 256 ms)

In [3]:
def nr_and_trimming(file, sr=16000, noise_samples=10000, top_db=5):
    """Load an audio file, reduce its noise, and trim silent edges.

    Args:
        file: Path of the audio file to load.
        sr: Target sample rate passed to ``librosa.load`` (default 16000).
        noise_samples: Number of leading samples used as the noise profile
            for ``noisereduce`` (default 10000).
        top_db: Silence threshold in dB forwarded to
            ``librosa.effects.trim`` (default 5).

    Returns:
        The noise-reduced signal with leading and trailing silence removed.
    """
    # `sr` must be passed by keyword: newer librosa releases make every
    # argument after the path keyword-only, so `load(file, 16000)` fails.
    audio_data, sample_rate = librosa.load(file, sr=sr)

    # Noise reduction. The first `noise_samples` samples are used as the
    # noise profile; slicing is safe even when the clip is shorter.
    noisy_part = audio_data[:noise_samples]
    reduced_noise = nr.reduce_noise(
        y=audio_data, y_noise=noisy_part, sr=sample_rate
    )

    # Trimming: remove the silent sections at the start and end of the clip.
    trimmed, _ = librosa.effects.trim(
        reduced_noise, top_db=top_db, hop_length=256, frame_length=512
    )

    return trimmed

In [5]:
# Speaking data trimming: denoise and trim every WAV file under
# Speaking/RIR and write the result to Speaking/trimmed.

speaking_path = r'./Dataset_audio/Speaking/'
speaking_rir_path = os.path.join(speaking_path, 'RIR')
speaking_trimmed_path = os.path.join(speaking_path, 'trimmed')

# Make sure the output directory exists before writing into it.
os.makedirs(speaking_trimmed_path, exist_ok=True)

for file in os.listdir(speaking_rir_path):
    # str.endswith accepts a tuple; this replaces the bitwise `|` on booleans.
    if not file.endswith(('.wav', '.WAV')):
        continue
    trimmed = nr_and_trimming(os.path.join(speaking_rir_path, file))
    # splitext keeps the full stem, so names containing extra dots
    # (e.g. "a.b.wav") do not lose their prefix and collide.
    stem = os.path.splitext(file)[0]
    sf.write(
        os.path.join(speaking_trimmed_path, stem + '_trimmed.wav'),
        trimmed,
        16000,
        format='wav',
    )

print(len(os.listdir(speaking_trimmed_path)))

8115


In [7]:
# OtherSound (non-speaking) data trimming: denoise and trim every WAV file
# under OtherSound/RIR and write the result to OtherSound/trimmed.

OtherSound_path = r'./Dataset_audio/OtherSound/'
OtherSound_rir_path = os.path.join(OtherSound_path, 'RIR')
# The original referenced `noise_path`, which is not defined in this
# notebook; the trimmed folder belongs under OtherSound_path.
OtherSound_trimmed_path = os.path.join(OtherSound_path, 'trimmed')

# Make sure the output directory exists before writing into it.
os.makedirs(OtherSound_trimmed_path, exist_ok=True)

for file in os.listdir(OtherSound_rir_path):
    # str.endswith accepts a tuple; this replaces the bitwise `|` on booleans.
    if not file.endswith(('.wav', '.WAV')):
        continue
    trimmed = nr_and_trimming(os.path.join(OtherSound_rir_path, file))
    # splitext keeps the full stem, so names containing extra dots
    # (e.g. "a.b.wav") do not lose their prefix and collide.
    stem = os.path.splitext(file)[0]
    sf.write(
        os.path.join(OtherSound_trimmed_path, stem + '_trimmed.wav'),
        trimmed,
        16000,
        format='wav',
    )

print(len(os.listdir(OtherSound_trimmed_path)))

8015


In [8]:
# Slicing: cut every trimmed speaking clip into consecutive MS-millisecond
# chunks and export each chunk as its own WAV file.
from pydub import AudioSegment
speaking_path = r'./Dataset_audio/Speaking/'
# Slicing unit in milliseconds (pydub lengths and slices are in ms).
MS = 256

# Join the directory components instead of concatenating strings, so the
# result does not depend on speaking_path ending with a slash.
speaking_trimmed_path = os.path.join(speaking_path, 'trimmed')
speaking_sliced_path = os.path.join(speaking_path, 'sliced')

# Make sure the output directory exists before exporting into it.
os.makedirs(speaking_sliced_path, exist_ok=True)

for file in os.listdir(speaking_trimmed_path):
    # Skip stray non-WAV entries instead of failing inside from_wav.
    if not file.endswith(('.wav', '.WAV')):
        continue
    trimmed = AudioSegment.from_wav(os.path.join(speaking_trimmed_path, file))
    if len(trimmed) < 1024:  # filter out clips that are too short
        continue
    # splitext keeps the full stem even when the name contains extra dots.
    stem = os.path.splitext(file)[0]
    # NOTE(review): the last chunk of a clip can be shorter than MS when the
    # clip length is not a multiple of MS - confirm this is intended.
    for i in range(0, len(trimmed), MS):
        trimmed[i:i + MS].export(
            os.path.join(speaking_sliced_path, stem + str(i) + '_sliced.wav'),
            format='wav',
        )

print(len(os.listdir(speaking_sliced_path)))

49808


In [9]:
# Slicing: cut every trimmed OtherSound clip into consecutive
# MS-millisecond chunks and export each chunk as its own WAV file.

# Slicing unit in milliseconds (pydub lengths and slices are in ms).
MS = 256 # 256ms
OtherSound_path = r'./Dataset_audio/OtherSound'
OtherSound_trimmed_path = os.path.join(OtherSound_path, 'trimmed')
OtherSound_sliced_path = os.path.join(OtherSound_path, 'sliced')

# Make sure the output directory exists before exporting into it.
os.makedirs(OtherSound_sliced_path, exist_ok=True)

for file in os.listdir(OtherSound_trimmed_path):
    # Skip stray non-WAV entries instead of failing inside from_wav.
    if not file.endswith(('.wav', '.WAV')):
        continue
    trimmed = AudioSegment.from_wav(os.path.join(OtherSound_trimmed_path, file))
    if len(trimmed) < 1024:  # filter out clips that are too short
        continue
    # splitext keeps the full stem even when the name contains extra dots.
    stem = os.path.splitext(file)[0]
    # NOTE(review): the last chunk of a clip can be shorter than MS when the
    # clip length is not a multiple of MS - confirm this is intended.
    for i in range(0, len(trimmed), MS):
        trimmed[i:i + MS].export(
            os.path.join(OtherSound_sliced_path, stem + str(i) + '_sliced.wav'),
            format='wav',
        )

print(len(os.listdir(OtherSound_sliced_path)))

128201


In [10]:
# Balancing: measure how far apart the two classes are in file count.
# Positive -> more speaking slices; negative -> more OtherSound slices.
speaking_count = len(os.listdir(speaking_sliced_path))
other_sound_count = len(os.listdir(OtherSound_sliced_path))
filenum_diff = speaking_count - other_sound_count
filenum_diff

-78393

In [11]:
# Delete a random subset of the larger class so both classes end up with
# the same number of slices. filenum_diff is (#speaking - #OtherSound), so
# a negative value means OtherSound holds the surplus.
if filenum_diff < 0:
    surplus_path = OtherSound_sliced_path
else:
    surplus_path = speaking_sliced_path

erase_files = random.sample(os.listdir(surplus_path), abs(filenum_diff))

# Iterate over the sample itself: testing `file in erase_files` for every
# directory entry is a list scan per file, i.e. quadratic work.
for file in erase_files:
    os.remove(os.path.join(surplus_path, file))

In [12]:
# Sanity check: both classes should now contain the same number of slices.
speaking_total = len(os.listdir(speaking_sliced_path))
other_sound_total = len(os.listdir(OtherSound_sliced_path))
speaking_total == other_sound_total

True

In [13]:
# Number of OtherSound slices left in the sliced directory.
remaining_other_sound = os.listdir(OtherSound_sliced_path)
len(remaining_other_sound)

49808