In [1]:
import ast_preprocess_dataloader
from ast_preprocess_dataloader import AudioPipeline
import torchaudio
import torch
import os
import glob
from torch.utils.data import Dataset, DataLoader
from audiomentations import Compose, TimeStretch, PitchShift, BandPassFilter, Normalize, AddBackgroundNoise
import numpy as np
from transformers import AutoProcessor
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dir_path = r'C:../dataset/audioonly/labeled/orig_resample'
os.path.exists(dir_path)

True

In [3]:
class_name = os.listdir(dir_path)
class_name

['bellypain', 'discomfort', 'hungry', 'tired']

In [4]:
audio_dir = [os.path.join(dir_path, name) for name in class_name]
audio_dir

['C:../dataset/audioonly/labeled/orig_resample\\bellypain',
 'C:../dataset/audioonly/labeled/orig_resample\\discomfort',
 'C:../dataset/audioonly/labeled/orig_resample\\hungry',
 'C:../dataset/audioonly/labeled/orig_resample\\tired']

In [5]:
bpain_audio = glob.glob(os.path.join(audio_dir[0], '*.wav'))
discomf_audio = glob.glob(os.path.join(audio_dir[1], '*.wav'))
hungry_audio = glob.glob(os.path.join(audio_dir[2], '*.wav'))
tired_audio = glob.glob(os.path.join(audio_dir[3], '*.wav'))

In [6]:
audio_path_class = {
    'bpain': bpain_audio,
    'discomf': discomf_audio,
    'hungry': hungry_audio,
    'tired': tired_audio
}

In [7]:
noise_dir = 'C:./noise'
os.path.exists(noise_dir)

True

In [8]:
noise_path = glob.glob(os.path.join(noise_dir, '*.wav'))

In [9]:
len(noise_path)

20

In [10]:
from random import shuffle

shuffled_noise = shuffle(noise_path)

In [11]:
noise_path

['C:./noise\\N-10_221010_A_3_f_13663.wav',
 'C:./noise\\N-10_220916_A_3_e_12771.wav',
 'C:./noise\\N-10_220916_A_3_e_12761.wav',
 'C:./noise\\N-10_220916_A_3_e_12762.wav',
 'C:./noise\\N-10_220926_A_3_b_09033.wav',
 'C:./noise\\N-10_220916_A_3_e_12764.wav',
 'C:./noise\\N-10_220924_A_3_b_08705.wav',
 'C:./noise\\N-10_221010_A_3_f_13666.wav',
 'C:./noise\\N-10_220923_A_1_a_00311.wav',
 'C:./noise\\N-10_220924_A_3_b_08704.wav',
 'C:./noise\\N-10_221010_A_3_f_13662.wav',
 'C:./noise\\N-10_220923_A_1_a_00310.wav',
 'C:./noise\\N-10_220923_A_1_a_00308.wav',
 'C:./noise\\N-10_220916_A_3_e_12757.wav',
 'C:./noise\\N-10_220923_A_1_a_00305.wav',
 'C:./noise\\N-10_221010_A_3_f_13661.wav',
 'C:./noise\\N-10_220923_A_1_a_00314.wav',
 'C:./noise\\N-10_220923_A_3_b_09136.wav',
 'C:./noise\\N-10_221010_A_3_f_13659.wav',
 'C:./noise\\N-10_220926_A_3_b_09028.wav']

In [12]:
all_paths = []
all_labels = []

for (label, class_name), path_lst in zip(enumerate(audio_path_class), audio_path_class.values()):
    for path in path_lst:
        all_paths.append(path)
        all_labels.append(label)

## **훈련, 검증, 테스트 경로 쪼개기**

In [13]:
train_paths, val_paths, train_labels, val_labels = train_test_split(all_paths, all_labels, test_size=0.3, random_state=42)

In [14]:
# 갯수 확인
print(len(train_paths))
print(len(val_paths))
print(len(train_labels))
print(len(val_labels))

136
59
136
59


In [15]:
val_paths, test_paths, val_labels, test_labels = train_test_split(val_paths, val_labels, test_size=0.5, random_state=42)

In [16]:
# 갯수 확인
print(len(val_paths))
print(len(test_paths))
print(len(val_labels))
print(len(test_labels))

29
30
29
30


## **파이프라인 만들기**

In [17]:
augmentations = Compose([
    AddBackgroundNoise(sounds_path = noise_path,
                       min_snr_db=17,
                       max_snr_db=17,
                       p=0.3),
    TimeStretch(min_rate=0.9, max_rate=1.1, p=0.5),
    PitchShift(min_semitones=-1.1, max_semitones=1.1, p=0.5),
    BandPassFilter(min_center_freq=1500, max_center_freq=1500,
                   max_bandwidth_fraction=1.33, min_bandwidth_fraction=1.33,
                   max_rolloff=12, min_rolloff=12, p=1.0),
    Normalize(p=1.0)
])

train_dataset = AudioPipeline(audio_paths=train_paths, audio_labels=train_labels, sr=20000, transform=augmentations)
val_dataset = AudioPipeline(audio_paths=val_paths, audio_labels=val_labels, sr=20000)
test_dataset = AudioPipeline(audio_paths=val_paths, audio_labels=val_labels, sr=20000)



In [18]:
train_dataloader = DataLoader(train_dataset, batch_size=10, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=10, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=10, shuffle=True)

## **훈련, 검증, 테스트 batch 불러오기**

In [19]:
# 훈련 데이터 배치 불러오기
for batch in train_dataloader:
    input_values, labels = batch
    print(input_values.shape, labels.shape)



torch.Size([10, 1024, 128]) torch.Size([10])




torch.Size([10, 1024, 128]) torch.Size([10])




torch.Size([10, 1024, 128]) torch.Size([10])




torch.Size([10, 1024, 128]) torch.Size([10])




torch.Size([10, 1024, 128]) torch.Size([10])
torch.Size([10, 1024, 128]) torch.Size([10])




torch.Size([10, 1024, 128]) torch.Size([10])




torch.Size([10, 1024, 128]) torch.Size([10])




torch.Size([10, 1024, 128]) torch.Size([10])
torch.Size([10, 1024, 128]) torch.Size([10])
torch.Size([10, 1024, 128]) torch.Size([10])




torch.Size([10, 1024, 128]) torch.Size([10])
torch.Size([10, 1024, 128]) torch.Size([10])




torch.Size([6, 1024, 128]) torch.Size([6])


In [20]:
# 검증 데이터 배치 불러오기
for batch in val_dataloader:
    input_values, labels = batch
    print(input_values.shape, labels.shape)

torch.Size([10, 1024, 128]) torch.Size([10])
torch.Size([10, 1024, 128]) torch.Size([10])
torch.Size([9, 1024, 128]) torch.Size([9])


In [21]:
# 테스트 데이터 배치 불러오기
for batch in test_dataloader:
    input_values, labels = batch
    print(input_values.shape, labels.shape)

torch.Size([10, 1024, 128]) torch.Size([10])
torch.Size([10, 1024, 128]) torch.Size([10])
torch.Size([9, 1024, 128]) torch.Size([9])
