In [1]:
import os
import numpy as np
import librosa
import librosa.display
from tqdm import tqdm

In [2]:
def preprocess_audio(file_path, sr=22050, duration=1.0, n_mels=128):
    """Load an audio file and return one dB-scaled mel spectrogram per clip.

    The signal is loaded with ``librosa.load`` at sampling rate ``sr``,
    zero-padded at the end if it is shorter than a single clip, and then cut
    into consecutive, non-overlapping clips of ``duration`` seconds.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        sr: Sampling rate (Hz) requested from ``librosa.load``.
        duration: Length of each clip in seconds.
        n_mels: Number of mel bands per spectrogram.

    Returns:
        A list with one array per clip, each the result of
        ``librosa.power_to_db(melspectrogram, ref=np.max)``. Only a signal
        shorter than one clip is padded; a trailing partial clip of a longer
        signal is kept as-is, so the last spectrogram may differ in size.

    Raises:
        ValueError: If ``sr * duration`` rounds to less than one sample.
    """
    y, sr = librosa.load(file_path, sr=sr)

    # sr * duration is a float (e.g. 22050 * 1.0); np.pad and slicing need an
    # integer sample count, so convert once and reuse it everywhere.
    clip_len = int(round(sr * duration))
    if clip_len <= 0:
        raise ValueError(
            f"sr * duration must be at least one sample, got {sr} * {duration}"
        )

    # Pad audio so that even a very short recording yields one full clip.
    if len(y) < clip_len:
        y = np.pad(y, (0, clip_len - len(y)), mode='constant')

    # Split into clips of `duration` seconds (previously always `sr` samples,
    # which silently ignored the `duration` argument).
    clips = [y[start:start + clip_len] for start in range(0, len(y), clip_len)]

    mel_spectrograms = []
    for clip in clips:
        mel_spec = librosa.feature.melspectrogram(y=clip, sr=sr, n_mels=n_mels)
        # ref=np.max expresses each clip in dB relative to its own peak power.
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
        mel_spectrograms.append(mel_spec_db)
    return mel_spectrograms

In [3]:
# Directory that is scanned for the raw .wav / .mp3 recordings to preprocess.
# NOTE(review): hard-coded absolute local path; adjust it for your machine.
data_dir = 'C:/Users/SSAFY/Desktop/original_noise_data'

In [4]:
# Directory in which the preprocessed files (mel spectrogram .npy arrays) are saved.
save_dir = "C:/Users/SSAFY/Desktop/processed_noise_data"

In [5]:
# Create the output directory up front; exist_ok=True keeps re-running this cell harmless.
os.makedirs(save_dir, exist_ok=True)

In [6]:
# Generate and save mel spectrograms for every audio file in data_dir.
for file_name in tqdm(os.listdir(data_dir)):
    # Anything that is not a .wav / .mp3 entry is skipped.
    if not file_name.endswith(('.wav', '.mp3')):
        continue
    file_path = os.path.join(data_dir, file_name)
    mel_spectrograms = preprocess_audio(file_path)
    base_name = os.path.splitext(file_name)[0]
    # One .npy file per clip, suffixed with the clip index.
    for i, mel_spec in enumerate(mel_spectrograms):
        save_path = os.path.join(save_dir, f'{base_name}_{i}.npy')
        np.save(save_path, mel_spec)

  0%|          | 0/194 [00:00<?, ?it/s]

100%|██████████| 194/194 [00:08<00:00, 22.48it/s]


In [7]:
import numpy as np

def augment_audio(data, noise_std=0.1, rng=None):
    """Return simple augmentations of a 2-D array (e.g. a mel spectrogram).

    Args:
        data: Array with at least two dimensions (``np.fliplr`` requires it).
        noise_std: Standard deviation of the additive zero-mean Gaussian
            noise. Defaults to 0.1, the value previously hard-coded.
        rng: Optional object exposing ``normal(loc, scale, size)`` such as
            ``np.random.default_rng(seed)``. When ``None`` the global
            ``np.random`` state is used, exactly as before.

    Returns:
        A list of four arrays, in this fixed order:
        0. the input itself (same object, not a copy),
        1. the input flipped along axis 0 (``np.flipud``),
        2. the input flipped along axis 1 (``np.fliplr``),
        3. the input plus Gaussian noise.
    """
    augmented_data = []

    # Original
    augmented_data.append(data)

    # Flip along axis 0 (up/down). For the (n_mels, frames) spectrograms saved
    # earlier in this notebook this mirrors the frequency axis, not time.
    reversed_data = np.flipud(data)
    augmented_data.append(reversed_data)

    # Flip along axis 1 (left/right). For (n_mels, frames) input this is the
    # time reversal.
    flipped_data = np.fliplr(data)
    augmented_data.append(flipped_data)

    # Gaussian noise; pass a seeded `rng` to make the result reproducible.
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, noise_std, data.shape)
    noisy_data = data + noise
    augmented_data.append(noisy_data)

    return augmented_data

# Directory in which the augmented data is saved; created if it does not exist yet.
augmented_save_dir = 'C:/Users/SSAFY/Desktop/noise_augmented_mel_spectrograms'
os.makedirs(augmented_save_dir, exist_ok=True)

# Apply data augmentation to every mel spectrogram file in save_dir.
for file_name in tqdm(os.listdir(save_dir)):
    # Only the .npy arrays are augmented.
    if not file_name.endswith('.npy'):
        continue
    file_path = os.path.join(save_dir, file_name)
    data = np.load(file_path)
    augmented_data = augment_audio(data)
    base_name = os.path.splitext(file_name)[0]
    # One output file per variant, suffixed with its index in the returned list.
    for i, aug_data in enumerate(augmented_data):
        save_path = os.path.join(augmented_save_dir, f'{base_name}_aug_{i}.npy')
        np.save(save_path, aug_data)


100%|██████████| 1348/1348 [00:03<00:00, 391.19it/s]
