In [8]:
import os
import numpy as np
import librosa
import librosa.display
from tqdm import tqdm

In [9]:
def preprocess_audio(file_path, sr=22050, duration=1.0, n_mfcc=13, n_mels=128, max_len=44):
    """Turn one audio file into a list of fixed-size feature matrices.

    The file is loaded with ``librosa.load`` and, if it is shorter than
    ``duration`` seconds, zero-padded at the end up to that length.  The signal
    is then cut into consecutive windows of ``sr`` samples (one second each;
    the final window may be shorter).  For every window a dB-scaled mel
    spectrogram and normalized MFCCs computed from it are stacked row-wise and
    padded or truncated along the column axis to ``max_len`` columns.

    Args:
        file_path: Path of the audio file to load.
        sr: Target sampling rate passed to ``librosa.load``.
        duration: Minimum signal length in seconds; shorter signals are
            zero-padded.
        n_mfcc: Number of MFCC coefficients requested from librosa.
        n_mels: Number of mel bands requested from librosa.
        max_len: Number of columns in every returned matrix.

    Returns:
        A list with one ``np.ndarray`` per window.  Each array is the vertical
        stack of the mel spectrogram (dB) and the MFCCs, with exactly
        ``max_len`` columns.
    """
    y, sr = librosa.load(file_path, sr=sr)

    # `sr * duration` is a float for the default duration=1.0, and np.pad only
    # accepts integer pad widths, so convert to a whole number of samples
    # before computing how much padding is missing.
    min_samples = int(round(sr * duration))
    if len(y) < min_samples:
        y = np.pad(y, (0, min_samples - len(y)), mode='constant')

    # Split into consecutive 1-second windows (sr samples each).
    clips = [y[i:i + sr] for i in range(0, len(y), sr)]

    features = []
    for clip in clips:
        # Mel spectrogram, converted to dB relative to the window's own peak.
        mel_spec = librosa.feature.melspectrogram(y=clip, sr=sr, n_mels=n_mels)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # MFCCs derived from the dB mel spectrogram, then normalized.
        mfcc = librosa.feature.mfcc(S=mel_spec_db, sr=sr, n_mfcc=n_mfcc)
        mfcc = librosa.util.normalize(mfcc)

        # Stack mel rows on top of MFCC rows.
        combined = np.vstack([mel_spec_db, mfcc])

        # Force a fixed number of columns: right-pad with zeros or truncate.
        # NOTE(review): the mel rows are in dB relative to the peak, so a
        # zero-valued pad column equals the peak level rather than silence --
        # confirm this is what the downstream model expects.
        n_frames = combined.shape[1]
        if n_frames < max_len:
            combined = np.pad(combined, ((0, 0), (0, max_len - n_frames)), mode='constant')
        else:
            combined = combined[:, :max_len]

        features.append(combined)
    return features

In [10]:
# Folder the audio files are read from, and folder that receives the
# per-clip feature arrays written by the extraction loop.
data_dir = 'C:/Users/SSAFY/Desktop/original_noise_data'
save_dir = "C:/Users/SSAFY/Desktop/noise_processed_combined_features"

# Make sure the output folder exists; an already existing folder is fine.
os.makedirs(name=save_dir, exist_ok=True)

In [11]:
# Build the combined features for every audio file and save one .npy per clip.
# The extension is compared in lower case so files such as "X.WAV" are not
# silently skipped, and the listing is sorted so files are processed in a
# stable order from run to run.
for file_name in tqdm(sorted(os.listdir(data_dir))):
    if not file_name.lower().endswith(('.wav', '.mp3')):
        continue
    file_path = os.path.join(data_dir, file_name)
    features = preprocess_audio(file_path)
    base_name = os.path.splitext(file_name)[0]
    for i, feature in enumerate(features):
        # The clip index becomes the file-name suffix: <base>_<i>.npy
        save_path = os.path.join(save_dir, f'{base_name}_{i}.npy')
        np.save(save_path, feature)

100%|██████████| 194/194 [00:11<00:00, 17.14it/s]


In [12]:
def augment_audio(data, noise_std=0.1, rng=None):
    """Return four variants of a 2-D feature matrix.

    The variants are, in order:

    0. the input itself (same object, not a copy),
    1. the input with its rows in reverse order (``np.flipud``),
    2. the input with its columns in reverse order (``np.fliplr``),
    3. the input plus zero-mean Gaussian noise.

    Args:
        data: Array with at least two dimensions (``np.fliplr`` requires it).
        noise_std: Standard deviation of the added Gaussian noise.
        rng: Optional random generator exposing ``normal(loc, scale, size)``,
            e.g. ``np.random.default_rng(seed)``.  When omitted, the global
            ``np.random`` state is used, so ``np.random.seed`` still controls
            the result.

    Returns:
        A list of four arrays, each with the same shape as ``data``.  Entries
        1 and 2 are views onto ``data``.
    """
    # Flip along axis 0: the row order is reversed (top <-> bottom).
    rows_reversed = np.flipud(data)

    # Flip along axis 1: the column order is reversed (left <-> right).
    cols_reversed = np.fliplr(data)

    # Additive Gaussian noise; a caller-supplied generator makes it reproducible.
    sampler = np.random if rng is None else rng
    noisy = data + sampler.normal(0, noise_std, data.shape)

    return [data, rows_reversed, cols_reversed, noisy]

In [13]:
# Destination folder for the augmented feature arrays.
augmented_save_dir = 'C:/Users/SSAFY/Desktop/noise_augmented_combined_features_noise'

# Create it if it is missing; an already existing folder is fine.
os.makedirs(name=augmented_save_dir, exist_ok=True)

In [14]:
# Apply data augmentation to every saved combined-feature file.
for file_name in tqdm(os.listdir(save_dir)):
    # Only NumPy array dumps are augmented; any other entry is left alone.
    if not file_name.endswith('.npy'):
        continue

    base_name = os.path.splitext(file_name)[0]
    file_path = os.path.join(save_dir, file_name)
    data = np.load(file_path)
    augmented_data = augment_audio(data)

    # One output file per variant, suffixed with the variant's list index.
    for i, aug_data in enumerate(augmented_data):
        save_path = os.path.join(augmented_save_dir, f'{base_name}_aug_{i}.npy')
        np.save(save_path, aug_data)

100%|██████████| 1348/1348 [00:06<00:00, 220.26it/s]
