# 📁 06_data_augmentation_final_fixed.ipynb

# 🏢 Step 6: Data Augmentation (Noise, Pitch Shift, Stretch, Mixed)
This notebook applies four types of augmentation to every input file:
- Add random noise
- Change pitch
- Time stretch
- Mixed: one of the above three, chosen at random

In [None]:
# 📂 Mount Google Drive
# Mounts Drive at /content/drive; the input and output folders used by this
# notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# 📦 Libraries
import os
import librosa
import soundfile as sf
import numpy as np
import random

In [None]:
# ⚙️ Settings
# Sample rate (Hz) passed to librosa.load when reading each file and to
# sf.write when saving every augmented copy.
sr = 16000

In [None]:
# 📂 Input and Output Paths
# Source folder holding the original .wav files.
input_folder = "/content/drive/MyDrive/final_data"

# One destination folder per augmentation type, keyed by augmentation name.
output_folders = {
    "noised": "/content/drive/MyDrive/data_noised",
    "pitch": "/content/drive/MyDrive/data_pitch",
    "stretch": "/content/drive/MyDrive/data_stretch",
    "mixed": "/content/drive/MyDrive/data_mixed",
}

# Create every destination up front; existing folders are left untouched.
for augmentation_name in output_folders:
    os.makedirs(output_folders[augmentation_name], exist_ok=True)

In [None]:
# 🔥 Augmentation Functions
def add_noise(y, noise_level=0.005):
    """Return a copy of ``y`` with zero-mean Gaussian noise added.

    Args:
        y: Audio samples as a NumPy array.
        noise_level: Standard deviation of the noise. Defaults to 0.005,
            the value that was previously hard-coded.

    Returns:
        A new array with the same shape as ``y``. For floating-point input
        the result keeps ``y``'s dtype; ``y`` itself is not modified.
    """
    noise = np.random.normal(0, noise_level, y.shape)
    if np.issubdtype(y.dtype, np.floating):
        # np.random.normal always yields float64; cast it so that e.g.
        # float32 audio is not silently upcast by the addition below.
        noise = noise.astype(y.dtype, copy=False)
    return y + noise

def pitch_shift_audio(y, sr):
    """Pitch-shift ``y`` by a randomly chosen non-zero number of steps.

    Args:
        y: Audio samples to shift.
        sr: Sample rate of ``y``, forwarded to librosa.

    Returns:
        The output of ``librosa.effects.pitch_shift`` for the chosen shift.
    """
    # Zero is deliberately absent so the output always differs in pitch.
    step_options = [-2, -1, 1, 2]
    n_steps = random.choice(step_options)
    shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    return shifted

def time_stretch_audio(y):
    """Time-stretch ``y`` by a random rate drawn uniformly from [0.9, 1.1].

    Args:
        y: Audio samples to stretch.

    Returns:
        The output of ``librosa.effects.time_stretch`` for the drawn rate.
    """
    lowest_rate, highest_rate = 0.9, 1.1
    stretch_rate = random.uniform(lowest_rate, highest_rate)
    stretched = librosa.effects.time_stretch(y, rate=stretch_rate)
    return stretched

def apply_random_augmentation(y, sr):
    """Apply exactly one randomly selected augmentation to ``y``.

    Args:
        y: Audio samples to augment.
        sr: Sample rate of ``y``; only used when pitch shifting is selected.

    Returns:
        The result of ``add_noise``, ``pitch_shift_audio`` or
        ``time_stretch_audio``, each picked with equal probability.
    """
    picked = random.choice(['noise', 'pitch', 'stretch'])
    if picked == 'pitch':
        return pitch_shift_audio(y, sr)
    if picked == 'stretch':
        return time_stretch_audio(y)
    # Only 'noise' remains at this point.
    return add_noise(y)

In [None]:
# 🔁 Process All Files
# Write four augmented copies (noised, pitch, stretch, mixed) of every .wav
# file in input_folder, one copy into each folder of output_folders.
failed_count = 0
for filename in os.listdir(input_folder):
    if not filename.endswith(".wav"):
        continue

    filepath = os.path.join(input_folder, filename)
    # Strip only the trailing extension. str.replace(".wav", ...) would also
    # rewrite any ".wav" that appears earlier in the file name.
    stem = os.path.splitext(filename)[0]
    try:
        y, _ = librosa.load(filepath, sr=sr)

        # Noise
        y_noised = add_noise(y)
        sf.write(os.path.join(output_folders['noised'], stem + "_noised.wav"), y_noised, sr)

        # Pitch Shift
        y_pitch = pitch_shift_audio(y, sr)
        sf.write(os.path.join(output_folders['pitch'], stem + "_pitch.wav"), y_pitch, sr)

        # Time Stretch
        y_stretch = time_stretch_audio(y)
        sf.write(os.path.join(output_folders['stretch'], stem + "_stretch.wav"), y_stretch, sr)

        # Mixed Random Augmentation
        y_mixed = apply_random_augmentation(y, sr)
        sf.write(os.path.join(output_folders['mixed'], stem + "_mixed.wav"), y_mixed, sr)

    except Exception as e:
        # Best effort: one unreadable file must not abort the whole batch,
        # but it is counted so the final summary stays truthful.
        failed_count += 1
        print(f"‚ö†Ô∏è Error processing {filename}: {e}")

# Only claim success when every file was processed without an error.
if failed_count == 0:
    print("‚úÖ All data augmentations completed successfully!")
else:
    print(f"Data augmentation finished with {failed_count} file(s) failing; see errors above.")