# Preparing audio data

## Split audio into segments

In [None]:
from pydub import AudioSegment
import os

# Cut the raw "bouba" recording into consecutive one-second WAV clips.
input_file = "data/audio/bouba_raw.wav"
output_folder = "data/audio/bouba_segments/"

os.makedirs(output_folder, exist_ok=True)

audio = AudioSegment.from_wav(input_file)
segment_length = 1000  # milliseconds
max_segments = 60

for i in range(max_segments):
    start_time = i * segment_length
    end_time = (i + 1) * segment_length

    # Slicing past the end of the recording yields an empty segment instead
    # of raising, so stop here rather than exporting zero-length files.
    if start_time >= len(audio):
        print(f"{input_file} ended after {i} segment(s); expected {max_segments}.")
        break

    segment = audio[start_time:end_time]
    segment.export(os.path.join(output_folder, f"bouba_{i + 1:02d}.wav"), format="wav")

In [None]:
# Cut the raw "kiki" recording into consecutive one-second WAV clips.
input_file = "data/audio/kiki_raw.wav"
output_folder = "data/audio/kiki_segments/"

os.makedirs(output_folder, exist_ok=True)

audio = AudioSegment.from_wav(input_file)
segment_length = 1000  # milliseconds
max_segments = 60

for i in range(max_segments):
    start_time = i * segment_length
    end_time = (i + 1) * segment_length

    # Slicing past the end of the recording yields an empty segment instead
    # of raising, so stop here rather than exporting zero-length files.
    if start_time >= len(audio):
        print(f"{input_file} ended after {i} segment(s); expected {max_segments}.")
        break

    segment = audio[start_time:end_time]
    segment.export(os.path.join(output_folder, f"kiki_{i + 1:02d}.wav"), format="wav")

## Augment audio data

In [2]:
import os
import numpy as np
import librosa
import librosa.display
import soundfile as sf
from pydub import AudioSegment
from audiomentations import (
    Compose, TimeStretch, PitchShift, AddGaussianNoise, Shift, Gain
)

from tqdm import tqdm

# Augmentation chain applied to every clip. Each transform is configured
# with p=0.5, so repeated calls on the same clip give different results.
augmentation_steps = [
    # Time shift between -0.1 and 0.2.
    Shift(min_shift=-0.1, max_shift=0.2, p=0.5),
    # Time stretch with a rate between 0.9 and 1.1.
    TimeStretch(min_rate=0.9, max_rate=1.1, p=0.5),
    # Pitch shift of up to 3 semitones down or up.
    PitchShift(min_semitones=-3, max_semitones=3, p=0.5),
    # Additive Gaussian noise with amplitude between 0.0001 and 0.01.
    AddGaussianNoise(min_amplitude=0.0001, max_amplitude=0.01, p=0.5),
    # Volume change between -1 dB and +1 dB.
    Gain(min_gain_db=-1, max_gain_db=1, p=0.5),
]
augment = Compose(augmentation_steps)


In [3]:
# Folders holding the one-second source clips, and the root folder that
# receives their augmented copies.
bouba_dir = "data/audio/bouba_segments/"
kiki_dir = "data/audio/kiki_segments/"
augmented_dir = "data/audio/aug_audio/"

# Create the output root plus one sub-folder per class.
os.makedirs(augmented_dir, exist_ok=True)
for class_name in ("bouba", "kiki"):
    os.makedirs(os.path.join(augmented_dir, class_name), exist_ok=True)

# Dataset sizing: number of augmented variants to generate per original clip.
original_samples = 120  # 60 Bouba + 60 Kiki
target_samples = 3000
needed_augmentations = target_samples // original_samples

def augment_and_save(file_path, output_path, num_augmentations=25, sample_rate=22050):
    """Write several randomly augmented copies of one audio file.

    The clip is loaded once with ``librosa.load`` at ``sample_rate`` and then
    passed ``num_augmentations`` times through the module-level ``augment``
    pipeline. Each result is saved in ``output_path`` as ``<stem>_aug<i>.wav``,
    where ``<stem>`` is the input file name without its extension.

    Args:
        file_path: Path of the source audio file.
        output_path: Directory that receives the augmented files; it is
            created if it does not exist yet.
        num_augmentations: Number of augmented copies to write.
        sample_rate: Sampling rate (Hz) requested from ``librosa.load`` and
            used when writing the output files.
    """
    y, sr = librosa.load(file_path, sr=sample_rate)

    # splitext strips only the final extension, so names with extra dots
    # (e.g. "kiki.take2.wav") keep a distinct stem instead of colliding.
    stem = os.path.splitext(os.path.basename(file_path))[0]
    os.makedirs(output_path, exist_ok=True)

    for i in range(num_augmentations):
        y_aug = augment(samples=y, sample_rate=sr)
        sf.write(os.path.join(output_path, f"{stem}_aug{i}.wav"), y_aug, sr)

# Augment every WAV segment of both classes (Bouba first, then Kiki); the
# results go to the class's sub-folder under augmented_dir.
for source_dir, label in ((bouba_dir, "bouba"), (kiki_dir, "kiki")):
    destination = os.path.join(augmented_dir, label)
    for file in tqdm(os.listdir(source_dir)):
        # Ignore anything in the folder that is not a WAV clip.
        if not file.endswith(".wav"):
            continue
        augment_and_save(
            os.path.join(source_dir, file),
            destination,
            num_augmentations=needed_augmentations,
        )


  y, sr = librosa.load(file_path, sr=22050)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
100%|███████████████████████████████████████████| 60/60 [01:46<00:00,  1.77s/it]
100%|███████████████████████████████████████████| 60/60 [01:46<00:00,  1.77s/it]
