In [None]:
!pip install pydub
!pip install huggingface_hub

In [9]:
import glob
import os
import random
import shutil
from typing import Sequence

import pandas as pd
from huggingface_hub import snapshot_download, notebook_login
from pydub import AudioSegment
from tqdm import tqdm

In [None]:
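# Uncomment to authenticate with the Hugging Face Hub (e.g. if the dataset repository requires a login).
# notebook_login()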

In [7]:
# Every dataset directory lives under one shared root (relative path).
_DATASET_ROOT = "../../dataset"

AUGMENTED_DATASET_FOLDER = f"{_DATASET_ROOT}/dataset_augmented"  # flat copy of the data + generated variants
DATASET_FOLDER = f"{_DATASET_ROOT}/data"  # source recordings, one sub-folder level deep
NOISE_FOLDER = f"{_DATASET_ROOT}/noise"  # background noise clips, <name>.wav

In [None]:
# Uncomment to download the dataset from the Hugging Face Hub into DATASET_FOLDER.
# snapshot_download("PFEE-TxE/audio_sampler", repo_type="dataset", local_dir=DATASET_FOLDER)

In [3]:
def copy_data_to_augmented_dataset(source_dir=None, dest_dir=None):
    """Rebuild the augmented-dataset folder as a flat copy of the source dataset.

    Every regular file found inside a direct sub-directory of ``source_dir`` is
    copied straight into ``dest_dir``; the sub-directory structure is not kept,
    so files sharing a name in different sub-directories overwrite each other.
    Files sitting directly in ``source_dir`` and nested directories are ignored.
    An existing ``dest_dir`` is deleted first.

    Args:
        source_dir: Dataset root to read from. Defaults to ``DATASET_FOLDER``.
        dest_dir: Folder to (re)create. Defaults to ``AUGMENTED_DATASET_FOLDER``.
    """
    if source_dir is None:
        source_dir = DATASET_FOLDER
    if dest_dir is None:
        dest_dir = AUGMENTED_DATASET_FOLDER

    # Start from an empty folder so files from a previous run never linger.
    if os.path.exists(dest_dir):
        shutil.rmtree(dest_dir)

    os.makedirs(dest_dir)

    for subdir in os.listdir(source_dir):
        subdir_path = os.path.join(source_dir, subdir)

        if not os.path.isdir(subdir_path):
            continue

        for file in os.listdir(subdir_path):
            file_path = os.path.join(subdir_path, file)

            # shutil.copy raises on directories (e.g. a nested cache folder),
            # so only regular files are copied.
            if os.path.isfile(file_path):
                shutil.copy(file_path, dest_dir)

def merge_audio_with_noise(record_file: str, noise_file: str, noise_name: str,
                           noise_gain_db: float = -30, noise_duration_ms: int = 2000):
    """Overlay a randomly chosen excerpt of a background noise onto a recording.

    The mix is exported as WAV next to ``record_file`` under the name
    ``<record_file without extension>_<noise_name>.wav``.

    Args:
        record_file: Path of the recording to augment.
        noise_file: Path of the background-noise audio file.
        noise_name: Label appended to the output file name.
        noise_gain_db: Gain in dB applied to the noise before mixing
            (negative values attenuate it).
        noise_duration_ms: Length, in milliseconds, of the noise excerpt.
    """
    original = AudioSegment.from_file(record_file)
    background_noise = AudioSegment.from_file(noise_file).apply_gain(noise_gain_db)

    # random.randint raises ValueError on an empty range, so clamp the latest
    # start to 0 when the noise is shorter than the requested excerpt (the
    # whole noise clip is then used).
    latest_start = max(0, len(background_noise) - noise_duration_ms)
    start_point = random.randint(0, latest_start)
    selected_noise = background_noise[start_point:start_point + noise_duration_ms]

    combined = original.overlay(selected_noise)

    # splitext copes with extensions of any length, unlike slicing off 4 characters.
    stem, _ = os.path.splitext(record_file)
    new_file_name = stem + '_' + noise_name + ".wav"

    combined.export(new_file_name, format='wav')

def pitch_shift(audio_file, semitones):
    """Export a pitch-shifted copy of a WAV file next to the original.

    The raw samples are re-labelled with a frame rate scaled by
    ``2 ** (semitones / 12)`` and then converted back to the source frame rate.
    The copy is written to ``audio_file`` minus its last four characters,
    followed by ``_plus_<n>.wav`` when ``semitones >= 0`` or ``_minus_<n>.wav``
    otherwise, where ``n`` is ``abs(semitones)``.
    """
    source = AudioSegment.from_file(audio_file, format="wav")

    ratio = 2 ** (semitones / 12.0)
    retimed = source._spawn(
        source.raw_data,
        overrides={"frame_rate": int(source.frame_rate * ratio)},
    )
    shifted = retimed.set_frame_rate(source.frame_rate)

    if semitones >= 0:
        direction = 'plus'
    else:
        direction = 'minus'

    output_path = f"{audio_file[:-4]}_{direction}_{abs(semitones)}.wav"
    shifted.export(output_path, format="wav")


def generate_augmented_dataset(semitones: Sequence[int], noises: Sequence[str]) -> None:
    """Rebuild the augmented dataset: copy the originals, pitch-shift, add noise.

    The augmented folder is first recreated from the source dataset. Each file
    in it is then pitch-shifted once per entry of ``semitones``. Finally every
    file present at that point (originals and pitch-shifted copies) is mixed
    once with each noise of ``noises``.

    Args:
        semitones: Pitch offsets, in semitones, to generate for every file.
        noises: Noise names; each is read from ``NOISE_FOLDER/<name>.wav``.
    """
    copy_data_to_augmented_dataset()
    folder = AUGMENTED_DATASET_FOLDER

    # os.listdir returns a snapshot, so files written during a pass are not
    # picked up again within that same pass.
    for file in os.listdir(folder):
        path = os.path.join(folder, file)

        if os.path.isfile(path):
            for semitone in semitones:
                pitch_shift(path, semitone)

    for file in tqdm(os.listdir(folder), desc="Processing Files"):
        path = os.path.join(folder, file)

        if os.path.isfile(path):
            for noise in noises:
                noise_path = os.path.join(NOISE_FOLDER, noise + '.wav')
                merge_audio_with_noise(path, noise_path, noise)

In [4]:
# Pitch offsets (in semitones) generated for every recording.
semitones = [-1, 1]

# Background noises to mix in; each name is looked up as NOISE_FOLDER/<name>.wav.
noises = [
    "city",
    "classroom",
    "forest",
    "rain",
]

In [8]:
# Rebuilds the augmented folder from scratch; the recorded run took about 15 minutes.
generate_augmented_dataset(semitones=semitones, noises=noises)

Processing Files: 100%|██████████| 2052/2052 [14:37<00:00,  2.34it/s]


In [10]:
def extract_info_corrected(file_name):
    """Parse an augmented-dataset file path into its metadata fields.

    The base name is expected to look like
    ``<origin>[_plus_<n> | _minus_<n>][_<noise>]`` plus an extension, where
    ``<origin>`` contains no underscore (e.g. ``Sapin-c26f8166_minus_1_rain.wav``).

    Args:
        file_name: Path (or bare name) of the audio file.

    Returns:
        A tuple ``(origin, label, pitch, noise, file_name, file_path)``:
        ``origin`` is the text before the first ``_``; ``label`` is the
        lower-cased part of ``origin`` before its first ``-``; ``pitch`` is the
        signed integer offset (0 when absent); ``noise`` is the noise name or
        ``None``; ``file_name`` is the base name and ``file_path`` is the
        argument exactly as given.

    Raises:
        ValueError: if a ``plus``/``minus`` marker is followed by a token that
            is not an integer.
    """
    file_path = file_name
    # basename also understands the OS-native separator, unlike splitting on '/'.
    file_name = os.path.basename(file_path)

    base_name, _ = os.path.splitext(file_name)
    parts = base_name.split('_')

    origin = parts[0]
    label = origin.split('-')[0].lower()

    # Parse by position rather than by counting parts: an optional
    # "<sign>_<n>" pair comes right after the origin, and whatever is left is
    # the noise name (which may itself contain underscores).
    pitch = 0
    remaining = parts[1:]
    signs = {'plus': 1, 'minus': -1}
    if len(remaining) >= 2 and remaining[0] in signs:
        pitch = signs[remaining[0]] * int(remaining[1])
        remaining = remaining[2:]

    noise = '_'.join(remaining) if remaining else None

    return origin, label, pitch, noise, file_name, file_path

def load_augmented_data():
    """Index every entry of the augmented-dataset folder into a DataFrame.

    Returns:
        A DataFrame with one row per path matching
        ``AUGMENTED_DATASET_FOLDER/*`` and the columns ``origin_file``,
        ``label``, ``pitch``, ``noise``, ``file_name`` and ``file_path``, filled
        from ``extract_info_corrected``.
    """
    column_names = ['origin_file', 'label', 'pitch', 'noise', 'file_name', 'file_path']
    pattern = os.path.join(AUGMENTED_DATASET_FOLDER, '*')

    records = list(map(extract_info_corrected, glob.glob(pattern)))

    return pd.DataFrame(records, columns=column_names)

In [12]:
# Build the metadata table, then preview its first rows.
data = load_augmented_data()
data.head()

Unnamed: 0,origin_file,label,pitch,noise,file_name,file_path
0,Accacia-cfeba5f4,accacia,0,classroom,Accacia-cfeba5f4_classroom.wav,../../dataset/dataset_augmented/Accacia-cfeba5...
1,Bouleau-ddf0a5d1,bouleau,0,forest,Bouleau-ddf0a5d1_forest.wav,../../dataset/dataset_augmented/Bouleau-ddf0a5...
2,Sapin-c26f8166,sapin,-1,rain,Sapin-c26f8166_minus_1_rain.wav,../../dataset/dataset_augmented/Sapin-c26f8166...
3,Accacia-1612e592,accacia,-1,classroom,Accacia-1612e592_minus_1_classroom.wav,../../dataset/dataset_augmented/Accacia-1612e5...
4,Bouleau-c7bbd516,bouleau,-1,city,Bouleau-c7bbd516_minus_1_city.wav,../../dataset/dataset_augmented/Bouleau-c7bbd5...
