In [2]:
import os
import shutil

def copy_files(input_folder, train_folder, test_folder):
    """Split the regular files of ``input_folder`` alternately into test/train.

    Directory entries are sorted by name and everything that is not a regular
    file is discarded *before* the split, so sub-directories cannot skew the
    alternation. The 1st, 3rd, 5th, ... files are copied to ``test_folder``;
    the 2nd, 4th, 6th, ... files are copied to ``train_folder``.

    Args:
        input_folder: Folder whose regular files are split.
        train_folder: Destination for the train half (created if missing).
        test_folder: Destination for the test half (created if missing).

    Returns:
        None. The number of files copied to each folder is printed.
    """
    # Filter first, then alternate: indexing over the raw listing would let a
    # skipped sub-directory consume a slot and unbalance the two halves.
    files = [
        name
        for name in sorted(os.listdir(input_folder))
        if os.path.isfile(os.path.join(input_folder, name))
    ]
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    # Even positions go to test, odd positions go to train.
    test_files = files[0::2]
    train_files = files[1::2]

    for file_name in test_files:
        shutil.copy(os.path.join(input_folder, file_name),
                    os.path.join(test_folder, file_name))
    for file_name in train_files:
        shutil.copy(os.path.join(input_folder, file_name),
                    os.path.join(train_folder, file_name))

    train_count = len(train_files)
    test_count = len(test_files)

    print(f"Number of files copied to the train folder: {train_count}")
    print(f"Number of files copied to the test folder: {test_count}")

if __name__ == "__main__":
    # Source folder with the recordings to split, followed by the two
    # destination folders that receive the train and test halves.
    input_folder = "/mnt/d/deBarbaroCry/deBarbaroCry/P29/preprocessed_cry"
    train_folder, test_folder = "CryCorpusFinal/cry", "CryCorpusFinal/Test"

    copy_files(
        input_folder=input_folder,
        train_folder=train_folder,
        test_folder=test_folder,
    )


Number of files copied to the train folder: 233
Number of files copied to the test folder: 234


In [4]:
import os
import random
import librosa
import soundfile as sf
import numpy as np

# Root folder of the corpus; the cry recordings, the augmented output folder
# and the background-noise clips are all addressed relative to it.
AUDIO_PATH = 'CryCorpusFinal'

def _fit_length(signal, length):
    """Return ``signal`` looped and/or truncated to exactly ``length`` samples."""
    if length <= len(signal):
        return signal[:length]
    if len(signal) == 0:
        raise ValueError("cannot extend an empty audio signal")
    repeats = int(np.ceil(length / len(signal)))
    return np.tile(signal, repeats)[:length]


def augment_data(input_folder, output_folder, ogg_files,
                 sample_rate=22050, clip_seconds=5, max_gain_reduction_db=20):
    """Mix every ``.wav`` file in ``input_folder`` with one background clip.

    Each background clip is loaded at ``sample_rate``, looped or truncated to
    ``clip_seconds`` seconds and peak-normalised. The ``.wav`` files are
    shuffled and dealt round-robin to the background clips, so every input
    file is augmented exactly once (no remainder is dropped). For each pair
    the clip is looped/truncated to the length of the input, attenuated by a
    random gain between 0 and ``-max_gain_reduction_db`` dB, added to the
    input, and the normalised mix is written to
    ``<output_folder>/<input stem>_<clip name>_augmented.wav``.

    Args:
        input_folder: Folder containing the ``.wav`` files to augment.
        output_folder: Folder that receives the mixes (created if missing).
        ogg_files: Paths of the background clips; must not be empty.
        sample_rate: Rate every file is loaded at.
        clip_seconds: Length each background clip is fitted to before mixing.
        max_gain_reduction_db: Largest attenuation applied to the background.

    Raises:
        ValueError: If ``ogg_files`` is empty, or a background clip has no
            samples and therefore cannot be looped.
    """
    if not ogg_files:
        raise ValueError("ogg_files must contain at least one background clip")

    os.makedirs(output_folder, exist_ok=True)

    # Load each background clip once, fit it to the reference length and
    # normalise it here: none of this depends on the input file it is mixed
    # with, so it does not belong in the inner loop.
    clip_len = int(clip_seconds * sample_rate)
    ogg_clips = []
    for ogg_file in ogg_files:
        noise, _ = librosa.load(ogg_file, sr=sample_rate)
        noise = librosa.util.normalize(_fit_length(noise, clip_len))
        ogg_clips.append((noise, os.path.basename(ogg_file).split('.')[0]))

    # Sort before shuffling so the pairing depends only on the random state,
    # not on the arbitrary order in which the OS lists the directory.
    input_files = sorted(f for f in os.listdir(input_folder) if f.endswith('.wav'))
    random.shuffle(input_files)  # Shuffle input files to randomize pairing
    num_groups = len(ogg_clips)

    for i, (noise, noise_name) in enumerate(ogg_clips):
        # Round-robin slice: group sizes differ by at most one and the
        # leftover files of an uneven division are still processed.
        for input_file in input_files[i::num_groups]:
            input_path = os.path.join(input_folder, input_file)
            y, sr = librosa.load(input_path, sr=sample_rate)

            # Randomly reduce gain of the background clip (0 dB .. -max dB).
            gain_reduction = random.uniform(0, -max_gain_reduction_db)
            # Loop the clip when the recording is longer than the clip so the
            # two arrays always have the same length before they are added.
            noise_adjusted = _fit_length(noise, len(y)) * (10 ** (gain_reduction / 20))

            # Mix the input file with the background clip.
            mixed_audio = librosa.util.normalize(y + noise_adjusted)

            output_file = f"{os.path.splitext(input_file)[0]}_{noise_name}_augmented.wav"
            output_path = os.path.join(output_folder, output_file)
            sf.write(output_path, mixed_audio, sr)

# Cry recordings to augment, and the sub-folder that receives the mixes.
input_folder = f'{AUDIO_PATH}/cry'
output_folder = f'{AUDIO_PATH}/cry/augmented'

# Background clips mixed into the recordings. The file names are kept exactly
# as written here; they have to match the names of the files on disk.
ogg_files = [f'{AUDIO_PATH}/ac.ogg', f'{AUDIO_PATH}/dishwasher.ogg', f'{AUDIO_PATH}/fan.ogg', f'{AUDIO_PATH}/refridgerator.ogg', 
             f'{AUDIO_PATH}/tv.ogg',f'{AUDIO_PATH}/vaccum_cleaner.ogg']

augment_data(input_folder, output_folder, ogg_files)