In [1]:
import os
import shutil
import numpy as np

In [2]:
def copy_all_files(source_dir, dest_dir):
    '''Copy every regular file directly inside source_dir into dest_dir.

    dest_dir is created (including parents) when it does not exist yet, so
    shutil.copy always sees a directory target. Without that, a missing
    dest_dir is interpreted as a destination *file name* and each copy
    overwrites the previous one.

    Entries of source_dir that are not regular files (e.g. subdirectories)
    are skipped, since shutil.copy cannot copy them. The listing is not
    recursive.
    '''
    os.makedirs(dest_dir, exist_ok=True)
    for filename in os.listdir(source_dir):
        full_filename = os.path.join(source_dir, filename)
        if os.path.isfile(full_filename):
            shutil.copy(full_filename, dest_dir)

In [3]:
def list_with_full_paths(source_dir):
    '''List the entries of source_dir, each joined onto source_dir.

    Entries are returned in whatever order os.listdir yields them.
    '''
    full_paths = []
    for entry in os.listdir(source_dir):
        full_paths.append(os.path.join(source_dir, entry))
    return full_paths

In [4]:
def randomly_split_in_half(source_dir, seed=None):
    '''Shuffle the entries of source_dir and split them into two halves.

    Args:
        source_dir: directory whose entries are listed with
            list_with_full_paths (paths are joined onto source_dir).
        seed: optional seed for a private NumPy RandomState, giving a
            reproducible split. When None (the default) the global NumPy
            random state is used, so np.random.seed() still applies.

    Returns:
        A (first_half, second_half) tuple of path lists. With an odd number
        of entries the second list holds the extra one.
    '''
    # os.listdir order is unspecified; sorting first makes the outcome depend
    # only on the random state, not on the filesystem's listing order.
    files = sorted(list_with_full_paths(source_dir))
    if seed is None:
        np.random.shuffle(files)
    else:
        np.random.RandomState(seed).shuffle(files)
    split_point = len(files) // 2
    return files[:split_point], files[split_point:]

In [8]:
# Root of the prepared dataset and the two subset directories beneath it.
data_dir = 'data'
set_dirs = [
    os.path.join(data_dir, 'clean-set'),
    os.path.join(data_dir, 'noisy-set'),
]

In [9]:
# Create the dataset root. exist_ok keeps the cell safe to re-run:
# plain os.mkdir raises FileExistsError once the directory is there.
os.makedirs(data_dir, exist_ok=True)

In [10]:
# Create each subset directory. makedirs(exist_ok=True) tolerates re-runs
# and also creates the parent directory if it is missing.
for dir_ in set_dirs:
    os.makedirs(dir_, exist_ok=True)

In [11]:
# Words that get their own class directory; any other source folder is
# pooled into 'other' further down.
commands = (
    ['on', 'off']
    + ['left', 'right', 'up', 'down']
    + ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'zero']
)

In [12]:
# One sub-directory per command inside every subset directory.
# makedirs(exist_ok=True) lets the cell be re-run without FileExistsError.
for outer_dir in set_dirs:
    for inner_dir in commands:
        os.makedirs(os.path.join(outer_dir, inner_dir), exist_ok=True)

In [13]:
# Raw input locations: each archive folder contains a folder of the same
# name, hence the repeated path component.
data_source_dir, noisy_data_source_dir = (
    os.path.join('data-raw', archive, archive)
    for archive in ('augmented_dataset', 'augmented_dataset_verynoisy')
)

In [14]:
# Named handles for the two subset directories under data_dir.
clean_set_dir, noisy_set_dir = (
    os.path.join(data_dir, subset) for subset in ('clean-set', 'noisy-set')
)

In [15]:
# Valid commands, clean: copy each command's folder from the clean source
# into the matching folder of the clean set.
for command_name in commands:
    source_dir, dest_dir = (
        os.path.join(root, command_name)
        for root in (data_source_dir, clean_set_dir)
    )
    copy_all_files(source_dir, dest_dir)

In [16]:
# Other commands, clean set: every source folder that is not one of the
# target commands is pooled into a single 'other' directory.
clean_set_all_other_files = list()
clean_set_other_commands_dir = os.path.join(clean_set_dir, 'other')
# exist_ok so that re-running the cell does not raise FileExistsError.
os.makedirs(clean_set_other_commands_dir, exist_ok=True)
# os.listdir returns entries in arbitrary order; sorting both listings makes
# the index -> source file mapping below reproducible across runs.
for dir_ in sorted(os.listdir(data_source_dir)):
    full_dir = os.path.join(data_source_dir, dir_)
    # Skip stray non-directory entries in the source root; calling
    # os.listdir on them would raise NotADirectoryError.
    if dir_ not in commands and os.path.isdir(full_dir):
        clean_set_all_other_files += [
            os.path.join(full_dir, file) for file in sorted(os.listdir(full_dir))
        ]

# Each pooled file is stored under a running index.
# NOTE(review): presumably this avoids basename clashes between source
# folders; the '.wav' suffix is applied regardless of the source extension.
for n, file in enumerate(clean_set_all_other_files):
    shutil.copy(file, os.path.join(clean_set_other_commands_dir, f'{n}.wav'))

In [17]:
# Valid commands, noisy: copy each command's folder from the noisy source
# into the matching folder of the noisy set.
for command_name in commands:
    source_dir, dest_dir = (
        os.path.join(root, command_name)
        for root in (noisy_data_source_dir, noisy_set_dir)
    )
    copy_all_files(source_dir, dest_dir)

In [20]:
# Other commands, noisy: every noisy source folder that is not one of the
# target commands is pooled into a single 'other' directory.
noisy_set_all_other_files = list()
noisy_set_other_commands_dir = os.path.join(noisy_set_dir, 'other')
# exist_ok so that re-running the cell does not raise FileExistsError.
os.makedirs(noisy_set_other_commands_dir, exist_ok=True)
# os.listdir returns entries in arbitrary order; sorting both listings makes
# the index -> source file mapping below reproducible across runs.
for dir_ in sorted(os.listdir(noisy_data_source_dir)):
    full_dir = os.path.join(noisy_data_source_dir, dir_)
    # Skip stray non-directory entries in the source root; calling
    # os.listdir on them would raise NotADirectoryError.
    if dir_ not in commands and os.path.isdir(full_dir):
        noisy_set_all_other_files += [
            os.path.join(full_dir, file) for file in sorted(os.listdir(full_dir))
        ]

# Each pooled file is stored under a running index.
# NOTE(review): presumably this avoids basename clashes between source
# folders; the '.wav' suffix is applied regardless of the source extension.
for n, file in enumerate(noisy_set_all_other_files):
    shutil.copy(file, os.path.join(noisy_set_other_commands_dir, f'{n}.wav'))