In [None]:
import os
cwd_old = os.getcwd()
dir_path = '..'
os.chdir(dir_path)
cwd_new = os.getcwd()
print("The working directory is moved \nfrom {} \nto {}.".format(cwd_old, cwd_new))

# Make training and test datasets

In [2]:
import wave
import contextlib
def get_duration(wav_path):
    with contextlib.closing(wave.open(wav_path,'r')) as f:
        frames = f.getnframes()
        rate = f.getframerate()
        duration = frames / float(rate)
    return duration, frames, rate

In [3]:
import os
from tqdm import tqdm

In [4]:
import json
with open('config.json') as f:
    data = f.read()
config = json.loads(data)
wg_segment_length = config["data_config"]["segment_length"]
wg_sampling_rate = config["data_config"]["sampling_rate"]

In [5]:
data_tts_dir = '/data2/sungjaecho/data_tts'
db_list = ['KETTS', 'KETTS2', 'KSS', 'KAB', 'NC']
wav_dirs = [os.path.join(data_tts_dir, db, db, 'wav') for db in db_list]

In [6]:
wav_dirs

['/data2/sungjaecho/data_tts/KETTS/KETTS/wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav',
 '/data2/sungjaecho/data_tts/KSS/KSS/wav',
 '/data2/sungjaecho/data_tts/KAB/KAB/wav',
 '/data2/sungjaecho/data_tts/NC/NC/wav']

In [7]:
voice_type_set = set()
for wav_dir in wav_dirs:
    wavs = sorted(os.listdir(wav_dir))
    for wav in wavs:
        splits = wav.split('_')
        if len(splits) == 3:
            voice_type = '{}_{}'.format(splits[0], splits[1])
        else:
            voice_type = splits[0]
        voice_type_set.add(voice_type)
print('voice_type_set', voice_type_set)
print('len(voice_type_set)', len(voice_type_set))

voice_type_set {'ketts2-30f_neutral', 'ketts2-40m_surprised', 'ketts-30m_sad', 'ketts2-30f_fearful', 'ketts2-50m_surprised', 'ketts-30f_angry', 'ketts-30m_fearful', 'kab-w2', 'ketts2-20m_fearful', 'ketts2-50f_neutral', 'ketts2-30f_sad', 'ketts2-20m_angry', 'kab-w1', 'ketts2-20m_happy', 'ketts2-60f_neutral', 'ketts2-60f_surprised', 'ketts-30f_surprised', 'ketts2-40m_angry', 'kab-m4', 'ketts2-50f_disgusted', 'nc-f_joyful', 'kab-m7', 'kab-m5', 'ketts2-50f_happy', 'ketts2-50f_sad', 'ketts2-60f_disgusted', 'ketts2-50f_angry', 'ketts2-50m_fearful', 'kab-w5', 'ketts2-30f_disgusted', 'ketts2-20m_surprised', 'ketts-30f_fearful', 'ketts2-60f_sad', 'nc-m_sad', 'ketts2-50m_happy', 'ketts-30f_happy', 'nc-f_angry', 'kab-m1', 'ketts2-40m_fearful', 'ketts2-50m_disgusted', 'kss-w', 'ketts-30m_happy', 'ketts2-30f_happy', 'kab-w4', 'nc-m_angry', 'ketts2-40m_happy', 'ketts2-40m_sad', 'ketts2-50m_sad', 'ketts-30m_neutral', 'ketts-30m_disgusted', 'ketts2-60f_fearful', 'nc-m_fearful', 'ketts2-50m_neutral', '

In [8]:
n_test_wav_per_voice_type = 10
max_duration = 10

cnt_wrong_sr = 0
cnt_short_wav = 0

test_wavs = dict()
train_wavs = dict()
for voice_type in voice_type_set:
    test_wavs[voice_type] = list()
for voice_type in voice_type_set:
    train_wavs[voice_type] = list()
    
for wav_dir in wav_dirs:
    wavs = sorted(os.listdir(wav_dir))
    for wav in tqdm(wavs):
        wav_path = os.path.join(wav_dir, wav)
        '''if get_duration(wav_path)[0] > max_duration:
            continue'''
        duration, n_frames, sr = get_duration(wav_path)
        if wg_sampling_rate != sr:
            cnt_wrong_sr += 1
            continue
        if n_frames < wg_segment_length:
            cnt_short_wav += 1
            continue
        
        splits = wav.split('_')
        if len(splits) == 3:
            voice_type = '{}_{}'.format(splits[0], splits[1])
        else:
            voice_type = splits[0]
        
        if len(test_wavs[voice_type]) < n_test_wav_per_voice_type:
            test_wavs[voice_type].append(wav_path)
        else:
            train_wavs[voice_type].append(wav_path)

for voice_type in voice_type_set:
    print('len(test_wavs[{}])'.format(voice_type), len(test_wavs[voice_type]))
for voice_type in voice_type_set:
     print('len(train_wavs[{}])'.format(voice_type), len(train_wavs[voice_type]))
        
print("Exclusion")
print("#samples with wrong sampling rate", cnt_wrong_sr)
print("#samples of short length", cnt_short_wav)

100%|██████████| 43082/43082 [00:01<00:00, 27344.70it/s]
100%|██████████| 21000/21000 [00:00<00:00, 27421.73it/s]
100%|██████████| 12854/12854 [00:00<00:00, 25302.86it/s]
100%|██████████| 58561/58561 [00:02<00:00, 27766.75it/s]
100%|██████████| 15406/15406 [00:00<00:00, 27290.48it/s]

len(test_wavs[ketts2-30f_neutral]) 10
len(test_wavs[ketts2-40m_surprised]) 10
len(test_wavs[ketts-30m_sad]) 10
len(test_wavs[ketts2-30f_fearful]) 10
len(test_wavs[ketts2-50m_surprised]) 10
len(test_wavs[ketts-30f_angry]) 10
len(test_wavs[ketts-30m_fearful]) 10
len(test_wavs[kab-w2]) 10
len(test_wavs[ketts2-20m_fearful]) 10
len(test_wavs[ketts2-50f_neutral]) 10
len(test_wavs[ketts2-30f_sad]) 10
len(test_wavs[ketts2-20m_angry]) 10
len(test_wavs[kab-w1]) 10
len(test_wavs[ketts2-20m_happy]) 10
len(test_wavs[ketts2-60f_neutral]) 10
len(test_wavs[ketts2-60f_surprised]) 10
len(test_wavs[ketts-30f_surprised]) 10
len(test_wavs[ketts2-40m_angry]) 10
len(test_wavs[kab-m4]) 10
len(test_wavs[ketts2-50f_disgusted]) 10
len(test_wavs[nc-f_joyful]) 10
len(test_wavs[kab-m7]) 10
len(test_wavs[kab-m5]) 10
len(test_wavs[ketts2-50f_happy]) 10
len(test_wavs[ketts2-50f_sad]) 10
len(test_wavs[ketts2-60f_disgusted]) 10
len(test_wavs[ketts2-50f_angry]) 10
len(test_wavs[ketts2-50m_fearful]) 10
len(test_wavs[kab-w




In [9]:
test_wav_paths = list()
for voice_type in voice_type_set:
    test_wav_paths += test_wavs[voice_type]

train_wav_paths = list()
for voice_type in voice_type_set:
    train_wav_paths += train_wavs[voice_type]
    
print('len(test_wav_list)', len(test_wav_paths))
print('len(train_wav_list)', len(train_wav_paths))

len(test_wav_list) 800
len(train_wav_list) 148953


In [10]:
test_wav_paths

['/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-30f_neutral_001.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-30f_neutral_002.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-30f_neutral_003.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-30f_neutral_004.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-30f_neutral_005.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-30f_neutral_006.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-30f_neutral_007.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-30f_neutral_008.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-30f_neutral_009.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-30f_neutral_010.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-40m_surprised_001.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-40m_surprised_002.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-40m_surprised_003.wav',
 '/dat

In [11]:
with open('test_files.txt', 'w') as f:
    w_string = ''
    for test_wav_path in test_wav_paths:
        w_string += test_wav_path
        w_string += '\n'
    w_string = w_string[:-1]
    f.write(w_string)
    
with open('train_files.txt', 'w') as f:
    w_string = ''
    for train_wav_path in train_wav_paths:
        w_string += train_wav_path
        w_string += '\n'
    w_string = w_string[:-1]
    f.write(w_string)
    
from shutil import copyfile

copyfile('test_files.txt', 'file_lists/01_KETTS_KETTS2_KSS_NC_KAD/test_files.txt')
copyfile('train_files.txt', 'file_lists/01_KETTS_KETTS2_KSS_NC_KAD/train_files.txt')

'file_lists/01_KETTS_KETTS2_KSS_NC_KAD/train_files.txt'