In [1]:
import os
# Move the kernel's working directory to the parent of the directory it
# started in, so relative paths used later resolve against that parent.
#
# A bare `os.chdir('..')` is not idempotent: every re-run of this cell would
# climb one more level. The starting directory is therefore captured only the
# first time the cell runs in a kernel, and the target is always resolved
# against it, so re-running the cell lands in the same place.
if 'cwd_old' not in globals():
    cwd_old = os.getcwd()
dir_path = '..'
os.chdir(os.path.join(cwd_old, dir_path))
cwd_new = os.getcwd()
print("The working directory is moved \nfrom {} \nto {}.".format(cwd_old, cwd_new))

The working directory is moved 
from /data2/sungjaecho/Projects/waveglow/dev_ipynb 
to /data2/sungjaecho/Projects/waveglow.


# Make training and test datasets

In [2]:
import wave
import contextlib
def get_duration(wav_path):
    """Return the playing time of the WAV file at ``wav_path`` in seconds.

    The file is opened with the standard ``wave`` module and the duration is
    computed as the number of frames divided by the frame rate.
    """
    wav_file = wave.open(wav_path, 'r')
    try:
        n_frames = wav_file.getnframes()
        frame_rate = wav_file.getframerate()
        seconds = n_frames / float(frame_rate)
    finally:
        # Release the file handle whether or not reading the header succeeded.
        wav_file.close()
    return seconds

In [3]:
import os
from tqdm import tqdm

In [4]:
# Root directory that holds one sub-directory per speech corpus.
data_tts_dir = '/data2/sungjaecho/data_tts'
# Corpus names; the audio of each corpus is looked up under
# <data_tts_dir>/<name>/<name>/wav.
db_list = ['KETTS', 'KETTS2', 'KSS', 'KAB', 'NC']
wav_dirs = list(map(
    lambda db: os.path.join(data_tts_dir, db, db, 'wav'),
    db_list,
))

In [5]:
# Echo the resolved per-corpus wav directories as the cell output.
wav_dirs

['/data2/sungjaecho/data_tts/KETTS/KETTS/wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav',
 '/data2/sungjaecho/data_tts/KSS/KSS/wav',
 '/data2/sungjaecho/data_tts/KAB/KAB/wav',
 '/data2/sungjaecho/data_tts/NC/NC/wav']

In [6]:
# Collect every distinct voice type that appears in the wav file names.
# A file name that splits into exactly three '_'-separated parts contributes
# its first two parts joined by '_'; any other name contributes only its
# first part.
voice_type_set = set()
for wav_dir in wav_dirs:
    for wav_name in sorted(os.listdir(wav_dir)):
        parts = wav_name.split('_')
        voice_type_set.add('_'.join(parts[:2]) if len(parts) == 3 else parts[0])
print('voice_type_set', voice_type_set)
print('len(voice_type_set)', len(voice_type_set))

voice_type_set {'kss-w', 'ketts2-50m_surprised', 'ketts2-60f_disgusted', 'ketts2-60f_sad', 'ketts-30m_fearful', 'ketts-30m_disgusted', 'kab-w4', 'ketts2-20m_surprised', 'kab-m2', 'ketts-30f_disgusted', 'ketts2-60f_fearful', 'ketts2-30f_angry', 'ketts2-50m_neutral', 'ketts2-40m_surprised', 'ketts2-40m_fearful', 'ketts2-50f_angry', 'nc-m_angry', 'ketts2-20m_neutral', 'ketts2-40m_happy', 'ketts-30f_fearful', 'ketts-30f_happy', 'kab-w5', 'nc-f_fearful', 'kab-w3', 'nc-f_neutral', 'nc-m_sad', 'ketts-30m_surprised', 'kab-m6', 'kab-w2', 'ketts2-20m_happy', 'ketts-30m_happy', 'ketts2-30f_fearful', 'ketts2-50m_disgusted', 'ketts2-30f_disgusted', 'kab-m4', 'ketts-30f_surprised', 'ketts2-60f_surprised', 'ketts2-40m_disgusted', 'kab-m1', 'ketts2-50m_angry', 'ketts2-40m_sad', 'ketts-30f_sad', 'ketts-30f_neutral', 'ketts-30f_angry', 'ketts2-30f_happy', 'nc-f_joyful', 'ketts2-40m_angry', 'nc-f_angry', 'ketts2-20m_fearful', 'ketts-30m_neutral', 'ketts2-30f_sad', 'kab-m5', 'ketts2-50f_sad', 'nc-m_joyful

In [7]:
# Split settings: number of clips held out for testing per voice type, and
# the longest clip (in seconds, as returned by get_duration) that is kept.
n_test_wav_per_voice_type = 10
max_duration = 10

# One initially empty list of wav paths per voice type, for each split.
test_wavs = {voice_type: list() for voice_type in voice_type_set}
train_wavs = {voice_type: list() for voice_type in voice_type_set}

for wav_dir in wav_dirs:
    for wav in tqdm(sorted(os.listdir(wav_dir))):
        wav_path = os.path.join(wav_dir, wav)
        # Clips longer than the limit are assigned to neither split.
        if get_duration(wav_path) > max_duration:
            continue

        # Voice type: first two '_'-separated parts of a three-part file
        # name, otherwise just the first part.
        splits = wav.split('_')
        voice_type = '_'.join(splits[:2]) if len(splits) == 3 else splits[0]

        # The first accepted clips of a voice type (in sorted file-name
        # order) fill its test split; everything after that is training data.
        held_out = test_wavs[voice_type]
        if len(held_out) < n_test_wav_per_voice_type:
            held_out.append(wav_path)
        else:
            train_wavs[voice_type].append(wav_path)

# Report the size of every per-voice-type list, test split first.
for label, wavs_by_voice in (('len(test_wavs[{}])', test_wavs),
                             ('len(train_wavs[{}])', train_wavs)):
    for voice_type in voice_type_set:
        print(label.format(voice_type), len(wavs_by_voice[voice_type]))

100%|██████████| 43082/43082 [00:01<00:00, 26938.17it/s]
100%|██████████| 21000/21000 [00:00<00:00, 27000.29it/s]
100%|██████████| 12854/12854 [00:00<00:00, 28031.45it/s]
100%|██████████| 58561/58561 [00:02<00:00, 27336.69it/s]
100%|██████████| 15406/15406 [00:00<00:00, 27584.26it/s]

len(test_wavs[kss-w]) 10
len(test_wavs[ketts2-50m_surprised]) 10
len(test_wavs[ketts2-60f_disgusted]) 10
len(test_wavs[ketts2-60f_sad]) 10
len(test_wavs[ketts-30m_fearful]) 10
len(test_wavs[ketts-30m_disgusted]) 10
len(test_wavs[kab-w4]) 10
len(test_wavs[ketts2-20m_surprised]) 10
len(test_wavs[kab-m2]) 10
len(test_wavs[ketts-30f_disgusted]) 10
len(test_wavs[ketts2-60f_fearful]) 10
len(test_wavs[ketts2-30f_angry]) 10
len(test_wavs[ketts2-50m_neutral]) 10
len(test_wavs[ketts2-40m_surprised]) 10
len(test_wavs[ketts2-40m_fearful]) 10
len(test_wavs[ketts2-50f_angry]) 10
len(test_wavs[nc-m_angry]) 10
len(test_wavs[ketts2-20m_neutral]) 10
len(test_wavs[ketts2-40m_happy]) 10
len(test_wavs[ketts-30f_fearful]) 10
len(test_wavs[ketts-30f_happy]) 10
len(test_wavs[kab-w5]) 10
len(test_wavs[nc-f_fearful]) 10
len(test_wavs[kab-w3]) 10
len(test_wavs[nc-f_neutral]) 10
len(test_wavs[nc-m_sad]) 10
len(test_wavs[ketts-30m_surprised]) 10
len(test_wavs[kab-m6]) 10
len(test_wavs[kab-w2]) 10
len(test_wavs[ket




In [8]:
# Flatten the per-voice-type splits into two flat lists of wav paths. Both
# lists visit the voice types in the iteration order of voice_type_set.
test_wav_paths = [wav_path
                  for voice_type in voice_type_set
                  for wav_path in test_wavs[voice_type]]
train_wav_paths = [wav_path
                   for voice_type in voice_type_set
                   for wav_path in train_wavs[voice_type]]

# Sanity check: no file may appear in both the test and the training split.
assert set(test_wav_paths).isdisjoint(train_wav_paths), \
    'test and train splits share at least one wav path'

# The labels name the variables that are actually measured.
print('len(test_wav_paths)', len(test_wav_paths))
print('len(train_wav_paths)', len(train_wav_paths))

len(test_wav_list) 800
len(train_wav_list) 146467


In [9]:
# Preview only the head of the list; echoing every entry would flood the
# notebook output.
test_wav_paths[:20]

['/data2/sungjaecho/data_tts/KSS/KSS/wav/kss-w_1-0000.wav',
 '/data2/sungjaecho/data_tts/KSS/KSS/wav/kss-w_1-0001.wav',
 '/data2/sungjaecho/data_tts/KSS/KSS/wav/kss-w_1-0002.wav',
 '/data2/sungjaecho/data_tts/KSS/KSS/wav/kss-w_1-0003.wav',
 '/data2/sungjaecho/data_tts/KSS/KSS/wav/kss-w_1-0004.wav',
 '/data2/sungjaecho/data_tts/KSS/KSS/wav/kss-w_1-0005.wav',
 '/data2/sungjaecho/data_tts/KSS/KSS/wav/kss-w_1-0006.wav',
 '/data2/sungjaecho/data_tts/KSS/KSS/wav/kss-w_1-0007.wav',
 '/data2/sungjaecho/data_tts/KSS/KSS/wav/kss-w_1-0008.wav',
 '/data2/sungjaecho/data_tts/KSS/KSS/wav/kss-w_1-0009.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-50m_surprised_001.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-50m_surprised_002.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-50m_surprised_003.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-50m_surprised_004.wav',
 '/data2/sungjaecho/data_tts/KETTS2/KETTS2/wav/ketts2-50m_surprised_005.wav',
 '/data2/s

In [10]:
def _write_path_list(list_path, wav_paths):
    """Write ``wav_paths`` to the text file ``list_path``, one path per line.

    Paths are separated by a single newline and nothing follows the last
    path, so an empty ``wav_paths`` produces an empty file.
    """
    with open(list_path, 'w') as f:
        # str.join places the separator only between items, so there is no
        # trailing newline to strip afterwards.
        f.write('\n'.join(wav_paths))


# Persist both splits as plain-text file lists, relative to the current
# working directory.
_write_path_list('test_files.txt', test_wav_paths)
_write_path_list('train_files.txt', train_wav_paths)