This notebook goes through the voice conversion of speakers from the ARCTIC and ARCTIC2 corpora, using the SIDEKIT toolkit for feature extraction.

## Set up

In [1]:
%set_env SIDEKIT=theano=false
import sidekit
import os
from os import listdir
from os.path import join, expanduser
from shutil import copy
from sklearn.model_selection import train_test_split

env: SIDEKIT=theano=false


  return f(*args, **kwds)


In [2]:
# show the current working directory (shell command)
!pwd

/Users/kennylino/Documents/em_lct/UoM/thesis/experiments


In [3]:
# if cwd not './thesis' (e.g. './thesis/experiments/'), moves one up
os.chdir('..')
!pwd

/Users/kennylino/Documents/em_lct/UoM/thesis


## Rename files and create train/test sets

The files are renamed and reorganized to better follow the structure desired by the SIDEKIT toolkit.
Some of the simpler file moves may have been done by drag and drop in Finder.

In [None]:
corpus_dir = "./data/arctic2/" # root folder of the corpus, relative to the current working directory

In [None]:
# prefixes every file with the name of its speaker, e.g. 'arctic_b0072.wav' -> 'YDCK_arctic_b0072.wav'
# safe to re-run: files that already carry the speaker prefix are left untouched
for dirName, subdirList, fileList in os.walk(corpus_dir):
    if len(fileList) > 1:
        # the speaker is the parent folder of the current one; e.g. './data/arctic2/YDCK/wav' -> 'YDCK'
        speaker = os.path.basename(os.path.dirname(dirName))
        prefix = speaker + '_'
        for fname in fileList:
            if fname.startswith(prefix):
                continue # already renamed on a previous run
            os.rename(os.path.join(dirName, fname), os.path.join(dirName, prefix + fname))

In [None]:
# some speakers are missing recordings, so list how many files each folder holds
for folder, _, files in os.walk(corpus_dir):
    n_files = len(files)
    if n_files <= 1:
        continue # folders with at most one file are not reported
    print(folder)
    print('size= ' + str(n_files))

In [None]:
EBVS_dir = "./data/arctic2/EBVS/wav" # 'wav' folder of speaker EBVS (used to choose the train/test files)

In [None]:
# select EBVS folder to decide train/test because it has the least audio files
# os.listdir returns entries in arbitrary order, so the list is sorted: otherwise a seeded
# random split of it could select different files on another machine or run.
# entries that are not '.wav' files (e.g. '.DS_Store') are dropped so they can never be selected
EBVS_files = sorted(f for f in os.listdir(EBVS_dir) if f.endswith('.wav'))

In [None]:
# draws 150 recordings at random (fixed seed) and divides them into 100 for training and 50 for testing
data_split = train_test_split(
    EBVS_files,
    train_size=100,
    test_size=50,
    random_state=42,
)
train_data, test_data = data_split

In [None]:
# show the file names that were selected for the training split
print(train_data)

In [None]:
# make a train and test folder for each speaker
# only the folders directly inside the corpus directory are treated as speakers
_, speaker_dirs, _ = next(os.walk(corpus_dir), (corpus_dir, [], []))
for speaker in speaker_dirs:
    for split_name in ('train', 'test'):
        # exist_ok keeps the cell re-runnable: os.mkdir would raise FileExistsError on a second run
        os.makedirs(os.path.join(corpus_dir, speaker, split_name), exist_ok=True)

In [None]:
# copies (does not move) the training utterances from each speaker's 'wav' folder into its 'train' folder
# utterance ids of the training split, shared by all speakers; e.g. 'EBVS_arctic_b0072.wav' -> 'b0072.wav'
train_utts = {parts[2] for parts in (file.split('_') for file in train_data) if len(parts) > 2}
# the corpus is walked once, instead of once per selected file
for dirName, subdirList, fileList in os.walk(corpus_dir):
    # read only from the original 'wav' folders, so nothing is ever copied out of a 'train'/'test' folder
    if os.path.basename(dirName) != 'wav':
        continue
    train_folder = os.path.join(os.path.split(dirName)[0], 'train')
    os.makedirs(train_folder, exist_ok=True) # otherwise copy() would write a plain file named 'train'
    for f in fileList:
        parts = f.split('_')
        # names without two underscores (e.g. '.DS_Store') have no utterance id and are skipped
        if len(parts) > 2 and parts[2] in train_utts:
            copy(os.path.join(dirName, f), train_folder)

In [None]:
# copies (does not move) the test utterances from each speaker's 'wav' folder into its 'test' folder
# utterance ids of the test split, shared by all speakers; e.g. 'EBVS_arctic_b0072.wav' -> 'b0072.wav'
test_utts = {parts[2] for parts in (file.split('_') for file in test_data) if len(parts) > 2}
# the corpus is walked once, instead of once per selected file
for dirName, subdirList, fileList in os.walk(corpus_dir):
    # read only from the original 'wav' folders, so nothing is ever copied out of a 'train'/'test' folder
    if os.path.basename(dirName) != 'wav':
        continue
    test_folder = os.path.join(os.path.split(dirName)[0], 'test')
    os.makedirs(test_folder, exist_ok=True) # otherwise copy() would write a plain file named 'test'
    for f in fileList:
        parts = f.split('_')
        # names without two underscores (e.g. '.DS_Store') have no utterance id and are skipped
        if len(parts) > 2 and parts[2] in test_utts:
            copy(os.path.join(dirName, f), test_folder)

## Load data

In [4]:
training_data_dir = "./data/arctic2/train/" # training data directory
# show names are given relative to the corpus folder and without extension, e.g. 'train/BDL_arctic_b0247'
# only '.wav' files are listed: a hidden entry such as '.DS_Store' would otherwise become the bogus name 'train/'
# splitext removes just the final extension, so names containing extra dots are kept intact
training_data_list = ['train/' + os.path.splitext(f)[0]
                      for f in os.listdir(training_data_dir)
                      if f.endswith('.wav')]

## Extract features

http://www-lium.univ-lemans.fr/sidekit/tutorial/hdf5.html

In [None]:
# settings describing how features (MFCCs, bottleneck, etc.) are extracted from the audio files of a given path
extractor_options = {
    # filename templates; '{}' is filled in with each show name
    'audio_filename_structure': './data/arctic2/{}.wav',
    'feature_filename_structure': './data/arctic2/feat/{}.h5',
    # signal analysis settings
    'sampling_frequency': 16000,
    'lower_frequency': 200,
    'higher_frequency': 3800,
    'filter_bank': "log",
    'filter_bank_size': 24,
    'window_size': 0.025,
    'shift': 0.01,
    'ceps_number': 24,
    # voice activity detection settings
    'vad': "snr",
    'snr': 40,
    'pre_emphasis': 0.97,
    # what gets written to the feature files
    'save_param': ["vad", "energy", "cep"],
    'keep_all_features': True,
}
extractor = sidekit.FeaturesExtractor(**extractor_options)

In [None]:
# looks like number of threads can change whether ALL files get processed or not;
# keep at 12 threads!
%%time
extractor.save_list(show_list=training_data_list,
                    channel_list=[0]*len(training_data_list),
                    num_thread=12)

## UBM-GMM Model

Note: loading features directly, without first extracting them and saving them to disk, does not seem to work.

In [5]:
feat_dir = "./data/arctic2/feat/train" # folder holding the extracted training features
# feature names are given relative to the 'feat' folder and without extension, e.g. 'train/BDL_arctic_b0247'
# only '.h5' files are listed: a hidden entry such as '.DS_Store' would otherwise become the bogus name 'train/'
# splitext removes just the final extension, so names containing extra dots are kept intact
feat_list = ['train/' + os.path.splitext(f)[0]
             for f in os.listdir(feat_dir)
             if f.endswith('.h5')]

http://www-lium.univ-lemans.fr/sidekit/tutorial/ubmTraining.html

In [6]:
# be careful here; SIDEKIT expects the internal folder of the h5
# and the given filename_structure (the thing inside {}) to match,
# so print one entry to check that the names have the form 'train/<file name>'
print(feat_list[0])

train/BDL_arctic_b0247


In [7]:
# a FeaturesServer describes *how* the stored features are loaded
fs_options = {
    # filename template of the feature files and the datasets read from each of them
    'feature_filename_structure': './data/arctic2/feat/{}.h5',
    'sources': None,
    'dataset_list': ["vad", "energy", "cep"],
    'mask': None,
    # normalisation
    'feat_norm': "cmvn",
    'global_cmvn': None,
    # optional transformations that are switched off
    'dct_pca': False,
    'dct_pca_config': None,
    'sdc': False,
    'sdc_config': None,
    # first and second order derivatives are switched on
    'delta': True,
    'double_delta': True,
    'delta_filter': None,
    'context': None,
    'traps_dct_nb': None,
    'rasta': False,
    'keep_all_features': False,
}
fs = sidekit.FeaturesServer(**fs_options)

In [8]:
# Mixture object that will hold the UBM; it is trained and written to disk in the cells below
ubm = sidekit.Mixture()

In [None]:
# started at 1:20am
%%time
model = ubm.EM_split(features_server=fs,
                      feature_list=feat_list,
                      distrib_nb=512,
                      num_thread=12,
                      save_partial=False
                      )
ubm.write('ubm.h5')

In [None]:
# also store the UBM inside the corpus folder;
# os.path.join gives the right path whether or not corpus_dir ends with a '/'
ubm.write(os.path.join(corpus_dir, 'ubm.h5'))