# 演習：声質変換

## 環境構築

In [1]:
# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)

In [2]:
# cd /content/drive/'My Drive'/vc_exercise2019

In [3]:
# !pip3 install pyworld
# !pip3 install pysptk
# !pip3 install dtw

## 特徴量の分析

In [4]:
import os
import sys
import glob

from scipy.io import wavfile # for wavfile I/O
import pyworld as pw
import numpy as np
import pysptk as sptk

In [5]:
spklist = ["SF", "TF"]  # speaker list [source female speaker, target female speaker]
featlist = ["mgc","f0","ap"]

In [6]:
# Making directories for speech features
for s in spklist:
    for f in featlist:
        if not os.path.exists("data/{}/{}".format(s,f)):
            os.mkdir("data/{}/{}".format(s,f))

In [7]:
for s in spklist:
    wavlist = os.listdir("data/{}/wav".format(s))
    for wf in wavlist:
        # WORLD analysis for each file
        print("spekaer: {} file: {}".format(s,wf))
        fs, data = wavfile.read("data/{}/wav/{}".format(s,wf))
        data = data.astype(np.float)

        f0, t = pw.harvest(data, fs)
        sp = pw.cheaptrick(data, f0, t, fs)
        ap = pw.d4c(data, f0, t, fs)

        alpha = 0.42
        dim = 24
        mgc = sptk.sp2mc(sp, dim, alpha)

        bn, _ = os.path.splitext(wf)

        with open("data/{}/mgc/{}.mgc".format(s,bn),"wb") as f:
            mgc.tofile(f)
        with open("data/{}/f0/{}.f0".format(s,bn),"wb") as f:
            f0.tofile(f)
        with open("data/{}/ap/{}.ap".format(s,bn),"wb") as f:
            ap.tofile(f)

spekaer: SF file: atr503_a20.wav
spekaer: SF file: atr503_a49.wav
spekaer: SF file: atr503_a33.wav
spekaer: SF file: atr503_a42.wav
spekaer: SF file: atr503_a37.wav
spekaer: SF file: atr503_a08.wav
spekaer: SF file: atr503_a30.wav
spekaer: SF file: atr503_a01.wav
spekaer: SF file: atr503_a44.wav
spekaer: SF file: atr503_a32.wav
spekaer: SF file: atr503_a07.wav
spekaer: SF file: atr503_a22.wav
spekaer: SF file: atr503_a35.wav
spekaer: SF file: atr503_a46.wav
spekaer: SF file: atr503_a13.wav
spekaer: SF file: atr503_a27.wav
spekaer: SF file: atr503_a39.wav
spekaer: SF file: atr503_a36.wav
spekaer: SF file: atr503_a15.wav
spekaer: SF file: atr503_a03.wav
spekaer: SF file: atr503_a21.wav
spekaer: SF file: atr503_a18.wav
spekaer: SF file: atr503_a09.wav
spekaer: SF file: atr503_a34.wav
spekaer: SF file: atr503_a24.wav
spekaer: SF file: atr503_a11.wav
spekaer: SF file: atr503_a38.wav
spekaer: SF file: atr503_a43.wav
spekaer: SF file: atr503_a06.wav
spekaer: SF file: atr503_a26.wav
spekaer: S

## フレーム毎時間アラインメント

In [8]:
import os
import sys
import array

from dtw import dtw
import numpy as np
import pysptk as sptk

In [11]:
srcspk = "SF"
tgtspk = "TF"

mgclist = os.listdir("data/{}/mgc".format(srcspk))

if not os.path.isdir("data/{}/data".format(srcspk)):
    os.mkdir("data/{}/data".format(srcspk))
if not os.path.isdir("data/{}/data".format(tgtspk)):
    os.mkdir("data/{}/data".format(tgtspk))

In [12]:
def distfunc(x,y):
    # Euclid distance except first dim
    return np.linalg.norm(x[1:]-y[1:])

In [13]:
dim = 25 # mgc dim + 1
for mf in mgclist:
    print(mf)
    bn, _ = os.path.splitext(mf)
    srcfile = "data/{}/mgc/{}".format(srcspk,mf)
    tgtfile = "data/{}/mgc/{}".format(tgtspk,mf)

    with open(srcfile,"rb") as f:
        x = np.fromfile(f, dtype="<f8", sep="")
        x = x.reshape(len(x)//dim,dim)
    with open(tgtfile,"rb") as f:
        y = np.fromfile(f, dtype="<f8", sep="")
        y = y.reshape(len(y)//dim,dim)
    print("framelen: (x,y) = {} {}".format(len(x),len(y)))
    _,_,_, twf = dtw(x,y,distfunc)
    srcout = "data/{}/data/{}.dat".format(srcspk,bn)
    tgtout = "data/{}/data/{}.dat".format(tgtspk,bn)

    with open(srcout,"wb") as f:
        x[twf[0]].tofile(f)
    with open(tgtout,"wb") as f:
        y[twf[1]].tofile(f)

atr503_a36.mgc
framelen: (x,y) = 863 911
atr503_a31.mgc
framelen: (x,y) = 1549 1807
atr503_a37.mgc
framelen: (x,y) = 1137 1265
atr503_a14.mgc
framelen: (x,y) = 1211 1217
atr503_a29.mgc
framelen: (x,y) = 1611 1883
atr503_a46.mgc
framelen: (x,y) = 1881 1993
atr503_a39.mgc
framelen: (x,y) = 2087 2339
atr503_a03.mgc
framelen: (x,y) = 1027 907
atr503_a12.mgc
framelen: (x,y) = 1201 1253
atr503_a49.mgc
framelen: (x,y) = 1357 1465
atr503_a45.mgc
framelen: (x,y) = 1585 1525
atr503_a08.mgc
framelen: (x,y) = 1153 1039
atr503_a35.mgc
framelen: (x,y) = 1217 1157
atr503_a04.mgc
framelen: (x,y) = 1095 1149
atr503_a16.mgc
framelen: (x,y) = 1183 1205
atr503_a05.mgc
framelen: (x,y) = 1195 1061
atr503_a23.mgc
framelen: (x,y) = 1353 1395
atr503_a33.mgc
framelen: (x,y) = 1951 1943
atr503_a13.mgc
framelen: (x,y) = 1411 1377
atr503_a24.mgc
framelen: (x,y) = 939 837
atr503_a30.mgc
framelen: (x,y) = 1501 1529
atr503_a47.mgc
framelen: (x,y) = 1297 1123
atr503_a15.mgc
framelen: (x,y) = 1635 1605
atr503_a10.mgc
f

## 音声変換モデルの学習

In [14]:
# Listing training/evaluation data
!mkdir -p conf
!ls data/SF/data/ | head -45 | sed -e 's/\.dat//' > conf/train.list
!ls data/SF/data/ | tail -5 | sed -e 's/\.dat//' > conf/eval.list

In [15]:
import numpy as np
import torch
from torch import nn, optim
from torch.nn import functional as F
import os
import sys
import time

In [16]:
def get_dataset(dim=25):
    x = []
    y = []
    datalist = []
    with open("conf/train.list","r") as f:
        for line in f:
            line = line.rstrip()
            datalist.append(line)

    for d in datalist:
        print(d)
        with open("data/SF/data/{}.dat".format(d),"rb") as f:
            dat = np.fromfile(f,dtype="<f8",sep="")
            x.append(dat.reshape(len(dat)//dim,dim))
        with open("data/TF/data/{}.dat".format(d),"rb") as f:
            dat = np.fromfile(f,dtype="<f8",sep="")
            y.append(dat.reshape(len(dat)//dim,dim))
    return x,y

In [17]:
class VCDNN(nn.Module):
        def __init__(self, dim=25, n_units=256):
            super(VCDNN, self).__init__()
            self.fc = nn.ModuleList([
                           nn.Linear(dim, n_units),
                           nn.Linear(n_units, n_units),
                           nn.Linear(n_units, dim)
            ])
            
        def forward(self, x):
            h1 = F.relu(self.fc[0](x))
            h2 = F.relu(self.fc[1](h1))
            h3 = self.fc[2](h2)
            return h3
        
        def get_predata(self, x):
            _x = torch.from_numpy(x.astype(np.float32))
            return self.forward(_x).detach().numpy()

In [18]:
x_train, y_train = get_dataset()
# parameters for training
n_epoch = 50
dim = 25
n_units = 128
N = len(x_train)

model = VCDNN(dim,n_units)
model.double()
optimizer = optim.Adam(model.parameters())

loss_fn = nn.MSELoss()

# loop
model.train()

losses = []
sum_loss = 0

for epoch in range(1, n_epoch + 1):
    sum_loss = 0

    for i in range(0, N):
        x_batch =torch.from_numpy(x_train[i])
        y_batch = torch.from_numpy(y_train[i])
        
        optimizer.zero_grad()
        
        predict_y_batch = model(x_batch)
        loss = loss_fn(predict_y_batch, y_batch)
        loss.backward()
        optimizer.step()
        sum_loss += loss.item()
        
        average_loss = sum_loss / N
        losses.append(average_loss)

        print("epoch: {}/{}  loss: {}".format(epoch, n_epoch, average_loss))

if not os.path.isdir("model"):
    os.mkdir("model")
torch.save(model.state_dict(), "model/vcmodel.model")


atr503_a01
atr503_a02
atr503_a03
atr503_a04
atr503_a05
atr503_a06
atr503_a07
atr503_a08
atr503_a09
atr503_a10
atr503_a11
atr503_a12
atr503_a13
atr503_a14
atr503_a15
atr503_a16
atr503_a17
atr503_a18
atr503_a19
atr503_a20
atr503_a21
atr503_a22
atr503_a23
atr503_a24
atr503_a25
atr503_a26
atr503_a27
atr503_a28
atr503_a29
atr503_a30
atr503_a31
atr503_a32
atr503_a33
atr503_a34
atr503_a35
atr503_a36
atr503_a37
atr503_a38
atr503_a39
atr503_a40
atr503_a41
atr503_a42
atr503_a43
atr503_a44
atr503_a45
epoch: 1/50  loss: 0.022156860242403268
epoch: 1/50  loss: 0.03895483855767881
epoch: 1/50  loss: 0.05599460607183582
epoch: 1/50  loss: 0.07497694082277753
epoch: 1/50  loss: 0.09145679547617543
epoch: 1/50  loss: 0.106792412335982
epoch: 1/50  loss: 0.12438983289474657
epoch: 1/50  loss: 0.1386093248001521
epoch: 1/50  loss: 0.1538566717445854
epoch: 1/50  loss: 0.1674817246277683
epoch: 1/50  loss: 0.18183036667231503
epoch: 1/50  loss: 0.19491403447079947
epoch: 1/50  loss: 0.20460821388802017
ep

epoch: 5/50  loss: 0.027948910444327804
epoch: 5/50  loss: 0.029193520645265137
epoch: 5/50  loss: 0.030196258351134223
epoch: 5/50  loss: 0.03126397793172374
epoch: 5/50  loss: 0.032222160244364716
epoch: 5/50  loss: 0.03322033363604575
epoch: 5/50  loss: 0.03452253224084775
epoch: 5/50  loss: 0.03540692017026725
epoch: 5/50  loss: 0.036254358510794275
epoch: 5/50  loss: 0.03716379510329055
epoch: 5/50  loss: 0.038483944759372925
epoch: 5/50  loss: 0.039513881191786414
epoch: 5/50  loss: 0.040636331326820166
epoch: 5/50  loss: 0.041677027399657764
epoch: 5/50  loss: 0.04254643000214121
epoch: 5/50  loss: 0.04359124982279027
epoch: 5/50  loss: 0.04454097802015025
epoch: 5/50  loss: 0.04563922324291563
epoch: 5/50  loss: 0.04684047891626716
epoch: 5/50  loss: 0.04797842891341195
epoch: 6/50  loss: 0.0012716344723148708
epoch: 6/50  loss: 0.0023240212202404694
epoch: 6/50  loss: 0.003373095829932541
epoch: 6/50  loss: 0.0044853039484321995
epoch: 6/50  loss: 0.005460487258512
epoch: 6/50

epoch: 10/50  loss: 0.024764567607372185
epoch: 10/50  loss: 0.025769103332079046
epoch: 10/50  loss: 0.026944786146826463
epoch: 10/50  loss: 0.027853123405676534
epoch: 10/50  loss: 0.028809145471279076
epoch: 10/50  loss: 0.029716889327971806
epoch: 10/50  loss: 0.030636234304742462
epoch: 10/50  loss: 0.031832742705562134
epoch: 10/50  loss: 0.032636033503110846
epoch: 10/50  loss: 0.033390491167148324
epoch: 10/50  loss: 0.034256087450538306
epoch: 10/50  loss: 0.03552168918252173
epoch: 10/50  loss: 0.036493257604618815
epoch: 10/50  loss: 0.03751999843500534
epoch: 10/50  loss: 0.038491090864991334
epoch: 10/50  loss: 0.0392883336326963
epoch: 10/50  loss: 0.04025172462638872
epoch: 10/50  loss: 0.04114435534831694
epoch: 10/50  loss: 0.04215817642857794
epoch: 10/50  loss: 0.04325980873898286
epoch: 10/50  loss: 0.044316571486485594
epoch: 11/50  loss: 0.0011924406188414498
epoch: 11/50  loss: 0.0021849368836839565
epoch: 11/50  loss: 0.0031776754188419087
epoch: 11/50  loss: 0

epoch: 15/50  loss: 0.023959351369055193
epoch: 15/50  loss: 0.024929439556779898
epoch: 15/50  loss: 0.02607495999648009
epoch: 15/50  loss: 0.026943792293105014
epoch: 15/50  loss: 0.02785402973451672
epoch: 15/50  loss: 0.02873937179203097
epoch: 15/50  loss: 0.02963237047259736
epoch: 15/50  loss: 0.03077780462061003
epoch: 15/50  loss: 0.03154648532072429
epoch: 15/50  loss: 0.03226737096004586
epoch: 15/50  loss: 0.03311538884649389
epoch: 15/50  loss: 0.03435395786509905
epoch: 15/50  loss: 0.03529787348640617
epoch: 15/50  loss: 0.036295159817741215
epoch: 15/50  loss: 0.03724632162897115
epoch: 15/50  loss: 0.038017899656331025
epoch: 15/50  loss: 0.03896013423748509
epoch: 15/50  loss: 0.039829441787831654
epoch: 15/50  loss: 0.04081982751462803
epoch: 15/50  loss: 0.041883546835684016
epoch: 15/50  loss: 0.0429073423489387
epoch: 16/50  loss: 0.001153226109222934
epoch: 16/50  loss: 0.002116662754941452
epoch: 16/50  loss: 0.003086771683453749
epoch: 16/50  loss: 0.004110789

epoch: 20/50  loss: 0.02263058484536917
epoch: 20/50  loss: 0.023589599608105062
epoch: 20/50  loss: 0.02453904762026878
epoch: 20/50  loss: 0.025671499421521683
epoch: 20/50  loss: 0.026523714507869355
epoch: 20/50  loss: 0.027415852465333568
epoch: 20/50  loss: 0.028291914584740625
epoch: 20/50  loss: 0.029175290932334178
epoch: 20/50  loss: 0.03028944241486444
epoch: 20/50  loss: 0.03104624905248161
epoch: 20/50  loss: 0.03175525970382624
epoch: 20/50  loss: 0.03259616408227263
epoch: 20/50  loss: 0.03382065581418033
epoch: 20/50  loss: 0.03475306076013167
epoch: 20/50  loss: 0.035738884135361526
epoch: 20/50  loss: 0.03667637201424731
epoch: 20/50  loss: 0.037430536252803084
epoch: 20/50  loss: 0.038364795541947654
epoch: 20/50  loss: 0.03922271139044215
epoch: 20/50  loss: 0.0402082840626409
epoch: 20/50  loss: 0.04125447606194669
epoch: 20/50  loss: 0.04226250212980333
epoch: 21/50  loss: 0.0011374929349530704
epoch: 21/50  loss: 0.0020843474773201936
epoch: 21/50  loss: 0.003041

epoch: 25/50  loss: 0.023364628758174764
epoch: 25/50  loss: 0.02429688635899708
epoch: 25/50  loss: 0.025422276588136054
epoch: 25/50  loss: 0.02626424673957277
epoch: 25/50  loss: 0.027148121467338773
epoch: 25/50  loss: 0.028024011387563223
epoch: 25/50  loss: 0.028905474765027743
epoch: 25/50  loss: 0.029996120713921378
epoch: 25/50  loss: 0.030744258546446274
epoch: 25/50  loss: 0.03144785831035838
epoch: 25/50  loss: 0.03228479220473667
epoch: 25/50  loss: 0.033503191542323396
epoch: 25/50  loss: 0.03442803455385683
epoch: 25/50  loss: 0.03540780418899718
epoch: 25/50  loss: 0.036335681137762935
epoch: 25/50  loss: 0.03707871883825262
epoch: 25/50  loss: 0.03801042665444527
epoch: 25/50  loss: 0.03886264877972254
epoch: 25/50  loss: 0.03984381067289158
epoch: 25/50  loss: 0.04087910092587655
epoch: 25/50  loss: 0.04187788293542767
epoch: 26/50  loss: 0.001131019348250767
epoch: 26/50  loss: 0.002068576439854187
epoch: 26/50  loss: 0.003019432315807458
epoch: 26/50  loss: 0.004011

epoch: 30/50  loss: 0.023229146856322987
epoch: 30/50  loss: 0.024145396540274207
epoch: 30/50  loss: 0.025263512761202086
epoch: 30/50  loss: 0.02609729793124119
epoch: 30/50  loss: 0.026974900310448115
epoch: 30/50  loss: 0.027851388259814907
epoch: 30/50  loss: 0.028733495945136543
epoch: 30/50  loss: 0.02980602680876334
epoch: 30/50  loss: 0.030548388335379425
epoch: 30/50  loss: 0.031249707336794837
epoch: 30/50  loss: 0.03208545569405108
epoch: 30/50  loss: 0.033298454608256034
epoch: 30/50  loss: 0.034217039133718666
epoch: 30/50  loss: 0.03519249740356963
epoch: 30/50  loss: 0.03611188859651799
epoch: 30/50  loss: 0.03684704163737317
epoch: 30/50  loss: 0.03777280698398617
epoch: 30/50  loss: 0.03862122735014755
epoch: 30/50  loss: 0.03960124032929059
epoch: 30/50  loss: 0.040629779862848894
epoch: 30/50  loss: 0.041621705616963954
epoch: 31/50  loss: 0.0011309508882531914
epoch: 31/50  loss: 0.0020648609697525113
epoch: 31/50  loss: 0.003012865829372332
epoch: 31/50  loss: 0.0

epoch: 35/50  loss: 0.023133659962772127
epoch: 35/50  loss: 0.024035939111029598
epoch: 35/50  loss: 0.025145852841875305
epoch: 35/50  loss: 0.025972125017048184
epoch: 35/50  loss: 0.026844869699237058
epoch: 35/50  loss: 0.027721496713471967
epoch: 35/50  loss: 0.028602962888243707
epoch: 35/50  loss: 0.029661258791652396
epoch: 35/50  loss: 0.030400182124717182
epoch: 35/50  loss: 0.031100496168100105
epoch: 35/50  loss: 0.031936501962380216
epoch: 35/50  loss: 0.03314618546262684
epoch: 35/50  loss: 0.03405996999904349
epoch: 35/50  loss: 0.03503472574617608
epoch: 35/50  loss: 0.035946591977309476
epoch: 35/50  loss: 0.03667678683387496
epoch: 35/50  loss: 0.03759415582875297
epoch: 35/50  loss: 0.038435334081711924
epoch: 35/50  loss: 0.03941147844561778
epoch: 35/50  loss: 0.040434727233943486
epoch: 35/50  loss: 0.04142172538630358
epoch: 36/50  loss: 0.0011347720787392907
epoch: 36/50  loss: 0.002070801610853887
epoch: 36/50  loss: 0.0030198310682709863
epoch: 36/50  loss: 0

epoch: 40/50  loss: 0.023046519183529457
epoch: 40/50  loss: 0.023938229638969143
epoch: 40/50  loss: 0.02504000713477748
epoch: 40/50  loss: 0.02585875155728573
epoch: 40/50  loss: 0.026725167792589596
epoch: 40/50  loss: 0.02759899874891133
epoch: 40/50  loss: 0.028478223755327287
epoch: 40/50  loss: 0.029521571855774056
epoch: 40/50  loss: 0.030256527166401127
epoch: 40/50  loss: 0.030957079675457095
epoch: 40/50  loss: 0.031792783227117566
epoch: 40/50  loss: 0.03300019392043088
epoch: 40/50  loss: 0.033910546491492485
epoch: 40/50  loss: 0.03488517639273987
epoch: 40/50  loss: 0.03579186290127833
epoch: 40/50  loss: 0.03651861145238777
epoch: 40/50  loss: 0.03742754822838997
epoch: 40/50  loss: 0.03826317864053751
epoch: 40/50  loss: 0.03923434456049765
epoch: 40/50  loss: 0.0402488510271563
epoch: 40/50  loss: 0.041232128056226953
epoch: 41/50  loss: 0.0011418993381388895
epoch: 41/50  loss: 0.0020812727998350466
epoch: 41/50  loss: 0.003033167427225862
epoch: 41/50  loss: 0.0040

epoch: 45/50  loss: 0.022960920841915353
epoch: 45/50  loss: 0.02384522862459038
epoch: 45/50  loss: 0.024942154533358494
epoch: 45/50  loss: 0.025755861631067276
epoch: 45/50  loss: 0.026616222409446437
epoch: 45/50  loss: 0.02748558466663082
epoch: 45/50  loss: 0.0283597509766538
epoch: 45/50  loss: 0.029388717034045487
epoch: 45/50  loss: 0.0301204991964866
epoch: 45/50  loss: 0.030820320246269524
epoch: 45/50  loss: 0.03165462665641052
epoch: 45/50  loss: 0.032857984877945653
epoch: 45/50  loss: 0.03376648575282656
epoch: 45/50  loss: 0.034741718665159814
epoch: 45/50  loss: 0.03564499781978719
epoch: 45/50  loss: 0.03636956607575179
epoch: 45/50  loss: 0.03727070865707642
epoch: 45/50  loss: 0.03809890336537518
epoch: 45/50  loss: 0.039063115065679575
epoch: 45/50  loss: 0.04006909249625805
epoch: 45/50  loss: 0.04104796143918902
epoch: 46/50  loss: 0.001145126639111388
epoch: 46/50  loss: 0.002086856762117418
epoch: 46/50  loss: 0.00304110249061124
epoch: 46/50  loss: 0.004011369

epoch: 50/50  loss: 0.024838612313231905
epoch: 50/50  loss: 0.025647029611961712
epoch: 50/50  loss: 0.02650306117160045
epoch: 50/50  loss: 0.027367125894478338
epoch: 50/50  loss: 0.028236824567525895
epoch: 50/50  loss: 0.029252842096865194
epoch: 50/50  loss: 0.029980613036789425
epoch: 50/50  loss: 0.030679370617771044
epoch: 50/50  loss: 0.03151166504611028
epoch: 50/50  loss: 0.03271021966846676
epoch: 50/50  loss: 0.03361652635926741
epoch: 50/50  loss: 0.03459147581754196
epoch: 50/50  loss: 0.035492436418812764
epoch: 50/50  loss: 0.03621477169154329
epoch: 50/50  loss: 0.03710946408606979
epoch: 50/50  loss: 0.03793255526226545
epoch: 50/50  loss: 0.038890195138867524
epoch: 50/50  loss: 0.039886716732341704
epoch: 50/50  loss: 0.040860393042435675


In [19]:
!ls ./model/

vcmodel.model


## 学習したモデルによる音声の変換

In [20]:
import numpy as np
import pysptk as sptk
import pyworld as pw
from scipy.io import wavfile
import os
import sys
import time

In [21]:
dim = 25
n_units = 128

model = VCDNN(dim,n_units)
_ = model.load_state_dict(torch.load("model/vcmodel.model"))

In [22]:
# test data
x = []
datalist = []
with open("conf/eval.list","r") as f:
    for line in f:
        line = line.rstrip()
        datalist.append(line)

for d in datalist:
    with open("data/SF/mgc/{}.mgc".format(d),"rb") as f:
        dat = np.fromfile(f,dtype="<f8",sep="")
        x.append(dat.reshape(len(dat)//dim,dim))

if not os.path.isdir("result"):
    os.mkdir("result")
if not os.path.isdir("result/wav"):
    os.mkdir("result/wav")

fs = 16000
fftlen = 512
alpha = 0.42
for i in range(0,len(datalist)):
    outfile = "result/wav/{}.wav".format(datalist[i])
    with open("data/SF/f0/{}.f0".format(datalist[i]),"rb") as f:
        f0 = np.fromfile(f, dtype="<f8", sep="")
    with open("data/SF/ap/{}.ap".format(datalist[i]),"rb") as f:
        ap = np.fromfile(f, dtype="<f8", sep="")
        ap = ap.reshape(len(ap)//(fftlen+1),fftlen+1)
    y = model.get_predata(x[i])
    y = y.astype(np.float64)
    sp = sptk.mc2sp(y, alpha, fftlen*2)
    owav = pw.synthesize(f0, sp, ap, fs)
    owav = np.clip(owav, -32768, 32767)
    wavfile.write(outfile, fs, owav.astype(np.int16))

In [23]:
!ls result/wav

atr503_a46.wav	atr503_a47.wav	atr503_a48.wav	atr503_a49.wav	atr503_a50.wav
