In [1]:
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch import autograd
from torch.nn import init
import math
from ops import mu_law_encode, one_hot, time_to_batch, batch_to_time
from model import WaveNetModel
from audio_reader import AudioReader
import torch.optim as optim
import librosa
import time
import Queue
import json
# Regex for file names of the form p<digits>_<digits>.wav; each digit run is a capture group.
# NOTE(review): presumably speaker id and utterance number in VCTK naming — confirm.
# Not referenced by any cell visible in this notebook.
FILE_PATTERN = r'p([0-9]+)_([0-9]+)\.wav'

In [2]:
# Parse the hyper-parameter JSON (path is relative to the current working directory)
# into the `wavenet_params` dict used by the cells below.
with open('./wavenet_params.json', 'r') as params_fp:
    wavenet_params = json.loads(params_fp.read())

In [3]:
# Config entries handed to WaveNetModel positionally, in this exact order.
positional_keys = (
    "batch_size",
    "dilations",
    "filter_width",
    "residual_channels",
    "dilation_channels",
    "skip_channels",
)
# Config entries handed to WaveNetModel as keyword arguments of the same name.
keyword_keys = (
    "quantization_channels",
    "use_biases",
    "scalar_input",
    "use_cuda",
    "initial_filter_width",
)

# Build the network from the JSON hyper-parameters; global conditioning is disabled
# by passing None for both global-condition arguments.
model = WaveNetModel(
    *[wavenet_params[key] for key in positional_keys],
    global_condition_channels=None,
    global_condition_cardinality=None,
    **{key: wavenet_params[key] for key in keyword_keys}
)

# Move the model to the GPU when it was configured with use_cuda.
if model.use_cuda:
    model = model.cuda()

# Nesterov-momentum SGD over all model parameters.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.00001,
    momentum=0.9,
    nesterov=True,
)

# Restore the weights from a previously saved state dict.
model.load_state_dict(
    torch.load('./logdir2/step18462-loss=2.960_model.txt')
)

In [4]:
# Receptive field as reported by the model for its own filter/dilation settings.
receptive_field = model.calculate_receptive_field(
    model.filter_width,
    model.dilations,
    model.scalar_input,
    model.initial_filter_width,
)

# NOTE(review): the corpus directory is a machine-specific absolute path — consider
# making it configurable before sharing this notebook.
reader = AudioReader(
    "/home/administrator/workspace/true_model/VCTK-Corpus/wav48",
    sample_rate=wavenet_params["sample_rate"],
    gc_enabled=None,
    receptive_field=receptive_field,
    sample_size=40000,
    silence_threshold=0.01,
    load_size=32,
)

# Time a single reader.thread_main() call and print the elapsed seconds.
# Its return value is kept in num_iters, which the training cell uses as an iteration count.
# NOTE(review): the training cell calls thread_main() again before consuming anything —
# confirm that calling it twice in a row is intended.
load_started = time.time()
num_iters = reader.thread_main()
print(time.time() - load_started)

5.85329794884


In [None]:
# Build a fresh SGD optimizer over the model parameters, using the same
# hyper-parameters as the optimizer created in the model-construction cell.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.00001,
    momentum=0.9,
    nesterov=True,
)

In [None]:
# Training loop. Runs until the kernel is interrupted; the `with` block guarantees
# the loss log is closed (and therefore fully written) when that happens.
logdir_root = './logdir2'
checkpoint_every = 50   # report + checkpoint on every 50th epoch, starting with epoch 0
epoch = 0
step = 0
loss = None             # stays None until at least one training step has run
duration = 0.0          # wall-clock seconds of the most recent training step

with open(logdir_root + '/loss.txt', 'w') as loss_file:
    while True:
        # thread_main() returns how many items this epoch pulls from reader.data_set.
        num_iters = reader.thread_main()
        for _ in range(num_iters):
            start_time = time.time()

            # Wrap the next item from the reader queue as a float Variable, move it
            # to the GPU if the model lives there, then transpose it.
            audio = autograd.Variable(torch.FloatTensor(reader.data_set.get()))
            if model.use_cuda:
                audio = audio.cuda()
            audio = audio.t()

            model.zero_grad()
            loss = model.wavenet_loss(audio)
            # NOTE(review): `.data[0]` is the scalar accessor matching the
            # autograd.Variable API used in this notebook; newer PyTorch releases
            # would use `loss.item()` — confirm before upgrading.
            loss_file.write('{:.3f}\n'.format(loss.cpu().data[0]))
            loss.backward()
            optimizer.step()

            duration = time.time() - start_time
            step += 1

        # Push this epoch's loss values to disk so an abrupt stop loses at most one epoch.
        loss_file.flush()

        # Skip reporting while no step has ever run: there is no loss to print or save.
        if epoch % checkpoint_every == 0 and loss is not None:
            loss_value = loss.cpu().data[0]
            print('epoch {:d}, step {:d} - loss = {:.3f}, ({:.3f} sec/step)'
                  .format(epoch, step, loss_value, duration))
            checkpoint_path = (logdir_root + '/step{:d}-loss={:.3f}_model.txt'
                               .format(step, loss_value))
            # Passing the path lets torch.save open the file itself in binary mode.
            torch.save(model.state_dict(), checkpoint_path)
        epoch += 1

In [None]:
# Show the dilation schedule the model was built with. The call form of print
# works on both Python 2 and Python 3 and matches the other cells.
print(model.dilations)