# WaveNet - Generate a Sample

In [1]:
import sys, os
#sys.path.append('../../src/')
sys.path.append('../../network/')

In [2]:
import torch
import librosa
import librosa.output
import datetime
import numpy as np
from types import SimpleNamespace
torch.cuda.empty_cache()
import random

In [3]:
import matplotlib.pyplot as plt
# Apply the 'seaborn' style sheet to every plot drawn in this notebook.
# NOTE(review): Matplotlib 3.6 deprecated the bare 'seaborn' style name in
# favour of 'seaborn-v0_8' -- confirm against the installed Matplotlib version.
plt.style.use('seaborn')

In [4]:
from models.wavenet_no_padding_good_3.model import WaveNet
import models.wavenet_no_padding_good_3.utils.data as utils

In [5]:
# Settings consumed by Generator: the first four entries are forwarded to the
# WaveNet constructor, `sample_size` caps the seed length in samples,
# `sample_rate` is used for loading and writing audio, and
# `length * sample_rate` is the number of samples to generate.
params = SimpleNamespace(**{
    "layer_size": 10,
    "stack_size": 5,
    "in_channels": 256,
    "res_channels": 512,
    "sample_size": 15000,
    "sample_rate": 22050,
    "length": 10,
})

In [6]:
class Generator:
    """Generate audio with a trained WaveNet, seeded from an existing recording.

    ``args`` is a namespace-like object. The attributes read by this class are:

    * ``layer_size``, ``stack_size``, ``in_channels``, ``res_channels`` --
      forwarded to the ``WaveNet`` constructor.
    * ``model_dir``, ``model_name``, ``step`` -- forwarded to ``WaveNet.load``.
    * ``seed`` -- path of the audio file the seed is cut from.
    * ``sample_rate`` -- rate used to load the seed and to write the result.
    * ``sample_size`` -- maximum seed length in samples, counting the leading
      zero padding; a falsy value selects twice the receptive field instead.
    * ``length`` -- ``length * sample_rate`` samples are generated.
    * ``out`` -- path of the WAV file that is written.
    """

    def __init__(self, args):
        """Build the network from ``args`` and load its saved weights."""
        self.args = args

        self.wavenet = WaveNet(args.layer_size, args.stack_size,
                               args.in_channels, args.res_channels)

        self.wavenet.load(args.model_dir, args.model_name, args.step)

    @staticmethod
    def _variable(data):
        """Return ``data`` (a NumPy array) as a float32 tensor, on the GPU if available."""
        tensor = torch.from_numpy(data).float()

        # No torch.autograd.Variable wrapper: since PyTorch 0.4 (already
        # required by the torch.no_grad() used in generate) it just returns
        # the tensor itself.
        return tensor.cuda() if torch.cuda.is_available() else tensor

    def _make_seed(self, audio):
        """Left-pad ``audio`` with zeros and cut it to the seed length.

        ``audio`` must be 2-D; it is wrapped in a leading axis of size 1 and
        padded with ``receptive_fields`` zero rows at the start of its first
        axis. The result keeps the first ``args.sample_size`` rows, or the
        first ``2 * receptive_fields`` rows when ``sample_size`` is falsy.
        """
        receptive_fields = self.wavenet.receptive_fields
        padded = np.pad([audio], [[0, 0], [receptive_fields, 0], [0, 0]], 'constant')

        seed_length = self.args.sample_size or receptive_fields * 2

        return padded[:, :seed_length, :]

    def _get_seed_from_audio(self, filepath):
        """Load ``filepath`` and build a seed starting at a random offset.

        Returns:
            ``(seed, audio_length)`` where ``seed`` is the encoded, padded and
            truncated tensor from ``_variable`` and ``audio_length`` is the
            number of raw samples left after the random offset.

        Raises:
            ValueError: if the loaded audio contains no samples.
        """
        audio = utils.load_audio(filepath, self.args.sample_rate)
        if len(audio) == 0:
            raise ValueError('Seed audio {} contains no samples.'.format(filepath))

        # Pick a start that leaves more than one receptive field of audio.
        # Clips that are too short for that are used from their beginning
        # instead of failing on an empty choice.
        max_start = len(audio) - self.wavenet.receptive_fields
        start = random.randrange(max_start) if max_start > 0 else 0
        audio = audio[start:]
        audio_length = len(audio)

        audio = utils.mu_law_encode(audio, self.args.in_channels)
        audio = utils.one_hot_encode(audio, self.args.in_channels)

        seed = self._make_seed(audio)

        return self._variable(seed), audio_length

    def _save_to_audio_file(self, data):
        """Decode the first batch entry of ``data``, write it to ``args.out`` and return it."""
        decoded = data[0].detach().cpu().numpy()
        decoded = utils.one_hot_decode(decoded, axis=1)
        audio = utils.mu_law_decode(decoded, self.args.in_channels)

        # NOTE(review): librosa.output was removed in librosa 0.8, so this call
        # presumably pins the notebook to an older librosa -- confirm.
        librosa.output.write_wav(self.args.out, np.array(audio, dtype="float32"), self.args.sample_rate)
        print('Saved wav file at {}'.format(self.args.out))

        return audio

    def generate(self):
        """Run the network repeatedly until enough samples exist, then save them.

        Each pass feeds the current input window to ``wavenet.generate`` and
        appends the result along axis 1. The concatenated output is truncated
        to ``int(args.length * args.sample_rate)`` samples and handed to
        ``_save_to_audio_file``, whose return value is returned.

        Raises:
            RuntimeError: if the network returns zero samples for a window,
                which would otherwise leave the loop unable to make progress.
        """
        target = int(self.args.length * self.args.sample_rate)

        with torch.no_grad():
            inputs, _ = self._get_seed_from_audio(self.args.seed)

            # Collect the pieces and concatenate once at the end rather than
            # re-copying the whole output tensor on every iteration.
            chunks = []
            generated = 0

            while True:
                new = self.wavenet.generate(inputs)
                step = new.shape[1]
                if step == 0:
                    raise RuntimeError(
                        'WaveNet produced no samples; the seed is probably not '
                        'longer than the receptive field.')

                chunks.append(new)
                generated += step

                print('{0}/{1} samples are generated.'.format(generated, target))

                if generated >= target:
                    break

                # Keep the head of the previous window and replace its tail
                # with the freshly generated samples.
                # NOTE(review): this retains the *leading* part of the window
                # rather than sliding it forward -- confirm this is intended.
                inputs = torch.cat((inputs[:, :-step, :], new), dim=1)

            outputs = torch.cat(chunks, dim=1)[:, :target, :]

        return self._save_to_audio_file(outputs)

In [9]:
# Checkpoint to restore (directory, name, step), the recording the seed is cut
# from, and the destination of the generated WAV file.
vars(params).update(
    model_dir='../../network/weights/wavenet/',
    model_name='wavenet-tapping-glass-tiny-jar',
    step=0,
    seed='../../data/processed/tapping/tapping-glass/PLhDdb5CgZ4-tiny-jar.wav',
    out='../../network/outputs/wavenet/wavenet-out-tapping-glass-tiny-jar-nn.wav',
)

In [10]:
# Uncomment the next line to hide all GPUs from CUDA before the model is built
# (presumably to force the run onto the CPU).
#os.environ["CUDA_VISIBLE_DEVICES"]=""
# Build the generator from `params` (this loads the saved weights), then
# generate audio; `x` receives the value returned by Generator.generate().
generator = Generator(params)
x = generator.generate()

2 GPUs are detected.
Loading model from ../../network/weights/wavenet/
../../data/processed/tapping/tapping-glass/PLhDdb5CgZ4-tiny-jar.wav
9885/220500 samples are generated.
19770/220500 samples are generated.
29655/220500 samples are generated.
39540/220500 samples are generated.
49425/220500 samples are generated.
59310/220500 samples are generated.
69195/220500 samples are generated.
79080/220500 samples are generated.
88965/220500 samples are generated.
98850/220500 samples are generated.
108735/220500 samples are generated.
118620/220500 samples are generated.
128505/220500 samples are generated.
138390/220500 samples are generated.
148275/220500 samples are generated.
158160/220500 samples are generated.
168045/220500 samples are generated.
177930/220500 samples are generated.
187815/220500 samples are generated.
197700/220500 samples are generated.
207585/220500 samples are generated.
217470/220500 samples are generated.
227355/220500 samples are generated.


IndexError: tuple index out of range

In [None]:
# Plot the waveform returned by generator.generate().
plt.plot(x)

In [None]:
# Display the model's receptive field size; Generator pads the seed with this
# many leading zeros.
generator.wavenet.receptive_fields

In [None]:
# Load the seed recording at the sample rate configured in `params`, instead
# of relying on librosa's implicit default rate, and plot its first 20000
# samples.
seed, sr = librosa.load(params.seed, sr=params.sample_rate)
plt.plot(seed[:20000])

In [None]:
# Keep an independent copy of the generated waveform (presumably so it can be
# compared with the result of a later run).
last = x.copy()