## Load Model

In [None]:
from wavenet_model import *
from audio_data import WavenetDataset

# Choose the tensor constructors once, depending on whether CUDA is available.
use_cuda = torch.cuda.is_available()
if use_cuda:
    print('use gpu')
    dtype, ltype = torch.cuda.FloatTensor, torch.cuda.LongTensor
else:
    dtype, ltype = torch.FloatTensor, torch.LongTensor

In [None]:
# The model comes from load_latest_model_from (provided by the star import
# above), called on the 'snapshots' directory. Its return value is bound
# directly to `model`, so no throw-away WaveNetModel is constructed first.
model = load_latest_model_from('snapshots', use_cuda=use_cuda)

# Keep the model's tensor type and device in line with the choice made above.
model.dtype = dtype
if use_cuda:
    model.cuda()
else:
    model.cpu()

print('model: ', model)
print('receptive field: ', model.receptive_field)
print('parameter count: ', model.parameter_count())

In [None]:
# item_length is the receptive field plus (output_length - 1) samples.
samples_per_item = model.receptive_field + model.output_length - 1
data = WavenetDataset(
    dataset_file='train_samples/bach_chaconne/dataset.npz',
    item_length=samples_per_item,
    target_length=model.output_length,
    file_location='train_samples/bach_chaconne',
    test_stride=500,
)
print(f'the dataset has {len(data)} items')

## Deployment

In [None]:
import torch
import torch.nn as nn
import os
import onnxruntime
import numpy as np
import onnx
import shutil
from timeit import default_timer as timer
import vai_q_onnx

### PyTorch (CPU)

In [None]:
# Take one item of the dataset as seed material.
start_data = data[260000][0]  # use start data from the data set
start_data = torch.max(start_data, 0)[1]  # convert one hot vectors to integers

# One-hot encode the first seed sample as a (1, classes, 1) float tensor.
first_samples = start_data
input_data = torch.zeros(1, model.classes, 1, dtype=torch.float32)
input_data.scatter_(1, first_samples[0:1].view(1, -1, 1), 1.)

# Time repeated forward passes. Autograd is switched off so the measurement
# covers inference only and no graph is recorded for each call.
num_runs = 1000
start = timer()
with torch.no_grad():
    for _ in range(num_runs):
        model(input_data)
pytorch_total = timer() - start

print(f"Inference Time: {pytorch_total / num_runs}")

In [None]:
def prog_callback(step, total_steps):
    """Print generation progress as a whole-number percentage.

    Args:
        step: Number of steps completed so far.
        total_steps: Total number of steps; must be non-zero.
    """
    percent_done = (100 * step) // total_steps
    print(f"{percent_done}% generated")

# Time a complete generation run of the PyTorch model, seeded with start_data.
start = timer()
generated = model.generate_fast(
    num_samples=160000,
    first_samples=start_data,
    temperature=1.0,
    regularize=0.,
    progress_callback=prog_callback,
    progress_interval=1000,
)
pytorch_total = timer() - start
print(f"Generation Time: {pytorch_total}")

### ONNX Runtime (CPU)

In [None]:
# Prep for ONNX export
model_path = "models/wavenet.onnx"
# The exporter writes to model_path; make sure its directory exists first.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Example input, passed positionally as a one-element tuple so the export
# does not depend on the name of the model's forward parameter. The shape
# matches the one-hot input used elsewhere in this notebook: (1, classes, 1).
inputs = (torch.rand(1, model.classes, 1),)
input_names = ['input']
output_names = ['output']
# Only the batch dimension (axis 0) is exported as dynamic.
dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}

# Call export function
torch.onnx.export(
    model,
    inputs,
    model_path,
    export_params=True,
    opset_version=13,  # Recommended opset
    input_names=input_names,
    output_names=output_names,
    dynamic_axes=dynamic_axes,
)

In [None]:
# Specify the path to the exported ONNX model
model_path = r'./models/wavenet.onnx'
onnx_model = onnx.load(model_path)

cpu_options = onnxruntime.SessionOptions()

# Create Inference Session to run the model on the CPU
cpu_session = onnxruntime.InferenceSession(
    onnx_model.SerializeToString(),
    providers=['CPUExecutionProvider'],
    sess_options=cpu_options,
)

# The graph has a required input, so an empty feed is rejected. Feed the
# one-hot sample built for the PyTorch benchmark, keyed by the input name the
# session itself reports.
cpu_feed = {
    cpu_session.get_inputs()[0].name: input_data.detach().cpu().numpy(),
}

# Run Inference
num_runs = 1000
start = timer()
for _ in range(num_runs):
    cpu_results = cpu_session.run(None, cpu_feed)
cpu_total = timer() - start

print(f"Inference Time: {cpu_total / num_runs}")

In [None]:
def generate_fast(model,
                    num_samples,
                    first_samples=None,
                    temperature=1.,
                    regularize=0.,
                    progress_callback=None,
                    progress_interval=100,
                    session=None):
    """Generate audio sample by sample using an ONNX Runtime session.

    The seed samples are fed through the session one at a time, then
    ``num_samples`` new samples are drawn autoregressively: each drawn sample
    is one-hot encoded and becomes the next session input.

    NOTE(review): every session call receives a single one-hot sample of
    shape (1, classes, 1). Whether the exported graph carries the dilation
    queue state that ``model.dilated_queues`` represents cannot be determined
    from this function -- confirm before relying on the audio quality.

    Args:
        model: Object providing ``classes``, ``dilated_queues``, ``eval()``
            and ``train()``.
        num_samples: Number of new samples to generate.
        first_samples: 1-D integer tensor of seed class indices. Defaults to
            a single sample at ``model.classes // 2``.
        temperature: Softmax temperature. Values ``<= 0`` select the argmax
            instead of sampling.
        regularize: Weight of the quadratic penalty on the distance of a
            class index from ``model.classes / 2``.
        progress_callback: Optional callable ``(step, total_steps)``.
        progress_interval: Call ``progress_callback`` every this many steps.
        session: Session object exposing ``get_inputs()`` and ``run()``.

    Returns:
        The result of ``mu_law_expansion`` applied to the generated values,
        which are scaled as ``(index / classes) * 2 - 1`` beforehand.

    Raises:
        ValueError: If ``session`` is None.
    """
    import time

    if session is None:
        raise ValueError("generate_fast requires an inference session")

    # The feed must be keyed by the graph's input name; ask the session for it
    # rather than hard-coding it.
    input_name = session.get_inputs()[0].name

    model.eval()
    if first_samples is None:
        first_samples = torch.LongTensor(1).zero_() + (model.classes // 2)

    # reset queues
    for queue in model.dilated_queues:
        queue.reset()

    num_given_samples = first_samples.size(0)
    total_samples = num_given_samples + num_samples

    # One-hot buffer that is rewritten in place for every step.
    one_hot = torch.zeros(1, model.classes, 1, dtype=torch.float32)
    one_hot.scatter_(1, first_samples[0:1].view(1, -1, 1), 1.)

    # feed the given samples through the session; outputs are discarded
    for i in range(num_given_samples - 1):
        session.run(None, {input_name: one_hot.numpy()})
        one_hot.zero_()
        one_hot.scatter_(1, first_samples[i + 1:i + 2].view(1, -1, 1), 1.)

        # progress feedback
        if i % progress_interval == 0:
            if progress_callback is not None:
                progress_callback(i, total_samples)

    # generate new samples
    # Preallocated so the loop does not re-copy the whole output every step.
    generated = np.empty(num_samples)
    regularizer = torch.pow(torch.arange(model.classes) - model.classes / 2., 2)
    regularizer = regularizer.squeeze() * regularize
    tic = time.time()
    for i in range(num_samples):
        logits = torch.tensor(
            session.run(None, {input_name: one_hot.numpy()})[0]
        ).squeeze()

        logits -= regularizer

        if temperature > 0:
            # sample from softmax distribution
            logits /= temperature
            np_prob = torch.softmax(logits, dim=0).cpu().numpy()
            sample = int(np.random.choice(model.classes, p=np_prob))
        else:
            # pick the most likely class; argmax works on the 1-D logits
            # without indexing into a 0-dim result
            sample = int(torch.argmax(logits))

        generated[i] = (sample / model.classes) * 2. - 1

        # set new input
        one_hot.zero_()
        one_hot.scatter_(1, torch.tensor([[[sample]]], dtype=torch.long), 1.)

        if (i+1) == 100:
            toc = time.time()
            print("one generating step does take approximately " + str((toc - tic) * 0.01) + " seconds)")

        # progress feedback
        if (i + num_given_samples) % progress_interval == 0:
            if progress_callback is not None:
                progress_callback(i + num_given_samples, total_samples)

    model.train()
    mu_gen = mu_law_expansion(generated, model.classes)
    return mu_gen

# Time a complete generation run driven by the CPU ONNX Runtime session.
start = timer()
generated = generate_fast(
    model=model,
    session=cpu_session,
    num_samples=160000,
    first_samples=start_data,
    temperature=1.0,
    regularize=0.,
    progress_callback=prog_callback,
    progress_interval=1000,
)
cpu_total = timer() - start
print(f"Generation Time: {cpu_total}")

### ONNX Runtime (NPU)

In [None]:
# Point to the config file path used for the VitisAI Execution Provider
config_file_path = "vaip_config.json"

aie_options = onnxruntime.SessionOptions()

# Create Inference Session that runs the model through the VitisAI provider
aie_session = onnxruntime.InferenceSession(
    onnx_model.SerializeToString(),
    providers=['VitisAIExecutionProvider'],
    sess_options=aie_options,
    provider_options=[{'config_file': config_file_path}],
)

# The graph has a required input, so an empty feed is rejected. Feed the
# one-hot sample built for the PyTorch benchmark, keyed by the input name the
# session itself reports.
npu_feed = {
    aie_session.get_inputs()[0].name: input_data.detach().cpu().numpy(),
}

# Run Inference
num_runs = 1000
start = timer()
for _ in range(num_runs):
    npu_results = aie_session.run(None, npu_feed)
npu_total = timer() - start

print(f"Inference Time: {npu_total / num_runs}")

In [None]:
# Time a complete generation run driven by the VitisAI ONNX Runtime session.
start = timer()
generated = generate_fast(
    model=model,
    session=aie_session,
    num_samples=160000,
    first_samples=start_data,
    temperature=1.0,
    regularize=0.,
    progress_callback=prog_callback,
    progress_interval=1000,
)
npu_total = timer() - start
print(f"Generation Time: {npu_total}")

## Generate Audio

In [None]:
import IPython.display as ipd

# Build an audio widget from whatever `generated` currently holds, with a
# playback rate of 16000. This must stay the cell's final bare expression so
# the notebook displays the widget.
ipd.Audio(generated, rate=16000)