## Synthesising single samples from a trained model

In [None]:
import numpy as np
import json
from IPython.display import display, Audio
from tqdm import tqdm
import soundfile
import os
import torch

from models import ccwavegan_gen_xs

In [None]:
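# GPU setup (disabled): uncomment this cell to pin the notebook to CUDA device 0 and make CUDA float tensors the default.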
# n_gpu = torch.cuda.device_count()
# print("n_gpu: ", n_gpu)

# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"]="0"

# visible_gpu = os.environ["CUDA_VISIBLE_DEVICES"]
# print("visible_gpus: ", visible_gpu)

# if n_gpu == 0:
#     print("Warning: There\'s no GPU available on this machine")
#     device = 'cpu'
# else:
#     device = 'cuda:0'
#     torch.set_default_tensor_type('torch.cuda.FloatTensor')
#     print("Default tensor type set to torch.cuda.FloatTensor")

# print("Device: ", device)

#### Get the trained model and class labels

In [None]:
# Locations of the trained checkpoint, its label map and the output folder.
# All three live under the same training-run directory.
checkpoint_dir = 'checkpoints/2021-09-20_13h23m-hifi'
path_to_model = f'{checkpoint_dir}/120000_batch_model.pth'
path_to_labels = f'{checkpoint_dir}/label_names.json'
path_to_output = f'{checkpoint_dir}/120k_generated_audio_'

# exist_ok avoids the check-then-create race and is a no-op when the
# directory is already there.
os.makedirs(path_to_output, exist_ok=True)

# Size of the latent noise vector fed to the generator.
z_dim = 100
# Sample rate (Hz) used for playback and for the written .wav files.
sample_rate = 16000

In [None]:
# Load the training checkpoint onto the CPU regardless of the device it was
# saved from.
# NOTE(security): torch.load unpickles the file, so only load checkpoints
# that come from a trusted source.
model = torch.load(path_to_model, map_location=torch.device('cpu'))

# Rebuild the generator architecture before restoring its weights.
# NOTE(review): n_classes is hard-coded; presumably it has to match the number
# of entries in the label file written during training -- confirm.
generator = ccwavegan_gen_xs.CCWaveGANGenerator(
    latent_dim=z_dim,  # same value used to size the noise vectors
    n_classes=7,
    verbose=False,
    upsample_mode='zeros',
)

# Restore the generator weights stored under the 'g_state_dict' key.
generator.load_state_dict(model['g_state_dict'])

In [None]:
# Read the label dictionary that was written during training.
# JSON is UTF-8 by specification, so do not rely on the platform default
# encoding when opening the file.
with open(path_to_labels, encoding='utf-8') as json_file:
    label_names = json.load(json_file)
label_names

#### Generating a single sample (with label)

In [None]:
generator.eval()

#create noise and label
label = 0
noise = torch.normal(mean=0, std=1, size=(1, z_dim))
label_synth = torch.tensor([label], dtype=torch.int)

#synthesise the audio
%time synth_audio = generator(noise, label_synth)

torch.squeeze(synth_audio)

display(Audio(synth_audio[0].detach().cpu().numpy(), rate = sample_rate))

In [None]:
import timeit

# Statement to benchmark: one forward pass of the generator.
code = '''_ = generator(noise, label_synth)'''
setup = ''

# timeit compiles `stmt` in its own namespace, so the notebook globals must be
# passed explicitly; otherwise `generator`, `noise` and `label_synth` are
# undefined. `number` is set explicitly as well: the default of 1,000,000
# executions per repeat is impractical for a network forward pass.
n_runs = 100

# Yields one total time in seconds per repeat, each covering `n_runs` calls.
timeit.repeat(setup=setup, stmt=code, repeat=3, number=n_runs, globals=globals())

In [None]:
# GPU generation time
# NOTE(review): the CUDA events used here need a CUDA device, and the result
# only reflects GPU execution if the generator and its inputs live on the
# GPU. The checkpoint is loaded with map_location set to the CPU and the GPU
# setup cell is commented out -- confirm the intended device before trusting
# these numbers.

# Create the latent noise vector and the class label to condition on.
label = 0
noise = torch.normal(mean=0, std=1, size=(1, z_dim))
label_synth = torch.tensor([label], dtype=torch.int)

generator.eval()
starter = torch.cuda.Event(enable_timing=True)
ender = torch.cuda.Event(enable_timing=True)
repetitions = 10000
timings = np.zeros((repetitions, 1))

with torch.no_grad():
    # Warm-up runs, executed under the same no-grad conditions as the
    # measured runs.
    for _ in range(10):
        _ = generator(noise, label_synth)

    # Time every forward pass individually.
    for rep in range(repetitions):
        starter.record()
        _ = generator(noise, label_synth)
        ender.record()
        # Wait for the recorded events to complete before reading them.
        torch.cuda.synchronize()
        # elapsed_time reports milliseconds between the two events.
        curr_time = starter.elapsed_time(ender)
        timings[rep] = curr_time

mean_syn = timings.mean()
std_syn = timings.std()
print("mean time (ms): ", mean_syn)
print("std time (ms): ", std_syn)

### Batch generation

In [None]:
# Number of audio samples to generate for each class label.
n_samples_label = 10

In [None]:
# Generate `n_samples_label` clips for every class and write each one to
# <path_to_output>/<label name>/<label name>_<index>.wav
# label_names maps a class index (stored as a string key) to its label name.
generator.eval()

for _class, class_name in tqdm(label_names.items()):
    class_dir = f'{path_to_output}/{class_name}'
    # exist_ok avoids the check-then-create race and tolerates re-runs.
    os.makedirs(class_dir, exist_ok=True)

    # One noise vector per clip, all conditioned on the same class index.
    noise = torch.normal(mean=0, std=1, size=(n_samples_label, z_dim))
    label_synth = torch.full(
        (n_samples_label, 1), int(_class), dtype=torch.long
    )

    # Gradients are not needed for inference.
    with torch.no_grad():
        synth_audio = generator(noise, label_synth)

    # Convert the whole batch once instead of once per clip.
    batch = synth_audio.detach().cpu().numpy()
    for i, clip in enumerate(batch):
        soundfile.write(
            file=f'{class_dir}/{class_name}_{i}.wav',
            data=np.squeeze(clip),
            samplerate=sample_rate,
        )