# Results

In [1]:
# Shell out to nvidia-smi to list the GPUs with their current memory use and utilisation.
# NOTE(review): presumably used to pick a free device for CUDA_VISIBLE_DEVICES set later — confirm.
!nvidia-smi

Wed Feb 22 09:17:33 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.60.13    Driver Version: 525.60.13    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:61:00.0 Off |                    0 |
| N/A   36C    P0   152W / 300W |  16645MiB / 32768MiB |     48%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2...  Off  | 00000000:62:00.0 Off |                    0 |
| N/A   49C    P0   165W / 300W |  20735MiB / 32768MiB |     88%      Defaul

In [39]:
import IPython.display as ipd
import os, sys

import numpy as np
import torch
import torchaudio
from pytorch_lightning import Trainer
import yaml
import soundfile as sf
from parallel_wavegan.utils import load_model

os.environ["CUDA_VISIBLE_DEVICES"] = '2'
PROJECT_ROOT = "/project/fdreyer/projects/vqvae-vc"
sys.path.append(PROJECT_ROOT)
%cd {PROJECT_ROOT}

from src.models.hle_vqvae_vc import HleVqVaeVc
from src.data.datamodules import VCDataModule

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Use {device} device")

sr = 24_000

/project/fdreyer/projects/vqvae-vc
Use cuda device


In [3]:
# Location of the VCTK-20 dataset configuration inside the project tree.
VCTK_DATASET_CONFIG_PATH = os.path.join(
    PROJECT_ROOT,
    "config",
    "data",
    "vctk20",
    "vctk20mel-24kHZ.yml",
)

# Parse the YAML file and keep only its "data" section, which holds the
# keyword arguments for the data module.
with open(VCTK_DATASET_CONFIG_PATH) as f:
    vctk_yaml = yaml.safe_load(f)
vctk_config = vctk_yaml["data"]

# Build the data module, let it prepare its datasets, then fetch the loaders
# (validation first, then training — same order as before).
vctk_data_module = VCDataModule(**vctk_config)
vctk_data_module.prepare_data()
vctk_val_data_loader = vctk_data_module.val_dataloader()
vctk_train_data_loader = vctk_data_module.train_dataloader()

Create Train Dataset:
Load audio info
Load audio datasets
Create Val Dataset:
Load audio info
Load audio datasets


In [72]:
# Checkpoint of the trained HLE-VQ-VAE voice-conversion model (Lightning run "version_28").
HLE_VQVAE_CHECKPOINT_PATH = os.path.join(PROJECT_ROOT, "lightning_logs", "version_28", "checkpoints", "epoch=54-step=2332.ckpt")
# The model is only used for validation/inference in this notebook, so switch it to
# eval mode right after loading; this keeps any train-only behaviour (dropout,
# batch-norm statistics, etc., if the model has them) out of the reported results.
# nn.Module.eval() returns the module itself, so the assignment is unchanged.
hle_vqvae_vc = HleVqVaeVc.load_from_checkpoint(HLE_VQVAE_CHECKPOINT_PATH).eval()

In [50]:
# Pre-trained HiFi-GAN vocoder checkpoint used to turn mel spectrograms into waveforms.
VOCODER_CHECKPOINT_PATH = os.path.join(PROJECT_ROOT, "external", "hifigan_vocoder", "checkpoint-2500000steps.pkl")
# Load the generator, move it to the selected device and put it in eval mode:
# the vocoder is used for inference only in this notebook.
vocoder = load_model(VOCODER_CHECKPOINT_PATH).to(device).eval()

In [57]:
# Follow the device detected at the top of the notebook instead of hard-coding
# "gpu", so this cell also runs on a CPU-only machine. Validation is pinned to a
# single device so that every batch is evaluated exactly once.
trainer = Trainer(accelerator="gpu" if device == "cuda" else "cpu", devices=1)
# Run one validation pass over the VCTK validation loader; the returned metrics
# are displayed as the cell output.
trainer.validate(model=hle_vqvae_vc, dataloaders=vctk_val_data_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [2]
  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [81]:
# Take the first validation batch: mel spectrograms and their speaker ids.
batch = next(iter(vctk_val_data_loader))
originals, speakers = batch

# Pure inference: make sure the model is in eval mode and skip autograd so no
# graph is kept alive for the whole batch of reconstructions.
hle_vqvae_vc.eval()
# Feed the inputs on whatever device the model currently lives on; `originals`
# and `speakers` themselves are left untouched.
model_device = hle_vqvae_vc.device
with torch.no_grad():
    # The model returns three values; only the first (the reconstruction) is used here.
    reconstructions, _, _ = hle_vqvae_vc(originals.to(model_device), speakers.to(model_device))
print(f"Audios shape: {originals.shape}\nSpeakers shape: {speakers.shape}\nReconstructions shape: {reconstructions.shape}")

Audios shape: torch.Size([64, 80, 696])
Speakers shape: torch.Size([64])
Reconstructions shape: torch.Size([64, 80, 696])


In [82]:
def mel_to_wav(mel, vocoder, device):
    """Vocode one mel spectrogram into a waveform.

    Args:
        mel: Mel spectrogram of a single utterance. Its last two axes are
            swapped before vocoding (assumes the input is (n_mels, frames) and
            the vocoder wants (frames, n_mels) — TODO confirm against the vocoder).
        vocoder: Model exposing ``inference(mel)``.
        device: Device the mel tensor is moved to before vocoding.

    Returns:
        Tuple ``(mel_on_device, wav)``: the transposed mel tensor on ``device``
        and the squeezed, detached waveform tensor on the CPU.
    """
    mel_on_device = mel.transpose(-1, -2).squeeze().to(device)
    # Inference only: no autograd graph is needed for the generated audio.
    with torch.no_grad():
        wav = vocoder.inference(mel_on_device)
    return mel_on_device, wav.squeeze().detach().cpu()


# Index (within the batch) of the utterance to listen to and export.
utterance = 2
mel_original, wav_original = mel_to_wav(originals[utterance], vocoder, device)
mel_reconstruction, wav_reconstruction = mel_to_wav(reconstructions[utterance], vocoder, device)

In [83]:
# Inline audio player for the vocoded original mel spectrogram.
display(ipd.Audio(data=wav_original, rate=sr))

In [85]:
# Inline audio player for the vocoded model reconstruction.
display(ipd.Audio(data=wav_reconstruction, rate=sr))

In [86]:
# Export both waveforms as WAV files, named after the utterance index.
AUDIO_REPORT_DIR = "./reports/audios"
# Create the target directory on first use instead of assuming it already exists.
os.makedirs(AUDIO_REPORT_DIR, exist_ok=True)
# Filename fixed from the misspelled "oritinal" to "original".
sf.write(f"{AUDIO_REPORT_DIR}/{utterance}-original.wav", wav_original, sr, format="wav")
sf.write(f"{AUDIO_REPORT_DIR}/{utterance}-reconstructed.wav", wav_reconstruction, sr, format="wav")