In [58]:
import os

import numpy as np
import torch
import umap
from tqdm import tqdm

from src.models.spectrogram_vae import SpectrogramVAE
from src.utils import audio_to_spectrogram
from src.plot_utils import *

In [59]:
# --- Experiment configuration -------------------------------------------
# Name of the audio effect whose parameter space is explored.
DAFX_NAME = "mda Ambience"
# Number of random parameter settings to sample.
NUM_EXAMPLES = 10_000

# Trained SpectrogramVAE checkpoint and the directory the .npy results go to.
CHECKPOINT = "/home/kieran/Level5ProjectAudioVAE/src/l5proj_spectrogram_vae/hdx3y4ly/checkpoints/epoch=169-step=35530.ckpt"
SAVE_PATH = "/home/kieran/Level5ProjectAudioVAE/src/evaluation/data/param_extraction"

# Derived values: the lower-cased last word of the effect name (used as the
# output filename prefix) and the third-from-last component of the
# checkpoint path.
FORMATTED_DAFX_NAME = DAFX_NAME.lower().split()[-1]
CHECKPOINT_ID = CHECKPOINT.rsplit("/", 3)[-3]

In [60]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [61]:
# Look up the effect object for DAFX_NAME; later cells use it to draw random
# parameter settings and to apply them to audio.
dafx = dafx_from_name(DAFX_NAME)

In [62]:
# Display the effect's per-parameter min/max values.
dafx.param_min_max_vals

{'freq_hz': {'min': 0.0, 'max': 16000.0},
 'fine_hz': {'min': 0.0, 'max': 100.0},
 'feedback': {'min': 0.0, 'max': 100.0}}

In [63]:
# Restore the trained VAE from the checkpoint, then place it on DEVICE.
model = SpectrogramVAE.load_from_checkpoint(CHECKPOINT)
model = model.to(DEVICE)
# Switch to evaluation mode; as the cell's last expression this also
# displays the module structure.
model.eval()

SpectrogramVAE(
  (encoder_conv): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): ReLU()
      (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Sequential(
      (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): ReLU()
      (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): Sequential(
      (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): ReLU()
      (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (3): Sequential(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): ReLU()
      (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (mu): Linear(in_features=37152, out_features=128, bias=True)
  (log_var): Linear(in_features=37152, 

In [64]:
# Build the audio dataset from the effect registered under the name 'clean',
# sized to NUM_EXAMPLES examples per epoch.
clean_dafx = dafx_from_name('clean')
dataset = get_audio_dataset(clean_dafx,
                            num_examples_per_epoch=NUM_EXAMPLES)

100%|████████████████████████████████████████| 88/88 [00:00<00:00, 44013.68it/s]


Loaded 88 files for train = 66.89 hours.





In [None]:
# Sweep random parameter settings of the effect over a single audio example
# and record, for each setting, the latent vector returned by the VAE.
settings = []
embeddings = []

# The example is drawn once and reused, so only the effect parameters vary
# between iterations.
x = next(iter(dataset))

for _ in tqdm(range(NUM_EXAMPLES)):
    setting = dafx.get_random_parameter_settings()

    # Apply setting to audio
    y = dafx.apply(x, setting)
    # Add two leading singleton dimensions before the spectrogram transform
    # (presumably batch and channel -- TODO confirm).
    y = y.unsqueeze(0).unsqueeze(0)

    X = audio_to_spectrogram(signal=y,
                             n_fft=model.hparams.n_fft,
                             hop_length=model.hparams.hop_length,
                             window_size=model.hparams.window_size).to(DEVICE)

    # Inference only: disable autograd so no computation graph is built for
    # each of the NUM_EXAMPLES forward passes.
    with torch.no_grad():
        _, _, _, z = model(X)

    # Move results to host memory as NumPy arrays for the later stacking step.
    settings.append(setting.cpu().detach().numpy())
    embeddings.append(z.cpu().detach().numpy())

 38%|███▊      | 3772/10000 [00:36<01:05, 95.71it/s] 

In [None]:
# Stack the per-example latent vectors into one array and drop every
# singleton dimension.
data = np.squeeze(np.asarray(embeddings))

In [None]:
# Stack the sampled parameter settings into one array and drop every
# singleton dimension. The name `settings` is rebound from a list to an
# ndarray here.
settings = np.squeeze(np.asarray(settings))

In [None]:
# Fit UMAP on the latent embeddings and keep the resulting projection.
# NOTE(review): no random_state is passed, so the projection is presumably
# not reproducible run-to-run -- confirm whether that matters for the saved
# file.
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, metric='euclidean')
emb = reducer.fit_transform(data)

In [None]:
# Report the shapes of the three arrays produced above.
for label, array in (
    ("Latent embedding shape: ", data),
    ("Param settings shape: ", settings),
    ("UMAP projection shape: ", emb),
):
    print(label, array.shape)

In [None]:
# Persist the latent embeddings, the parameter settings and the UMAP
# projection as .npy files prefixed with the formatted effect name.
# Create the output directory first so np.save cannot fail with
# FileNotFoundError after the expensive sampling and UMAP steps have run.
os.makedirs(SAVE_PATH, exist_ok=True)

np.save(f"{SAVE_PATH}/{FORMATTED_DAFX_NAME}_data.npy", data)
np.save(f"{SAVE_PATH}/{FORMATTED_DAFX_NAME}_settings.npy", settings)
np.save(f"{SAVE_PATH}/{FORMATTED_DAFX_NAME}_projection.npy", emb)