In [2]:
import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader

from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms

import random
import numpy as np
import umap

import os, sys, argparse, time
from pathlib import Path

sys.path.append('../')
from cabbage.model import VAE

import librosa
import soundfile as sf
import configparser
import random
import json
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
# ---------------------------------------------------------------------------
# Configuration: read every tunable from ../default.ini into notebook globals.
# ---------------------------------------------------------------------------
config_path = Path('../default.ini')
config = configparser.ConfigParser(allow_no_value=True)
# ConfigParser.read() silently skips unreadable files and returns the list of
# files it did parse, so an empty result means the config was not found.
if not config.read(config_path):
  raise FileNotFoundError(config_path.resolve())

# Import audio configs
sampling_rate = config['audio'].getint('sampling_rate')
hop_length = config['audio'].getint('hop_length')
bins_per_octave = config['audio'].getint('bins_per_octave')
num_octaves = config['audio'].getint('num_octaves')
n_bins = int(num_octaves * bins_per_octave)
n_iter = config['audio'].getint('n_iter')
cqt_bit_depth = config['audio'].get('cqt_bit_depth')

# Keep torch's default dtype and the numpy dtype in sync with the CQT data.
if cqt_bit_depth == "float64":
  torch.set_default_dtype(torch.float64)
  dtype = np.float64
elif cqt_bit_depth == "float32":
  torch.set_default_dtype(torch.float32)
  dtype = np.float32
else:
  raise TypeError('{} cqt_bit_depth datatype is unknown. Choose either float32 or float64'.format(cqt_bit_depth))

# Dataset
dataset = Path(config['dataset'].get('datapath'))
if not dataset.exists():
  raise FileNotFoundError(dataset.resolve())

cqt_dataset = config['dataset'].get('cqt_dataset')

# 'workspace' is optional; bind the name either way so later cells can test it
# instead of hitting a NameError.
if config['dataset'].get('workspace') is not None:
  workspace = Path(config['dataset'].get('workspace'))
else:
  workspace = None

run_number = config['dataset'].getint('run_number')
my_cqt = dataset / cqt_dataset

if not my_cqt.exists():
  raise FileNotFoundError(my_cqt.resolve())

my_audio = dataset / 'audio'

test_audio = config['dataset'].get('test_dataset')
my_test_audio = dataset / test_audio

if not my_test_audio.exists():
  raise FileNotFoundError(my_test_audio.resolve())

generate_test = config['dataset'].get('generate_test')

# Training configs
epochs = config['training'].getint('epochs')
learning_rate = config['training'].getfloat('learning_rate')
batch_size = config['training'].getint('batch_size')
checkpoint_interval = config['training'].getint('checkpoint_interval')
save_best_model_after = config['training'].getint('save_best_model_after')

# Model configs
latent_dim = config['VAE'].getint('latent_dim')
n_units = config['VAE'].getint('n_units')
kl_beta = config['VAE'].getfloat('kl_beta')
device = config['VAE'].get('device')

# etc
example_length = config['extra'].getint('example_length')
normalize_examples = config['extra'].getboolean('normalize_examples')
plot_model = config['extra'].getboolean('plot_model')

desc = config['extra'].get('description')
start_time = time.time()
config['extra']['start'] = time.asctime( time.localtime(start_time) )

device = torch.device(device)
# Only query CUDA for a name when the configured device is a CUDA device;
# on a CPU (or other) device the CUDA query would raise.
if device.type == 'cuda':
  device_name = torch.cuda.get_device_name(device)
else:
  device_name = str(device)
print('Device: {}'.format(device_name))
config['VAE']['device_name'] = device_name

Device: NVIDIA GeForce RTX 3090


In [8]:
# Load the dataset: stack every .npy CQT file found in `my_cqt` along axis 0
# and wrap the result in a shuffled DataLoader.
print('creating the dataset...')

# os.listdir returns entries in arbitrary order; sort so the frame order of
# `training_array` is reproducible between runs and machines.
cqt_files = sorted(f for f in os.listdir(my_cqt) if f.endswith('.npy'))
if not cqt_files:
    raise FileNotFoundError('No .npy files found in {}'.format(my_cqt.resolve()))

cqt_arrays = []
for f in cqt_files:
    print('adding-> %s' % f)
    cqt_arrays.append(np.load(my_cqt / f))

# Concatenate once at the end: concatenating inside the loop re-copies the
# whole accumulated array for every file.
training_array = np.concatenate(cqt_arrays, axis=0)

total_cqt = len(training_array)
print('Total number of CQT frames: {}'.format(total_cqt))
config['dataset']['total_frames'] = str(total_cqt)

# Copy into a tensor of torch's current default dtype (what the legacy
# torch.Tensor(...) constructor did implicitly).
training_tensor = torch.tensor(training_array, dtype=torch.get_default_dtype())
training_dataset = TensorDataset(training_tensor)
training_dataloader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)

creating the dataset...
adding-> 472449__erokia__msfxp-sound-89.npy
adding-> 472450__erokia__msfxp-sound-4.npy
adding-> 472451__erokia__msfxp-sound-399.npy
adding-> 472452__erokia__msfxp-sound-398.npy
adding-> 472453__erokia__msfxp-sound-397.npy
adding-> 472454__erokia__msfxp-sound-402.npy
adding-> 472455__erokia__msfxp-sound-401.npy
adding-> 472456__erokia__msfxp-sound-400.npy
adding-> 472912__erokia__msfxp-sound-96.npy
adding-> 472913__erokia__msfxp-sound-405.npy
adding-> 472914__erokia__msfxp-sound-404.npy
adding-> 472915__erokia__msfxp-sound-403.npy
adding-> 472916__erokia__msfxp-sound-40.npy
adding-> 472917__erokia__msfxp-sound-409.npy
adding-> 472918__erokia__msfxp-sound-408.npy
adding-> 472919__erokia__msfxp-sound-407.npy
adding-> 472920__erokia__msfxp-sound-406.npy
adding-> 472921__erokia__msfxp-sound-308.npy
adding-> 472922__erokia__msfxp-sound-309.npy
adding-> 472923__erokia__msfxp-sound-410.npy
adding-> 472924__erokia__msfxp-sound-41.npy
adding-> 472925__erokia__msfxp-sound-

In [10]:
# Load the model: restore the best checkpoint of the selected run.

run_id = 'run-004'
run_path = dataset.joinpath(desc).joinpath(run_id)
model_path = run_path.joinpath('model').joinpath('best_model.pt')

# Fail early with the resolved path, like the other path checks in this notebook.
if not model_path.exists():
    raise FileNotFoundError(model_path.resolve())

# The checkpoint holds the whole pickled module, so no fresh VAE instance is
# built here; the VAE class only has to be importable for unpickling.
# map_location places the weights on the configured device even if the
# checkpoint was saved from a different one.
# NOTE(review): torch.load unpickles arbitrary Python objects - only load
# checkpoints from a trusted source. Recent PyTorch releases may additionally
# need weights_only=False for whole-module checkpoints - confirm against the
# installed version.
model = torch.load(model_path, map_location=device)
model = model.to(device)
model.eval()

VAE(
  (fc1): Linear(in_features=384, out_features=2048, bias=True)
  (fc21): Linear(in_features=2048, out_features=256, bias=True)
  (fc22): Linear(in_features=2048, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=2048, bias=True)
  (fc4): Linear(in_features=2048, out_features=384, bias=True)
)

In [25]:
# Encode every dataset frame with the model's encoder and collect the outputs
# of model.encode (unpacked as z_mu, z_logvar) for the whole dataset.
#
# A dedicated, non-shuffled loader is used so that row i of the results
# corresponds to row i of `training_dataset` and the output is reproducible;
# the training loader shuffles on every pass.
encoding_dataloader = DataLoader(training_dataset, batch_size=batch_size, shuffle=False)

z_mu_batches = []
z_logvar_batches = []

# Inference only: no autograd graph is needed.
with torch.no_grad():
  for dataset_tuple in encoding_dataloader:
    # TensorDataset yields 1-tuples; unpack the single tensor.
    dataset_sample, = dataset_tuple
    dataset_sample = dataset_sample.to(device)
    z_mu, z_logvar = model.encode(dataset_sample)
    z_mu_batches.append(z_mu)
    z_logvar_batches.append(z_logvar)

if not z_mu_batches:
  raise ValueError('training_dataset is empty; nothing to encode')

# Concatenate once instead of re-copying the accumulated tensor per batch.
dataset_z_mu = torch.cat(z_mu_batches, 0)
dataset_z_logvar = torch.cat(z_logvar_batches, 0)
    

In [26]:
# Output directory for artefacts of this run. exist_ok makes the cell safe to
# re-run; parents=True keeps the "create intermediate directories" behaviour
# of os.makedirs.
testpath = run_path.joinpath('test')
testpath.mkdir(parents=True, exist_ok=True)

In [28]:
# Write both encoder outputs to the run's test directory as .npy files.
# Each tensor is copied to host memory before the numpy conversion.
mu_file = testpath / 'dataset_z_mu.npy'
mu_host = dataset_z_mu.cpu().numpy()
np.save(mu_file, mu_host)

logvar_file = testpath / 'dataset_z_logvar.npy'
logvar_host = dataset_z_logvar.cpu().numpy()
np.save(logvar_file, logvar_host)


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.