In [1]:
from model.wavenet import WaveNet
from model.conv_encoder import ConvEncoder, ConvDecoder
from model.vq_vae import VQ_VAE

from vq_vae_trainer import VQ_VAE_Trainer

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

In [2]:
############################################################
# Current VQ-VAE implementation here
# - Input audio
# ---> Conv Encoder (Downsample)
# ---> VQ (Latent space)
# ---> Conv Decoder (Upsample)
# ---> WaveNet (Generate)
# - Output audio
############################################################
#wavenet = WaveNet()
#conv_encoder = ConvEncoder()
vq = VQ_VAE()

Model: "Convolutional-Encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 16000, 1)]        0         
_________________________________________________________________
conv1d (Conv1D)              (None, 8000, 32)          160       
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 4000, 32)          4128      
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 2000, 32)          4128      
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 1000, 32)          4128      
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 500, 32)           4128      
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 250, 32) 

_________________________________________________________________
conv1d_transpose_2 (Conv1DTr (None, None, 32)          4128      
_________________________________________________________________
conv1d_transpose_3 (Conv1DTr (None, None, 32)          4128      
_________________________________________________________________
conv1d_transpose_4 (Conv1DTr (None, None, 32)          4128      
_________________________________________________________________
conv1d_transpose_5 (Conv1DTr (None, None, 32)          4128      
_________________________________________________________________
conv1d_7 (Conv1D)            (None, None, 1)           33        
Total params: 24,801
Trainable params: 24,801
Non-trainable params: 0
_________________________________________________________________
The following Variables were used a Lambda layer's call (sample_from_codebook), but
are not present in its tracked objects:
  <tf.Variable 'vector_quantizer/Codebook:0' shape=(64, 32) dtype=float32>
It is

In [3]:
from scipy.io.wavfile import read
import os
DATA_PATH = r"/home/rithomas/data/IDMT-SMT-GUITAR_V2/dataset1/Fender_Strat_Clean_Neck_SC/audio"
p = os.path.join(DATA_PATH, 'G53-40100-1111-00001.wav')
print(read(p))

(44100, array([6, 5, 4, ..., 0, 0, 0], dtype=int16))


In [4]:
import tensorflow as tf


## Dummy dataset to test architecture
def generator():
    for path in os.listdir(DATA_PATH):
        if path.endswith('.wav'):
            yield tf.expand_dims(read(os.path.join(DATA_PATH, path))[1][:16000]
    
ds = tf.data.Dataset.from_generator(generator, output_types=tf.int32)
ds = ds.batch(4)
print(ds)
    
DEFAULT_CONFIGS = {
    'model_path': '/home/rithomas/cache/test_model',
    'learning_rate': 0.0001,
    'num_epochs': 100,
    'print_every': 100
}
trainer = VQ_VAE_Trainer(vq, DEFAULT_CONFIGS)
trainer.train(ds)

<BatchDataset shapes: <unknown>, types: tf.int32>
tf.Tensor(
[[[    2]
  [    2]
  [    4]
  ...
  [-1927]
  [-1805]
  [-1610]]

 [[  -48]
  [  -45]
  [  -41]
  ...
  [-1647]
  [-2072]
  [-2477]]

 [[   -4]
  [   -5]
  [   -5]
  ...
  [ -563]
  [ -353]
  [ -132]]

 [[    6]
  [    5]
  [    4]
  ...
  [ -201]
  [ -197]
  [ -187]]], shape=(4, 16000, 1), dtype=int32)
Initializing from scratch.
tf.Tensor([    4 16000     1], shape=(3,), dtype=int32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor([    4 16000     1], shape=(3,), dtype=int32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(
[[[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]], shape=(4, 16000, 1), dtype=float32)
tf.Tensor([    4 16000     1], shape=(3,), dtype=int32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(
[[[nan]
  [nan]
  [nan]
  ...
  [n

tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(
[[[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]], shape=(4, 16000, 1), dtype=float32)
tf.Tensor([    4 16000     1], shape=(3,), dtype=int32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(
[[[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]], shape=(4, 16000, 1), dtype=float32)
tf.Tensor([    2 16000     1], shape=(3,), dtype=int32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(
[[[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]], shape=(2, 16000, 1), dtype=float32)
tf.Tensor([    4 16000     1], shape=(3,), dtype=int32)
tf.Tenso

tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(
[[[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]], shape=(4, 16000, 1), dtype=float32)
tf.Tensor([    4 16000     1], shape=(3,), dtype=int32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(
[[[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]], shape=(4, 16000, 1), dtype=float32)
tf.Tensor([    4 16000     1], shape=(3,), dtype=int32)
tf.Tensor(nan, shape=(), dtype=float32)
tf.Tensor(
[[[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]
  [nan]]

 [[nan]
  [nan]
  [nan]
  ...
  [nan]
  [nan]

KeyboardInterrupt: 