In [1]:
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, division,
                        print_function, unicode_literals)

# Standard library
import configparser
import json
import os
import random
from pathlib import Path
from warnings import warn

# Third-party
import librosa
import numpy as np
import tensorflow as tf
from IPython.display import Audio
from scipy.io import wavfile
from tensorflow.keras import layers
#import matplotlib.pyplot as plt
#import pandas as pd
#import seaborn as sns
#from sklearn import preprocessing
#from nsgt import NSGT_sliced, LogScale, LinScale, MelScale, OctScale, CQ_NSGT
#import wavefile

tf.keras.backend.clear_session()  # For easy reset of notebook state.

In [2]:
import librosa

In [3]:
# Length of the excerpt to audition, in seconds.
my_example_duration = 5
# Recording the excerpt is taken from.
my_audio = "D://my_workspace/dataset/Electroacoustic-Gigantic/audio/04 - Horacio Vaggione - Ash.wav"

In [6]:
# Length of the whole recording; used below to bound the random excerpt offset.
total_duration = librosa.get_duration(filename=my_audio)

In [7]:
# Random start time (whole seconds) chosen so the excerpt fits inside the
# recording. The upper bound is clamped at 0: for a recording shorter than
# my_example_duration the difference is negative and random.randint would
# raise ValueError; with the clamp the excerpt simply starts at the beginning.
my_offset = random.randint(0, max(0, int(total_duration) - my_example_duration))

In [8]:
# Read only the chosen excerpt; sr=None asks librosa not to resample, so fs is
# whatever rate the file was stored at.
s, fs = librosa.load(
    my_audio,
    sr=None,
    offset=my_offset,
    duration=my_example_duration,
)


In [10]:
# Player for the original excerpt.
Audio(data=s, rate=fs)

In [13]:
# Constant-Q analysis settings used by the exploratory cells below.
num_octaves = 8
bins_per_octave = 48
hop_length = 128
# Total number of CQT bins = bins per octave times number of octaves.
n_bins = bins_per_octave * num_octaves

## Calculate Constant Q Transform

In [14]:
# Complex-valued constant-Q transform of the excerpt.
C_complex = librosa.cqt(
    y=s,
    sr=fs,
    n_bins=n_bins,
    bins_per_octave=bins_per_octave,
    hop_length=hop_length,
)

In [16]:
# Invert the full complex CQT (magnitude and phase) back to a waveform.
y_icqt_full = librosa.icqt(
    C_complex,
    sr=fs,
    hop_length=hop_length,
    bins_per_octave=bins_per_octave,
)


In [17]:
# Player for the reconstruction from the complex CQT.
Audio(data=y_icqt_full, rate=fs)

## Phase reconstruction

In [18]:
# Keep only the magnitude of the CQT; the phase is discarded here.
C = np.absolute(C_complex)

In [25]:
# Invert the magnitude-only CQT directly, with no phase estimation step.
y_mag = librosa.icqt(
    C,
    sr=fs,
    hop_length=hop_length,
    bins_per_octave=bins_per_octave,
)

In [26]:
# Player for the magnitude-only inversion.
Audio(data=y_mag, rate=fs)

## Inverse magnitude CQT using Griffin-Lim phase estimation


In [27]:
# Rebuild a waveform from the magnitude CQT, letting Griffin-Lim estimate the
# phase over 8 iterations.
y_inv = librosa.griffinlim_cqt(
    C,
    n_iter=8,
    sr=fs,
    bins_per_octave=bins_per_octave,
    hop_length=hop_length,
)

In [29]:
# Player for the Griffin-Lim reconstruction.
Audio(data=y_inv, rate=fs)

In [30]:
# Short alias for tf.data's AUTOTUNE constant (not used in the cells shown here).
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [32]:
import configparser
from pathlib import Path 

In [39]:
# Dataset root and the training-run folder whose config and model are inspected.
dataset = 'D:\\my_workspace\\dataset\\Electroacoustic-Roads-with-silence'
run_path = 'D:\\my_workspace\\dataset\\Electroacoustic-Roads-with-silence\\func-timbre-vae-cedar\\run-010'
#Get configs
config = configparser.ConfigParser(allow_no_value=True)
config_path = os.path.join(run_path, 'config.ini')
# ConfigParser.read() silently skips files it cannot open and returns only the
# names it actually parsed. Fail here with a clear message instead of letting a
# later config['audio'] lookup raise an unrelated-looking KeyError.
loaded_configs = config.read(config_path)
if not loaded_configs:
    raise FileNotFoundError('Could not read config file: {}'.format(config_path))
# Display the list of files that were parsed.
loaded_configs

['D:\\my_workspace\\dataset\\Electroacoustic-Roads-with-silence\\func-timbre-vae-cedar\\run-010\\config.ini']

In [40]:
# Audio settings of the training run. These replace the exploratory values of
# hop_length, bins_per_octave, num_octaves and n_bins assigned earlier.
audio_cfg = config['audio']
sample_rate = audio_cfg.getint('sample_rate')
hop_length = audio_cfg.getint('hop_length')
bins_per_octave = audio_cfg.getint('bins_per_octave')
num_octaves = audio_cfg.getint('num_octaves')
n_iter = audio_cfg.getint('n_iter')
# Total CQT bin count derived from the two values read above.
n_bins = int(num_octaves * bins_per_octave)

In [41]:
# Dataset locations: the audio folder and the CQT folder named in the config.
my_audio = os.path.join(dataset, 'audio')
cqt_dataset = config['dataset'].get('cqt_dataset')
my_cqt = os.path.join(dataset, cqt_dataset)

# Training settings
batch_size = config['training'].getint('batch_size')

# Model settings
latent_dim = config['VAE'].getint('latent_dim')

# Miscellaneous settings
normalize_examples = config['extra'].getboolean('normalize_examples')
example_length = config['extra'].getint('example_length')

In [42]:
# Re-binds AUTOTUNE to the same tf.data constant assigned in an earlier cell.
AUTOTUNE = tf.data.experimental.AUTOTUNE

#We need to define the custom Sampling Layer
class Sampling(layers.Layer):
  """Draw a latent vector z from the pair (z_mean, z_log_var).

  The layer takes `inputs = (z_mean, z_log_var)` and returns
  `z_mean + exp(0.5 * z_log_var) * epsilon`, where `epsilon` is standard
  normal noise with one value per (batch item, latent dimension).
  """

  def call(self, inputs):
    z_mean, z_log_var = inputs
    mean_shape = tf.shape(z_mean)
    # Noise tensor sized from the first two axes of z_mean.
    epsilon = tf.keras.backend.random_normal(
        shape=(mean_shape[0], mean_shape[1]))
    # exp(0.5 * log-variance) is the standard deviation.
    sigma = tf.exp(0.5 * z_log_var)
    return z_mean + sigma * epsilon


# Load the saved model from the run's 'model' folder. The custom Sampling layer
# is registered through CustomObjectScope so Keras can deserialize it.
my_model_path = os.path.join(run_path, 'model', 'mymodel_last.h5')
with tf.keras.utils.CustomObjectScope({'Sampling': Sampling}):
  trained_model = tf.keras.models.load_model(my_model_path)

trained_model.summary()

Model: "vae"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      [(None, 384)]        0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 2048)         788480      encoder_input[0][0]              
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 256)          524544      dense[0][0]                      
__________________________________________________________________________________________________
z_log_var (Dense)               (None, 256)          524544      dense[0][0]                      
________________________________________________________________________________________________

In [43]:
# Encoder sub-model: from the trained model's input to the outputs of its
# 'z_mean' and 'z_log_var' layers (in that order).
encoder = tf.keras.Model(
    inputs=trained_model.input,
    outputs=[trained_model.get_layer(layer_name).output
             for layer_name in ('z_mean', 'z_log_var')],
    name='encoder',
)
encoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      [(None, 384)]        0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 2048)         788480      encoder_input[0][0]              
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 256)          524544      dense[0][0]                      
__________________________________________________________________________________________________
z_log_var (Dense)               (None, 256)          524544      dense[0][0]                      
Total params: 1,837,568
Trainable params: 1,837,568
Non-trainable params: 0
________________

In [44]:
# Decoder sub-model: wraps the input and output of the trained model's
# 'decoder' layer in a standalone Model.
decoder = tf.keras.Model(
    name='decoder',
    inputs=trained_model.get_layer('decoder').input,
    outputs=trained_model.get_layer('decoder').output,
)
decoder.summary()

Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_sampling (InputLayer)      [(None, 256)]             0         
_________________________________________________________________
dense_1 (Dense)              (None, 2048)              526336    
_________________________________________________________________
dense_2 (Dense)              (None, 384)               786816    
Total params: 1,313,152
Trainable params: 1,313,152
Non-trainable params: 0
_________________________________________________________________


In [45]:
## Phase reconstruction

In [None]:
## Variational autoencoders